llm.rb 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +120 -119
  3. data/lib/llm/bot/builder.rb +2 -2
  4. data/lib/llm/bot.rb +13 -22
  5. data/lib/llm/buffer.rb +7 -0
  6. data/lib/llm/file.rb +22 -12
  7. data/lib/llm/function.rb +8 -7
  8. data/lib/llm/message.rb +8 -0
  9. data/lib/llm/multipart.rb +0 -1
  10. data/lib/llm/object/kernel.rb +8 -0
  11. data/lib/llm/object.rb +9 -3
  12. data/lib/llm/provider.rb +10 -12
  13. data/lib/llm/providers/anthropic/format/completion_format.rb +10 -5
  14. data/lib/llm/providers/anthropic/models.rb +4 -9
  15. data/lib/llm/providers/anthropic/response/completion.rb +39 -0
  16. data/lib/llm/providers/anthropic.rb +13 -25
  17. data/lib/llm/providers/deepseek/format/completion_format.rb +3 -3
  18. data/lib/llm/providers/deepseek.rb +16 -1
  19. data/lib/llm/providers/gemini/audio.rb +9 -13
  20. data/lib/llm/providers/gemini/files.rb +19 -34
  21. data/lib/llm/providers/gemini/format/completion_format.rb +20 -5
  22. data/lib/llm/providers/gemini/images.rb +12 -11
  23. data/lib/llm/providers/gemini/models.rb +4 -10
  24. data/lib/llm/providers/gemini/{response_parser/completion_parser.rb → response/completion.rb} +10 -24
  25. data/lib/llm/providers/gemini/response/embedding.rb +8 -0
  26. data/lib/llm/providers/gemini/response/file.rb +11 -0
  27. data/lib/llm/providers/gemini/response/image.rb +26 -0
  28. data/lib/llm/providers/gemini.rb +18 -29
  29. data/lib/llm/providers/llamacpp.rb +18 -1
  30. data/lib/llm/providers/ollama/format/completion_format.rb +8 -5
  31. data/lib/llm/providers/ollama/models.rb +2 -8
  32. data/lib/llm/providers/ollama/response/completion.rb +28 -0
  33. data/lib/llm/providers/ollama/response/embedding.rb +9 -0
  34. data/lib/llm/providers/ollama.rb +13 -19
  35. data/lib/llm/providers/openai/audio.rb +10 -10
  36. data/lib/llm/providers/openai/files.rb +22 -34
  37. data/lib/llm/providers/openai/format/completion_format.rb +11 -4
  38. data/lib/llm/providers/openai/format/moderation_format.rb +2 -2
  39. data/lib/llm/providers/openai/format/respond_format.rb +7 -4
  40. data/lib/llm/providers/openai/images.rb +18 -17
  41. data/lib/llm/providers/openai/models.rb +4 -9
  42. data/lib/llm/providers/openai/moderations.rb +9 -11
  43. data/lib/llm/providers/openai/response/audio.rb +7 -0
  44. data/lib/llm/providers/openai/{response_parser/completion_parser.rb → response/completion.rb} +14 -30
  45. data/lib/llm/providers/openai/response/embedding.rb +9 -0
  46. data/lib/llm/providers/openai/response/file.rb +7 -0
  47. data/lib/llm/providers/openai/response/image.rb +16 -0
  48. data/lib/llm/providers/openai/response/moderations.rb +34 -0
  49. data/lib/llm/providers/openai/{response_parser/respond_parser.rb → response/responds.rb} +7 -29
  50. data/lib/llm/providers/openai/responses.rb +16 -34
  51. data/lib/llm/providers/openai/stream_parser.rb +1 -0
  52. data/lib/llm/providers/openai/vector_stores.rb +188 -0
  53. data/lib/llm/providers/openai.rb +24 -9
  54. data/lib/llm/providers/xai/images.rb +58 -0
  55. data/lib/llm/providers/xai.rb +72 -0
  56. data/lib/llm/response.rb +42 -13
  57. data/lib/llm/version.rb +1 -1
  58. data/lib/llm.rb +12 -13
  59. data/llm.gemspec +5 -5
  60. metadata +29 -38
  61. data/lib/llm/model.rb +0 -32
  62. data/lib/llm/providers/anthropic/response_parser/completion_parser.rb +0 -51
  63. data/lib/llm/providers/anthropic/response_parser.rb +0 -24
  64. data/lib/llm/providers/gemini/response_parser.rb +0 -46
  65. data/lib/llm/providers/ollama/response_parser/completion_parser.rb +0 -42
  66. data/lib/llm/providers/ollama/response_parser.rb +0 -30
  67. data/lib/llm/providers/openai/response_parser.rb +0 -65
  68. data/lib/llm/providers/voyageai/error_handler.rb +0 -32
  69. data/lib/llm/providers/voyageai/response_parser.rb +0 -13
  70. data/lib/llm/providers/voyageai.rb +0 -44
  71. data/lib/llm/response/audio.rb +0 -13
  72. data/lib/llm/response/audio_transcription.rb +0 -14
  73. data/lib/llm/response/audio_translation.rb +0 -14
  74. data/lib/llm/response/completion.rb +0 -51
  75. data/lib/llm/response/download_file.rb +0 -15
  76. data/lib/llm/response/embedding.rb +0 -23
  77. data/lib/llm/response/file.rb +0 -42
  78. data/lib/llm/response/filelist.rb +0 -18
  79. data/lib/llm/response/image.rb +0 -29
  80. data/lib/llm/response/modellist.rb +0 -18
  81. data/lib/llm/response/moderationlist/moderation.rb +0 -47
  82. data/lib/llm/response/moderationlist.rb +0 -51
  83. data/lib/llm/response/respond.rb +0 -56
  84. /data/lib/llm/{event_handler.rb → eventhandler.rb} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39e538d8185cf5c8c5a36da0e1bf5b0b9e0055945a02570cd00fefc805b288d0
4
- data.tar.gz: 7fc0d3a4422fe10bb3058c7b1b5b9bc80693ccc0dbf6b62bda46d42fb7c2830c
3
+ metadata.gz: 2cd935a4ccd3b911e92b5ff54335cfc143247cbb5fe55214fd563551f7349da4
4
+ data.tar.gz: c76b36f2877c0cec7fdde54471a81ae19a4ec044158077742eba9acd26cd1483
5
5
  SHA512:
6
- metadata.gz: a2b3de69ce317d856ec593074e22883ec2b96ddcdc2637cb2b4c555885c1c771b2ffd447b255cdc16a2c7f1c2b72362ab1b6e29ec0fdf775e977292b03fd3e34
7
- data.tar.gz: c2ba0f853b7eaac4ca8fab15f497a3fa375e054b7da928b2f0798e393909baff20d2381afd48e793fd878261a0d838474e4be3be5b5232e480cf162af57dbe2e
6
+ metadata.gz: f654d042a6f44cba15b2dc0049d3933aa442f631293be446486e524773ff01bfc0f13f89ecbf09659a175c3ff9f7c6512ae8bde5716f6935c8d3d05528d3e4e9
7
+ data.tar.gz: 73945113e01d89188301a1a7db921c8c07ac19a847fa71528bc2e62eb5162ac656aca02a5f17e02bc4918dc973c298f0021afbb14a4cb38e7f81705950c4ed5b
data/README.md CHANGED
@@ -1,16 +1,16 @@
1
1
  ## About
2
2
 
3
3
  llm.rb is a zero-dependency Ruby toolkit for Large Language Models that
4
- includes OpenAI, Gemini, Anthropic, DeepSeek, Ollama, and LlamaCpp. The
5
- toolkit includes full support for chat, streaming, tool calling, audio,
6
- images, files, and JSON Schema generation.
4
+ includes OpenAI, Gemini, Anthropic, xAI (grok), DeepSeek, Ollama, and
5
+ LlamaCpp. The toolkit includes full support for chat, streaming, tool calling,
6
+ audio, images, files, and JSON Schema generation.
7
7
 
8
8
  ## Features
9
9
 
10
10
  #### General
11
11
  - ✅ A single unified interface for multiple providers
12
12
  - 📦 Zero dependencies outside Ruby's standard library
13
- - 🚀 Efficient API design that minimizes the request count
13
+ - 🚀 Efficient API design that minimizes the number of requests made
14
14
 
15
15
  #### Chat, Agents
16
16
  - 🧠 Stateless and stateful chat via completions and responses API
@@ -24,27 +24,39 @@ images, files, and JSON Schema generation.
24
24
  - 📎 File uploads and prompt-aware file interaction
25
25
  - 💡 Multimodal prompts (text, images, PDFs, URLs, files)
26
26
 
27
- #### Miscellaneous
27
+ #### Embeddings
28
28
  - 🧮 Text embeddings and vector support
29
- - 🔌 Retrieve models dynamically for introspection and selection
29
+ - 🧱 Includes support for OpenAI's vector stores API
30
+
31
+ #### Miscellaneous
32
+ - 📜 Model management and selection
33
+ - 🔧 Includes support for OpenAI's responses, moderations, and vector stores APIs
34
+
35
+ ## Matrix
36
+
37
+ While the Features section above gives you the high-level picture, the table below
38
+ breaks things down by provider, so you can see exactly what’s supported where.
30
39
 
31
- ## Demos
32
40
 
33
- > The
34
- > [llmrb/llm-shell](https://github.com/llmrb/llm-shell)
35
- > project is built with llm.rb and its demos have been
36
- > included to provide a better idea of what llm.rb
37
- > is capable of.
41
+ | Feature / Provider | OpenAI | Anthropic | Gemini | DeepSeek | xAI (Grok) | Ollama | LlamaCpp |
42
+ |--------------------------------------|:------:|:---------:|:------:|:--------:|:----------:|:------:|:--------:|
43
+ | **Chat Completions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
44
+ | **Streaming** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
45
+ | **Tool Calling** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
46
+ | **JSON Schema / Structured Output** | ✅ | ❌ | ✅ | ❌ | ✅ | ✅* | ✅* |
47
+ | **Audio (TTS / Transcribe / Translate)** | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
48
+ | **Image Generation & Editing** | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ |
49
+ | **File Uploads** | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
50
+ | **Multimodal Prompts** *(text+image)* | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
51
+ | **Embeddings** | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
52
+ | **Models API** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
53
+ | **Local Model Support** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ |
54
+ | **Vector Stores (RAG)** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
55
+ | **Responses** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
56
+ | **Moderations** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
38
57
 
39
- <details>
40
- <summary><b>1. An introduction to tool calls</b></summary>
41
- <img src="https://github.com/llmrb/llm/raw/main/share/llm-shell/examples/toolcalls_v2.gif">
42
- </details>
58
+ \* JSON Schema support in Ollama/LlamaCpp depends on the model, not the API.
43
59
 
44
- <details>
45
- <summary><b>2. Add files as conversation context</b></summary>
46
- <img src="https://github.com/llmrb/llm/raw/main/share/llm-shell/examples/files-runtime_v2.gif">
47
- </details>
48
60
 
49
61
  ## Examples
50
62
 
@@ -66,8 +78,8 @@ require "llm"
66
78
  llm = LLM.openai(key: "yourapikey")
67
79
  llm = LLM.gemini(key: "yourapikey")
68
80
  llm = LLM.anthropic(key: "yourapikey")
81
+ llm = LLM.xai(key: "yourapikey")
69
82
  llm = LLM.deepseek(key: "yourapikey")
70
- llm = LLM.voyageai(key: "yourapikey")
71
83
 
72
84
  ##
73
85
  # local providers
@@ -86,12 +98,11 @@ llm = LLM.llamacpp(key: nil)
86
98
 
87
99
  The following example creates an instance of
88
100
  [LLM::Bot](https://0x1eef.github.io/x/llm.rb/LLM/Bot.html)
89
- by entering into a conversation where messages are buffered and
90
- sent to the provider on-demand. This is the default behavior
91
- because it can reduce the number of requests sent to a provider,
92
- and avoids unneccessary requests until an attempt to iterate over
101
+ and enters into a conversation where messages are buffered and
102
+ sent to the provider on-demand. The implementation is designed to
103
+ buffer messages by waiting until an attempt to iterate over
93
104
  [LLM::Bot#messages](https://0x1eef.github.io/x/llm.rb/LLM/Bot.html#messages-instance_method)
94
- is made:
105
+ is made before sending a request to the LLM:
95
106
 
96
107
  ```ruby
97
108
  #!/usr/bin/env ruby
@@ -99,11 +110,12 @@ require "llm"
99
110
 
100
111
  llm = LLM.openai(key: ENV["KEY"])
101
112
  bot = LLM::Bot.new(llm)
113
+ url = "https://upload.wikimedia.org/wikipedia/commons/thumb/9/9a/Cognac_glass.jpg/500px-Cognac_glass.jpg"
102
114
  msgs = bot.chat do |prompt|
103
- prompt.system File.read("./share/llm/prompts/system.txt")
104
- prompt.user "Tell me the answer to 5 + 15"
105
- prompt.user "Tell me the answer to (5 + 15) * 2"
106
- prompt.user "Tell me the answer to ((5 + 15) * 2) / 10"
115
+ prompt.system "Your task is to answer all user queries"
116
+ prompt.user ["Tell me about this URL", URI(url)]
117
+ prompt.user ["Tell me about this pdf", File.open("spec/fixtures/documents/freebsd.sysctl.pdf", "rb")]
118
+ prompt.user "Is the URL and PDF similar to each other?"
107
119
  end
108
120
 
109
121
  # At this point, we execute a single request
@@ -118,15 +130,11 @@ msgs.each { print "[#{_1.role}] ", _1.content, "\n" }
118
130
  > [docs/](docs/STREAMING.md#scopes) for more details.
119
131
 
120
132
  The following example streams the messages in a conversation
121
- as they are generated in real-time. This feature can be useful
122
- when you want to stream a conversation in real time, or when you
123
- want to avoid potential read timeouts during the generation of a
124
- response.
125
-
126
- The `stream` option can be set to an IO object, or the value `true`
127
- to enable streaming &ndash; and at the end of the request, `bot.chat`
128
- returns the same response as the non-streaming version which allows
129
- you to process a response in the same way:
133
+ as they are generated in real-time. The `stream` option can
134
+ be set to an IO object, or the value `true` to enable streaming
135
+ &ndash; and at the end of the request, `bot.chat` returns the
136
+ same response as the non-streaming version which allows you
137
+ to process a response in the same way:
130
138
 
131
139
  ```ruby
132
140
  #!/usr/bin/env ruby
@@ -134,11 +142,12 @@ require "llm"
134
142
 
135
143
  llm = LLM.openai(key: ENV["KEY"])
136
144
  bot = LLM::Bot.new(llm)
145
+ url = "https://upload.wikimedia.org/wikipedia/commons/thumb/9/9a/Cognac_glass.jpg/500px-Cognac_glass.jpg"
137
146
  bot.chat(stream: $stdout) do |prompt|
138
- prompt.system "You are my math assistant."
139
- prompt.user "Tell me the answer to 5 + 15"
140
- prompt.user "Tell me the answer to (5 + 15) * 2"
141
- prompt.user "Tell me the answer to ((5 + 15) * 2) / 10"
147
+ prompt.system "Your task is to answer all user queries"
148
+ prompt.user ["Tell me about this URL", URI(url)]
149
+ prompt.user ["Tell me about this pdf", File.open("spec/fixtures/documents/freebsd.sysctl.pdf", "rb")]
150
+ prompt.user "Is the URL and PDF similar to each other?"
142
151
  end.to_a
143
152
  ```
144
153
 
@@ -158,7 +167,7 @@ require "llm"
158
167
  ##
159
168
  # Objects
160
169
  llm = LLM.openai(key: ENV["KEY"])
161
- schema = llm.schema.object(answer: llm.schema.integer.required)
170
+ schema = llm.schema.object(probability: llm.schema.integer.required)
162
171
  bot = LLM::Bot.new(llm, schema:)
163
172
  bot.chat "Does the earth orbit the sun?", role: :user
164
173
  bot.messages.find(&:assistant?).content! # => {probability: 1}
@@ -197,11 +206,7 @@ The
197
206
  method returns an array of functions that can be called after sending a message and
198
207
  it will only be populated if the LLM detects a function should be called. Each function
199
208
  corresponds to an element in the "tools" array. The array is emptied after a function call,
200
- and potentially repopulated on the next message.
201
-
202
- The following example defines an agent that can run system commands based on natural language,
203
- and it is only intended to be a fun demo of tool calling - it is not recommended to run
204
- arbitrary commands from a LLM without sanitizing the input first :) Without further ado:
209
+ and potentially repopulated on the next message:
205
210
 
206
211
  ```ruby
207
212
  #!/usr/bin/env ruby
@@ -213,10 +218,10 @@ tool = LLM.function(:system) do |fn|
213
218
  fn.params do |schema|
214
219
  schema.object(command: schema.string.required)
215
220
  end
216
- fn.define do |params|
221
+ fn.define do |command:|
217
222
  ro, wo = IO.pipe
218
223
  re, we = IO.pipe
219
- Process.wait Process.spawn(params.command, out: wo, err: we)
224
+ Process.wait Process.spawn(command, out: wo, err: we)
220
225
  [wo,we].each(&:close)
221
226
  {stderr: re.read, stdout: ro.read}
222
227
  end
@@ -236,6 +241,60 @@ bot.chat bot.functions.map(&:call) # report return value to the LLM
236
241
  # {stderr: "", stdout: "FreeBSD"}
237
242
  ```
238
243
 
244
+ ### Files
245
+
246
+ #### Create
247
+
248
+ The OpenAI and Gemini providers offer a Files API where a client can upload files
249
+ that can be referenced from a prompt, and used with other APIs as well. The following
250
+ example uses the OpenAI provider to describe the contents of a PDF file after
251
+ it has been uploaded. The file (a specialized instance of
252
+ [LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
253
+ ) is given as part of a prompt that is understood by llm.rb:
254
+
255
+ ```ruby
256
+ #!/usr/bin/env ruby
257
+ require "llm"
258
+
259
+ llm = LLM.openai(key: ENV["KEY"])
260
+ bot = LLM::Bot.new(llm)
261
+ file = llm.files.create(file: "/books/goodread.pdf")
262
+ bot.chat(["Tell me about this file", file])
263
+ bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
264
+ ```
265
+
266
+ ### Prompts
267
+
268
+ #### Multimodal
269
+
270
+ It is generally a given that an LLM will understand text but they can also
271
+ understand and generate other types of media as well: audio, images, video,
272
+ and even URLs. The object given as a prompt in llm.rb can be a string to
273
+ represent text, a URI object to represent a URL, an LLM::Response object
274
+ to represent a file stored with the LLM, and so on. These are objects you
275
+ can throw at the prompt and have them be understood automatically.
276
+
277
+ A prompt can also have multiple parts, and in that case, an array is given
278
+ as a prompt. Each element is considered to be part of the prompt:
279
+
280
+ ```ruby
281
+ #!/usr/bin/env ruby
282
+ require "llm"
283
+
284
+ llm = LLM.openai(key: ENV["KEY"])
285
+ bot = LLM::Bot.new(llm)
286
+
287
+ bot.chat ["Tell me about this URL", URI("https://example.com/path/to/image.png")]
288
+ [bot.messages.find(&:assistant?)].each { print "[#{_1.role}] ", _1.content, "\n" }
289
+
290
+ file = llm.files.create(file: "/books/goodread.pdf")
291
+ bot.chat ["Tell me about this PDF", file]
292
+ [bot.messages.find(&:assistant?)].each { print "[#{_1.role}] ", _1.content, "\n" }
293
+
294
+ bot.chat ["Tell me about this image", File.open("/images/nemothefish.png", "r")]
295
+ [bot.messages.find(&:assistant?)].each { print "[#{_1.role}] ", _1.content, "\n" }
296
+ ```
297
+
239
298
  ### Audio
240
299
 
241
300
  #### Speech
@@ -363,71 +422,6 @@ res.urls.each.with_index do |url, index|
363
422
  end
364
423
  ```
365
424
 
366
- ### Files
367
-
368
- #### Create
369
-
370
- Most LLM providers provide a Files API where you can upload files
371
- that can be referenced from a prompt and llm.rb has first-class support
372
- for this feature. The following example uses the OpenAI provider to describe
373
- the contents of a PDF file after it has been uploaded. The file (an instance
374
- of [LLM::Response::File](https://0x1eef.github.io/x/llm.rb/LLM/Response/File.html))
375
- is passed directly to the chat method, and generally any object a prompt supports
376
- can be given to the chat method:
377
-
378
-
379
- ```ruby
380
- #!/usr/bin/env ruby
381
- require "llm"
382
-
383
- llm = LLM.openai(key: ENV["KEY"])
384
- bot = LLM::Bot.new(llm)
385
- file = llm.files.create(file: "/documents/openbsd_is_awesome.pdf")
386
- bot.chat(file)
387
- bot.chat("What is this file about?")
388
- bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
389
-
390
- ##
391
- # [assistant] This file is about OpenBSD, a free and open-source Unix-like operating system
392
- # based on the Berkeley Software Distribution (BSD). It is known for its
393
- # emphasis on security, code correctness, and code simplicity. The file
394
- # contains information about the features, installation, and usage of OpenBSD.
395
- ```
396
-
397
- ### Prompts
398
-
399
- #### Multimodal
400
-
401
- Generally all providers accept text prompts but some providers can
402
- also understand URLs, and various file types (eg images, audio, video,
403
- etc). The llm.rb approach to multimodal prompts is to let you pass `URI`
404
- objects to describe links, `LLM::File` | `LLM::Response::File` objects
405
- to describe files, `String` objects to describe text blobs, or an array
406
- of the aforementioned objects to describe multiple objects in a single
407
- prompt. Each object is a first class citizen that can be passed directly
408
- to a prompt:
409
-
410
- ```ruby
411
- #!/usr/bin/env ruby
412
- require "llm"
413
-
414
- llm = LLM.openai(key: ENV["KEY"])
415
- bot = LLM::Bot.new(llm)
416
-
417
- bot.chat [URI("https://example.com/path/to/image.png"), "Describe the image in the link"]
418
- bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
419
-
420
- file = llm.files.create(file: "/documents/openbsd_is_awesome.pdf")
421
- bot.chat [file, "What is this file about?"]
422
- bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
423
-
424
- bot.chat [LLM.File("/images/puffy.png"), "What is this image about?"]
425
- bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
426
-
427
- bot.chat [LLM.File("/images/beastie.png"), "What is this image about?"]
428
- bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
429
- ```
430
-
431
425
  ### Embeddings
432
426
 
433
427
  #### Text
@@ -453,7 +447,7 @@ print res.embeddings.size, "\n"
453
447
  print res.embeddings[0].size, "\n"
454
448
 
455
449
  ##
456
- # LLM::Response::Embedding
450
+ # LLM::Response
457
451
  # 3
458
452
  # 1536
459
453
  ```
@@ -499,8 +493,15 @@ over or doesn't cover at all. The API reference is available at
499
493
 
500
494
  ### Guides
501
495
 
502
- The [docs/](docs/) directory contains some additional documentation that
503
- didn't quite make it into the README.
496
+ * [An introduction to RAG with llm.rb](https://0x1eef.github.io/posts/an-introduction-to-rag-with-llm.rb/) &ndash;
497
+ a blog post that implements the RAG pattern in 32 lines of Ruby code
498
+ * [docs/](docs/) &ndash; the docs directory contains additional guides
499
+
500
+
501
+ ## See also
502
+
503
+ * [llm-shell](https://github.com/llmrb/llm-shell) &ndash; a shell that uses llm.rb to
504
+ provide a command-line interface to LLMs.
504
505
 
505
506
  ## Install
506
507
 
@@ -9,7 +9,7 @@ class LLM::Bot
9
9
  ##
10
10
  # @param [String] prompt The prompt
11
11
  # @param [Hash] params
12
- # @return [LLM::Response::Respond]
12
+ # @return [LLM::Response]
13
13
  def create_response!(prompt, params)
14
14
  @provider.responses.create(
15
15
  prompt,
@@ -20,7 +20,7 @@ class LLM::Bot
20
20
  ##
21
21
  # @param [String] prompt The prompt
22
22
  # @param [Hash] params
23
- # @return [LLM::Response::Completion]
23
+ # @return [LLM::Response]
24
24
  def create_completion!(prompt, params)
25
25
  @provider.complete(
26
26
  prompt,
data/lib/llm/bot.rb CHANGED
@@ -2,36 +2,27 @@
2
2
 
3
3
  module LLM
4
4
  ##
5
- # {LLM::Bot LLM::Bot} provides a bot object that can maintain a
6
- # a conversation. A conversation can use the chat completions API
7
- # that all LLM providers support or the responses API that a select
8
- # few LLM providers support.
5
+ # {LLM::Bot LLM::Bot} provides an object that can maintain a
6
+ # conversation. A conversation can use the chat completions API
7
+ # that all LLM providers support or the responses API that currently
8
+ # only OpenAI supports.
9
9
  #
10
- # @example example #1
10
+ # @example
11
11
  # #!/usr/bin/env ruby
12
12
  # require "llm"
13
13
  #
14
- # llm = LLM.openai(ENV["KEY"])
14
+ # llm = LLM.openai(key: ENV["KEY"])
15
15
  # bot = LLM::Bot.new(llm)
16
+ # url = "https://upload.wikimedia.org/wikipedia/commons/thumb/9/9a/Cognac_glass.jpg/500px-Cognac_glass.jpg"
16
17
  # msgs = bot.chat do |prompt|
17
- # prompt.system "Answer the following questions."
18
- # prompt.user "What is 5 + 7 ?"
19
- # prompt.user "Why is the sky blue ?"
20
- # prompt.user "Why did the chicken cross the road ?"
18
+ # prompt.system "Your task is to answer all user queries"
19
+ # prompt.user ["Tell me about this URL", URI(url)]
20
+ # prompt.user ["Tell me about this pdf", File.open("freebsd_book.pdf", "rb")]
21
+ # prompt.user "Is the URL and PDF similar to each other?"
21
22
  # end
22
- # msgs.each { print "[#{_1.role}]", _1.content, "\n" }
23
23
  #
24
- # @example example #2
25
- # #!/usr/bin/env ruby
26
- # require "llm"
27
- #
28
- # llm = LLM.openai(ENV["KEY"])
29
- # bot = LLM::Bot.new(llm)
30
- # bot.chat "Answer the following questions.", role: :system
31
- # bot.chat "What is 5 + 7 ?", role: :user
32
- # bot.chat "Why is the sky blue ?", role: :user
33
- # bot.chat "Why did the chicken cross the road ?", role: :user
34
- # bot.messages.each { print "[#{_1.role}]", _1.content, "\n" }
24
+ # # At this point, we execute a single request
25
+ # msgs.each { print "[#{_1.role}] ", _1.content, "\n" }
35
26
  class Bot
36
27
  require_relative "bot/prompt/completion"
37
28
  require_relative "bot/prompt/respond"
data/lib/llm/buffer.rb CHANGED
@@ -47,6 +47,13 @@ module LLM
47
47
  reverse_each.find(...)
48
48
  end
49
49
 
50
+ ##
51
+ # Returns the last message in the buffer
52
+ # @return [LLM::Message, nil]
53
+ def last
54
+ to_a[-1]
55
+ end
56
+
50
57
  ##
51
58
  # @param [[LLM::Message, Hash]] item
52
59
  # A message and its parameters
data/lib/llm/file.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  ##
4
- # The {LLM::File LLM::File} class represents a local file. It can
5
- # be used as a prompt with certain providers (eg: Ollama, Gemini),
6
- # and as an input with certain methods
4
+ # {LLM::File LLM::File} represents a local file. It can be used
5
+ # as a prompt with certain providers (eg: Ollama, Gemini),
6
+ # and as an input with certain methods. It is usually not necessary
7
+ # to create an instance of LLM::File directly.
7
8
  class LLM::File
8
9
  ##
9
10
  # @return [String]
@@ -29,12 +30,19 @@ class LLM::File
29
30
  end
30
31
 
31
32
  ##
32
- # @return [String]
33
+ # @return [Boolean]
33
34
  # Returns true if the file is an image
34
35
  def image?
35
36
  mime_type.start_with?("image/")
36
37
  end
37
38
 
39
+ ##
40
+ # @return [Boolean]
41
+ # Returns true if the file is a PDF document
42
+ def pdf?
43
+ mime_type == "application/pdf"
44
+ end
45
+
38
46
  ##
39
47
  # @return [Integer]
40
48
  # Returns the size of the file in bytes
@@ -68,14 +76,16 @@ class LLM::File
68
76
  end
69
77
 
70
78
  ##
71
- # @param [String] path
72
- # The path to a file
79
+ # @param [String, File, LLM::Response] obj
80
+ # The path to the file, or an existing file reference
73
81
  # @return [LLM::File]
74
- def LLM.File(path)
75
- case path
76
- when LLM::File, LLM::Response::File
77
- path
78
- else
79
- LLM::File.new(path)
82
+ def LLM.File(obj)
83
+ case obj
84
+ when File
85
+ obj.close unless obj.closed?
86
+ LLM.File(obj.path)
87
+ when LLM::File, LLM::Response then obj
88
+ when String then LLM::File.new(obj)
89
+ else raise TypeError, "don't know how to handle #{obj.class} objects"
80
90
  end
81
91
  end
data/lib/llm/function.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  ##
4
- # The {LLM::Function LLM::Function} class represents a
5
- # local function that can be called by an LLM.
4
+ # The {LLM::Function LLM::Function} class represents a local
5
+ # function that can be called by an LLM.
6
6
  #
7
7
  # @example example #1
8
8
  # LLM.function(:system) do |fn|
@@ -10,15 +10,15 @@
10
10
  # fn.params do |schema|
11
11
  # schema.object(command: schema.string.required)
12
12
  # end
13
- # fn.define do |params|
14
- # {success: Kernel.system(params.command)}
13
+ # fn.define do |command:|
14
+ # {success: Kernel.system(command)}
15
15
  # end
16
16
  # end
17
17
  #
18
18
  # @example example #2
19
19
  # class System
20
- # def call(params)
21
- # {success: Kernel.system(params.command)}
20
+ # def call(command:)
21
+ # {success: Kernel.system(command)}
22
22
  # end
23
23
  # end
24
24
  #
@@ -91,7 +91,8 @@ class LLM::Function
91
91
  # Call the function
92
92
  # @return [LLM::Function::Return] The result of the function call
93
93
  def call
94
- Return.new id, (Class === @runner) ? @runner.new.call(arguments) : @runner.call(arguments)
94
+ runner = ((Class === @runner) ? @runner.new : @runner)
95
+ Return.new(id, runner.call(**arguments))
95
96
  ensure
96
97
  @called = true
97
98
  end
data/lib/llm/message.rb CHANGED
@@ -109,6 +109,14 @@ module LLM
109
109
  tool_calls.any?
110
110
  end
111
111
 
112
+ ##
113
+ # @return [Boolean]
114
+ # Returns true when the message represents a function return
115
+ def tool_return?
116
+ LLM::Function::Return === content ||
117
+ [*content].grep(LLM::Function::Return).any?
118
+ end
119
+
112
120
  ##
113
121
  # Returns a string representation of the message
114
122
  # @return [String]
data/lib/llm/multipart.rb CHANGED
@@ -4,7 +4,6 @@
4
4
  ##
5
5
  # @private
6
6
  class LLM::Multipart
7
- require "llm"
8
7
  require "securerandom"
9
8
 
10
9
  ##
@@ -12,6 +12,10 @@ class LLM::Object
12
12
  ::Kernel.instance_method(:instance_of?).bind(self).call(...)
13
13
  end
14
14
 
15
+ def extend(...)
16
+ ::Kernel.instance_method(:extend).bind(self).call(...)
17
+ end
18
+
15
19
  def method(...)
16
20
  ::Kernel.instance_method(:method).bind(self).call(...)
17
21
  end
@@ -41,5 +45,9 @@ class LLM::Object
41
45
  "#<#{self.class}:0x#{object_id.to_s(16)} properties=#{to_h.inspect}>"
42
46
  end
43
47
  alias_method :to_s, :inspect
48
+
49
+ def pretty_print(q)
50
+ q.text(inspect)
51
+ end
44
52
  end
45
53
  end
data/lib/llm/object.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  ##
4
- # The {LLM::Object LLM::Object} class encapsulates a Hash object, and it
5
- # allows a consumer to get and set Hash keys via regular methods. It is
4
+ # The {LLM::Object LLM::Object} class encapsulates a Hash object. It is
6
5
  # similar in spirit to OpenStruct, and it was introduced after OpenStruct
7
- # became a bundled gem (and not a default gem) in Ruby 3.5.
6
+ # became a bundled gem rather than a default gem in Ruby 3.5.
8
7
  class LLM::Object < BasicObject
9
8
  require_relative "object/builder"
10
9
  require_relative "object/kernel"
@@ -62,6 +61,13 @@ class LLM::Object < BasicObject
62
61
  def to_h
63
62
  @h
64
63
  end
64
+ alias_method :to_hash, :to_h
65
+
66
+ ##
67
+ # @return [Object, nil]
68
+ def dig(...)
69
+ to_h.dig(...)
70
+ end
65
71
 
66
72
  private
67
73