llm.rb 4.8.0 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/README.md +356 -583
  3. data/data/anthropic.json +770 -0
  4. data/data/deepseek.json +75 -0
  5. data/data/google.json +1050 -0
  6. data/data/openai.json +1421 -0
  7. data/data/xai.json +792 -0
  8. data/data/zai.json +330 -0
  9. data/lib/llm/agent.rb +42 -41
  10. data/lib/llm/bot.rb +1 -263
  11. data/lib/llm/buffer.rb +7 -0
  12. data/lib/llm/{session → context}/deserializer.rb +4 -3
  13. data/lib/llm/context.rb +292 -0
  14. data/lib/llm/cost.rb +26 -0
  15. data/lib/llm/error.rb +8 -0
  16. data/lib/llm/function/array.rb +61 -0
  17. data/lib/llm/function/fiber_group.rb +91 -0
  18. data/lib/llm/function/task_group.rb +89 -0
  19. data/lib/llm/function/thread_group.rb +94 -0
  20. data/lib/llm/function.rb +75 -10
  21. data/lib/llm/mcp/command.rb +108 -0
  22. data/lib/llm/mcp/error.rb +31 -0
  23. data/lib/llm/mcp/pipe.rb +82 -0
  24. data/lib/llm/mcp/rpc.rb +118 -0
  25. data/lib/llm/mcp/transport/http/event_handler.rb +66 -0
  26. data/lib/llm/mcp/transport/http.rb +122 -0
  27. data/lib/llm/mcp/transport/stdio.rb +85 -0
  28. data/lib/llm/mcp.rb +116 -0
  29. data/lib/llm/message.rb +13 -11
  30. data/lib/llm/model.rb +2 -2
  31. data/lib/llm/prompt.rb +17 -7
  32. data/lib/llm/provider.rb +32 -17
  33. data/lib/llm/providers/anthropic/files.rb +3 -3
  34. data/lib/llm/providers/anthropic.rb +19 -4
  35. data/lib/llm/providers/deepseek.rb +10 -3
  36. data/lib/llm/providers/{gemini → google}/audio.rb +6 -6
  37. data/lib/llm/providers/{gemini → google}/error_handler.rb +2 -2
  38. data/lib/llm/providers/{gemini → google}/files.rb +11 -11
  39. data/lib/llm/providers/{gemini → google}/images.rb +7 -7
  40. data/lib/llm/providers/{gemini → google}/models.rb +5 -5
  41. data/lib/llm/providers/{gemini → google}/request_adapter/completion.rb +7 -3
  42. data/lib/llm/providers/{gemini → google}/request_adapter.rb +1 -1
  43. data/lib/llm/providers/{gemini → google}/response_adapter/completion.rb +7 -7
  44. data/lib/llm/providers/{gemini → google}/response_adapter/embedding.rb +1 -1
  45. data/lib/llm/providers/{gemini → google}/response_adapter/file.rb +1 -1
  46. data/lib/llm/providers/{gemini → google}/response_adapter/files.rb +1 -1
  47. data/lib/llm/providers/{gemini → google}/response_adapter/image.rb +1 -1
  48. data/lib/llm/providers/{gemini → google}/response_adapter/models.rb +1 -1
  49. data/lib/llm/providers/{gemini → google}/response_adapter/web_search.rb +2 -2
  50. data/lib/llm/providers/{gemini → google}/response_adapter.rb +8 -8
  51. data/lib/llm/providers/{gemini → google}/stream_parser.rb +3 -3
  52. data/lib/llm/providers/{gemini.rb → google.rb} +41 -26
  53. data/lib/llm/providers/llamacpp.rb +10 -3
  54. data/lib/llm/providers/ollama.rb +19 -4
  55. data/lib/llm/providers/openai/files.rb +3 -3
  56. data/lib/llm/providers/openai/response_adapter/completion.rb +9 -1
  57. data/lib/llm/providers/openai/response_adapter/responds.rb +9 -1
  58. data/lib/llm/providers/openai/responses.rb +9 -1
  59. data/lib/llm/providers/openai/stream_parser.rb +2 -0
  60. data/lib/llm/providers/openai.rb +19 -4
  61. data/lib/llm/providers/xai.rb +10 -3
  62. data/lib/llm/providers/zai.rb +9 -2
  63. data/lib/llm/registry.rb +81 -0
  64. data/lib/llm/schema/all_of.rb +31 -0
  65. data/lib/llm/schema/any_of.rb +31 -0
  66. data/lib/llm/schema/one_of.rb +31 -0
  67. data/lib/llm/schema/parser.rb +145 -0
  68. data/lib/llm/schema.rb +49 -8
  69. data/lib/llm/server_tool.rb +5 -5
  70. data/lib/llm/session.rb +10 -1
  71. data/lib/llm/tool.rb +88 -6
  72. data/lib/llm/tracer/logger.rb +1 -1
  73. data/lib/llm/tracer/telemetry.rb +7 -7
  74. data/lib/llm/tracer.rb +3 -3
  75. data/lib/llm/usage.rb +5 -0
  76. data/lib/llm/version.rb +1 -1
  77. data/lib/llm.rb +39 -6
  78. data/llm.gemspec +45 -8
  79. metadata +86 -28
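
Two renames dominate the list above: LLM::Session becomes LLM::Context (shown in the agent.rb diff below), and the gemini provider becomes google. A minimal sketch of what the renamed provider entry point might look like; the LLM.google constructor name is an assumption inferred from the file move, not something this diff confirms:

require "llm"

# Assumed: the constructor follows the gemini -> google file rename,
# mirroring the old LLM.gemini entry point. Verify the exact method
# name against the 4.10.0 README before relying on it.
llm = LLM.google(key: ENV["GEMINI_API_KEY"])
res = llm.complete("Hello, world")
puts res.choices[0].content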
data/data/zai.json ADDED
@@ -0,0 +1,330 @@
+{
+  "id": "zai",
+  "env": [
+    "ZHIPU_API_KEY"
+  ],
+  "npm": "@ai-sdk/openai-compatible",
+  "api": "https://api.z.ai/api/paas/v4",
+  "name": "Z.AI",
+  "doc": "https://docs.z.ai/guides/overview/pricing",
+  "models": {
+    "glm-5": {
+      "id": "glm-5",
+      "name": "GLM-5",
+      "family": "glm",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "interleaved": {
+        "field": "reasoning_content"
+      },
+      "temperature": true,
+      "release_date": "2026-02-11",
+      "last_updated": "2026-02-11",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 1,
+        "output": 3.2,
+        "cache_read": 0.2,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 204800,
+        "output": 131072
+      }
+    },
+    "glm-4.5-air": {
+      "id": "glm-4.5-air",
+      "name": "GLM-4.5-Air",
+      "family": "glm-air",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-07-28",
+      "last_updated": "2025-07-28",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.2,
+        "output": 1.1,
+        "cache_read": 0.03,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 131072,
+        "output": 98304
+      }
+    },
+    "glm-4.5": {
+      "id": "glm-4.5",
+      "name": "GLM-4.5",
+      "family": "glm",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-07-28",
+      "last_updated": "2025-07-28",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.6,
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 131072,
+        "output": 98304
+      }
+    },
+    "glm-4.5-flash": {
+      "id": "glm-4.5-flash",
+      "name": "GLM-4.5-Flash",
+      "family": "glm-flash",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-07-28",
+      "last_updated": "2025-07-28",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0,
+        "output": 0,
+        "cache_read": 0,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 131072,
+        "output": 98304
+      }
+    },
+    "glm-4.7-flash": {
+      "id": "glm-4.7-flash",
+      "name": "GLM-4.7-Flash",
+      "family": "glm-flash",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2026-01-19",
+      "last_updated": "2026-01-19",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0,
+        "output": 0,
+        "cache_read": 0,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 200000,
+        "output": 131072
+      }
+    },
+    "glm-4.6": {
+      "id": "glm-4.6",
+      "name": "GLM-4.6",
+      "family": "glm",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-09-30",
+      "last_updated": "2025-09-30",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.6,
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 204800,
+        "output": 131072
+      }
+    },
+    "glm-4.7": {
+      "id": "glm-4.7",
+      "name": "GLM-4.7",
+      "family": "glm",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "interleaved": {
+        "field": "reasoning_content"
+      },
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-12-22",
+      "last_updated": "2025-12-22",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.6,
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 204800,
+        "output": 131072
+      }
+    },
+    "glm-5-turbo": {
+      "id": "glm-5-turbo",
+      "name": "GLM-5-Turbo",
+      "family": "glm",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "interleaved": {
+        "field": "reasoning_content"
+      },
+      "structured_output": true,
+      "temperature": true,
+      "release_date": "2026-03-16",
+      "last_updated": "2026-03-16",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": false,
+      "cost": {
+        "input": 1.2,
+        "output": 4,
+        "cache_read": 0.24,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 200000,
+        "output": 131072
+      }
+    },
+    "glm-4.5v": {
+      "id": "glm-4.5v",
+      "name": "GLM-4.5V",
+      "family": "glm",
+      "attachment": true,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-08-11",
+      "last_updated": "2025-08-11",
+      "modalities": {
+        "input": [
+          "text",
+          "image",
+          "video"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.6,
+        "output": 1.8
+      },
+      "limit": {
+        "context": 64000,
+        "output": 16384
+      }
+    },
+    "glm-4.6v": {
+      "id": "glm-4.6v",
+      "name": "GLM-4.6V",
+      "family": "glm",
+      "attachment": true,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-12-08",
+      "last_updated": "2025-12-08",
+      "modalities": {
+        "input": [
+          "text",
+          "image",
+          "video"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.3,
+        "output": 0.9
+      },
+      "limit": {
+        "context": 128000,
+        "output": 32768
+      }
+    }
+  }
+}
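
The cost figures in each entry above are assumed to be USD per million tokens (the JSON itself does not state a unit). A plain-Ruby sketch, not gem code, of how such an entry could be used to estimate the price of a request:

require "json"

# Read the registry entry above (path relative to the gem's data dir)
# and estimate one glm-4.6 request. The per-million-token unit for the
# "cost" values is an assumption.
registry = JSON.parse(File.read("data/data/zai.json"))
model = registry.dig("models", "glm-4.6")
input_tokens, output_tokens = 12_000, 2_500
usd = (input_tokens * model.dig("cost", "input") +
       output_tokens * model.dig("cost", "output")) / 1_000_000.0
puts format("glm-4.6: ~$%.4f", usd) # => glm-4.6: ~$0.0127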
data/lib/llm/agent.rb CHANGED
@@ -8,7 +8,7 @@ module LLM
   #
   # **Notes:**
   # * Instructions are injected only on the first request.
-  # * An agent will automatically execute tool calls (unlike {LLM::Session LLM::Session}).
+  # * An agent will automatically execute tool calls (unlike {LLM::Context LLM::Context}).
   # * The idea originally came from RubyLLM and was adapted to llm.rb.
   #
   # @example
@@ -23,6 +23,11 @@ module LLM
   # agent = SystemAdmin.new(llm)
   # agent.talk("Run 'date'")
   class Agent
+    ##
+    # Returns a provider
+    # @return [LLM::Provider]
+    attr_reader :llm
+
     ##
     # Set or get the default model
     # @param [String, nil] model
@@ -77,11 +82,10 @@ module LLM
     # @option params [String] :model Defaults to the provider's default model
     # @option params [Array<LLM::Function>, nil] :tools Defaults to nil
    # @option params [#to_json, nil] :schema Defaults to nil
-    def initialize(provider, params = {})
+    def initialize(llm, params = {})
       defaults = {model: self.class.model, tools: self.class.tools, schema: self.class.schema}.compact
-      @provider = provider
-      @ses = LLM::Session.new(provider, defaults.merge(params))
-      @instructions_applied = false
+      @llm = llm
+      @ctx = LLM::Context.new(llm, defaults.merge(params))
     end
 
     ##
@@ -99,13 +103,12 @@ module LLM
     # puts response.choices[0].content
     def talk(prompt, params = {})
       i, max = 0, Integer(params.delete(:max_tool_rounds) || 10)
-      res = @ses.talk(apply_instructions(prompt), params)
-      until @ses.functions.empty?
+      res = @ctx.talk(apply_instructions(prompt), params)
+      until @ctx.functions.empty?
         raise LLM::ToolLoopError, "pending tool calls remain" if i >= max
-        res = @ses.talk @ses.functions.map(&:call), params
+        res = @ctx.talk @ctx.functions.map(&:call), params
         i += 1
       end
-      @instructions_applied = true
       res
     end
     alias_method :chat, :talk
@@ -126,40 +129,39 @@ module LLM
     # puts res.output_text
     def respond(prompt, params = {})
       i, max = 0, Integer(params.delete(:max_tool_rounds) || 10)
-      res = @ses.respond(apply_instructions(prompt), params)
-      until @ses.functions.empty?
+      res = @ctx.respond(apply_instructions(prompt), params)
+      until @ctx.functions.empty?
         raise LLM::ToolLoopError, "pending tool calls remain" if i >= max
-        res = @ses.respond @ses.functions.map(&:call), params
+        res = @ctx.respond @ctx.functions.map(&:call), params
         i += 1
       end
-      @instructions_applied = true
       res
     end
 
     ##
     # @return [LLM::Buffer<LLM::Message>]
     def messages
-      @ses.messages
+      @ctx.messages
     end
 
     ##
     # @return [Array<LLM::Function>]
     def functions
-      @ses.functions
+      @ctx.functions
     end
 
     ##
     # @return [LLM::Object]
     def usage
-      @ses.usage
+      @ctx.usage
    end
 
     ##
-    # @param (see LLM::Session#prompt)
-    # @return (see LLM::Session#prompt)
-    # @see LLM::Session#prompt
+    # @param (see LLM::Context#prompt)
+    # @return (see LLM::Context#prompt)
+    # @see LLM::Context#prompt
     def prompt(&b)
-      @ses.prompt(&b)
+      @ctx.prompt(&b)
     end
     alias_method :build_prompt, :prompt
 
@@ -169,7 +171,7 @@ module LLM
     # @return [LLM::Object]
     # Returns a tagged object
     def image_url(url)
-      @ses.image_url(url)
+      @ctx.image_url(url)
     end
 
     ##
@@ -178,7 +180,7 @@ module LLM
     # @return [LLM::Object]
     # Returns a tagged object
     def local_file(path)
-      @ses.local_file(path)
+      @ctx.local_file(path)
     end
 
     ##
@@ -187,54 +189,53 @@ module LLM
     # @return [LLM::Object]
     # Returns a tagged object
     def remote_file(res)
-      @ses.remote_file(res)
+      @ctx.remote_file(res)
     end
 
     ##
     # @return [LLM::Tracer]
     # Returns an LLM tracer
     def tracer
-      @ses.tracer
+      @ctx.tracer
     end
 
     ##
     # Returns the model an Agent is actively using
     # @return [String]
     def model
-      @ses.model
+      @ctx.model
     end
 
     ##
-    # @param (see LLM::Session#serialize)
-    # @return (see LLM::Session#serialize)
+    # @param (see LLM::Context#serialize)
+    # @return (see LLM::Context#serialize)
     def serialize(**kw)
-      @ses.serialize(**kw)
+      @ctx.serialize(**kw)
     end
     alias_method :save, :serialize
 
     ##
-    # @param (see LLM::Session#deserialize)
-    # @return (see LLM::Session#deserialize)
+    # @param (see LLM::Context#deserialize)
+    # @return (see LLM::Context#deserialize)
     def deserialize(**kw)
-      @ses.deserialize(**kw)
+      @ctx.deserialize(**kw)
     end
     alias_method :restore, :deserialize
 
     private
 
-    def apply_instructions(prompt)
+    ##
+    # @return [LLM::Prompt]
+    def apply_instructions(new_prompt)
       instr = self.class.instructions
-      return prompt unless instr
-      if LLM::Prompt === prompt
-        messages = prompt.to_a
-        prompt = LLM::Prompt.new(@provider)
-        prompt.system instr unless @instructions_applied
-        messages.each { |msg| prompt.talk(msg.content, role: msg.role) }
-        prompt
+      return new_prompt unless instr
+      if LLM::Prompt === new_prompt
+        @ctx.messages.empty? ? new_prompt.system(instr) : nil
+        new_prompt
      else
        prompt do
-          system instr unless @instructions_applied
-          user prompt
+          @ctx.messages.empty? ? _1.system(instr) : nil
+          _1.user(new_prompt)
        end
      end
    end
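
Net effect of the agent.rb changes: the agent now delegates to LLM::Context instead of LLM::Session, and the @instructions_applied flag is replaced by an emptiness check on the context's message buffer, so system instructions are injected only when the conversation has no messages yet. A usage sketch based on the doc comments and code above; the class-level instructions setter and the LLM.zai constructor are assumptions, while talk, :max_tool_rounds, and LLM::ToolLoopError appear in the diff itself:

require "llm"

# Sketch only: the instructions DSL is inferred from
# self.class.instructions in apply_instructions above; verify the
# exact spelling against the 4.10.0 README.
class SystemAdmin < LLM::Agent
  instructions "You are a careful system administrator."
end

llm = LLM.zai(key: ENV["ZHIPU_API_KEY"]) # assumed constructor name
agent = SystemAdmin.new(llm)
begin
  # talk auto-runs pending tool calls, up to :max_tool_rounds
  # (default 10), then raises LLM::ToolLoopError if calls still remain.
  res = agent.talk("Run 'date'", max_tool_rounds: 5)
  puts res.choices[0].content
rescue LLM::ToolLoopError => e
  warn "tool loop did not converge: #{e.message}"
end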