llm.rb 4.14.0 → 4.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/data/zai.json CHANGED
@@ -8,37 +8,6 @@
8
8
  "name": "Z.AI",
9
9
  "doc": "https://docs.z.ai/guides/overview/pricing",
10
10
  "models": {
11
- "glm-4.7-flash": {
12
- "id": "glm-4.7-flash",
13
- "name": "GLM-4.7-Flash",
14
- "family": "glm-flash",
15
- "attachment": false,
16
- "reasoning": true,
17
- "tool_call": true,
18
- "temperature": true,
19
- "knowledge": "2025-04",
20
- "release_date": "2026-01-19",
21
- "last_updated": "2026-01-19",
22
- "modalities": {
23
- "input": [
24
- "text"
25
- ],
26
- "output": [
27
- "text"
28
- ]
29
- },
30
- "open_weights": true,
31
- "cost": {
32
- "input": 0,
33
- "output": 0,
34
- "cache_read": 0,
35
- "cache_write": 0
36
- },
37
- "limit": {
38
- "context": 200000,
39
- "output": 131072
40
- }
41
- },
42
11
  "glm-5v-turbo": {
43
12
  "id": "glm-5v-turbo",
44
13
  "name": "glm-5v-turbo",
@@ -75,9 +44,9 @@
75
44
  "output": 131072
76
45
  }
77
46
  },
78
- "glm-5-turbo": {
79
- "id": "glm-5-turbo",
80
- "name": "GLM-5-Turbo",
47
+ "glm-4.7": {
48
+ "id": "glm-4.7",
49
+ "name": "GLM-4.7",
81
50
  "family": "glm",
82
51
  "attachment": false,
83
52
  "reasoning": true,
@@ -85,10 +54,10 @@
85
54
  "interleaved": {
86
55
  "field": "reasoning_content"
87
56
  },
88
- "structured_output": true,
89
57
  "temperature": true,
90
- "release_date": "2026-03-16",
91
- "last_updated": "2026-03-16",
58
+ "knowledge": "2025-04",
59
+ "release_date": "2025-12-22",
60
+ "last_updated": "2025-12-22",
92
61
  "modalities": {
93
62
  "input": [
94
63
  "text"
@@ -97,29 +66,31 @@
97
66
  "text"
98
67
  ]
99
68
  },
100
- "open_weights": false,
69
+ "open_weights": true,
101
70
  "cost": {
102
- "input": 1.2,
103
- "output": 4,
104
- "cache_read": 0.24,
71
+ "input": 0.6,
72
+ "output": 2.2,
73
+ "cache_read": 0.11,
105
74
  "cache_write": 0
106
75
  },
107
76
  "limit": {
108
- "context": 200000,
77
+ "context": 204800,
109
78
  "output": 131072
110
79
  }
111
80
  },
112
- "glm-4.5": {
113
- "id": "glm-4.5",
114
- "name": "GLM-4.5",
81
+ "glm-5": {
82
+ "id": "glm-5",
83
+ "name": "GLM-5",
115
84
  "family": "glm",
116
85
  "attachment": false,
117
86
  "reasoning": true,
118
87
  "tool_call": true,
88
+ "interleaved": {
89
+ "field": "reasoning_content"
90
+ },
119
91
  "temperature": true,
120
- "knowledge": "2025-04",
121
- "release_date": "2025-07-28",
122
- "last_updated": "2025-07-28",
92
+ "release_date": "2026-02-11",
93
+ "last_updated": "2026-02-11",
123
94
  "modalities": {
124
95
  "input": [
125
96
  "text"
@@ -130,14 +101,14 @@
130
101
  },
131
102
  "open_weights": true,
132
103
  "cost": {
133
- "input": 0.6,
134
- "output": 2.2,
135
- "cache_read": 0.11,
104
+ "input": 1,
105
+ "output": 3.2,
106
+ "cache_read": 0.2,
136
107
  "cache_write": 0
137
108
  },
138
109
  "limit": {
139
- "context": 131072,
140
- "output": 98304
110
+ "context": 204800,
111
+ "output": 131072
141
112
  }
142
113
  },
143
114
  "glm-4.7-flashx": {
@@ -171,17 +142,20 @@
171
142
  "output": 131072
172
143
  }
173
144
  },
174
- "glm-4.6": {
175
- "id": "glm-4.6",
176
- "name": "GLM-4.6",
145
+ "glm-5.1": {
146
+ "id": "glm-5.1",
147
+ "name": "GLM-5.1",
177
148
  "family": "glm",
178
149
  "attachment": false,
179
150
  "reasoning": true,
180
151
  "tool_call": true,
152
+ "interleaved": {
153
+ "field": "reasoning_content"
154
+ },
155
+ "structured_output": true,
181
156
  "temperature": true,
182
- "knowledge": "2025-04",
183
- "release_date": "2025-09-30",
184
- "last_updated": "2025-09-30",
157
+ "release_date": "2026-03-27",
158
+ "last_updated": "2026-03-27",
185
159
  "modalities": {
186
160
  "input": [
187
161
  "text"
@@ -190,34 +164,32 @@
190
164
  "text"
191
165
  ]
192
166
  },
193
- "open_weights": true,
167
+ "open_weights": false,
194
168
  "cost": {
195
- "input": 0.6,
196
- "output": 2.2,
197
- "cache_read": 0.11,
169
+ "input": 1.4,
170
+ "output": 4.4,
171
+ "cache_read": 0.26,
198
172
  "cache_write": 0
199
173
  },
200
174
  "limit": {
201
- "context": 204800,
175
+ "context": 200000,
202
176
  "output": 131072
203
177
  }
204
178
  },
205
- "glm-4.6v": {
206
- "id": "glm-4.6v",
207
- "name": "GLM-4.6V",
179
+ "glm-4.5": {
180
+ "id": "glm-4.5",
181
+ "name": "GLM-4.5",
208
182
  "family": "glm",
209
- "attachment": true,
183
+ "attachment": false,
210
184
  "reasoning": true,
211
185
  "tool_call": true,
212
186
  "temperature": true,
213
187
  "knowledge": "2025-04",
214
- "release_date": "2025-12-08",
215
- "last_updated": "2025-12-08",
188
+ "release_date": "2025-07-28",
189
+ "last_updated": "2025-07-28",
216
190
  "modalities": {
217
191
  "input": [
218
- "text",
219
- "image",
220
- "video"
192
+ "text"
221
193
  ],
222
194
  "output": [
223
195
  "text"
@@ -225,18 +197,20 @@
225
197
  },
226
198
  "open_weights": true,
227
199
  "cost": {
228
- "input": 0.3,
229
- "output": 0.9
200
+ "input": 0.6,
201
+ "output": 2.2,
202
+ "cache_read": 0.11,
203
+ "cache_write": 0
230
204
  },
231
205
  "limit": {
232
- "context": 128000,
233
- "output": 32768
206
+ "context": 131072,
207
+ "output": 98304
234
208
  }
235
209
  },
236
- "glm-4.5-flash": {
237
- "id": "glm-4.5-flash",
238
- "name": "GLM-4.5-Flash",
239
- "family": "glm-flash",
210
+ "glm-4.5-air": {
211
+ "id": "glm-4.5-air",
212
+ "name": "GLM-4.5-Air",
213
+ "family": "glm-air",
240
214
  "attachment": false,
241
215
  "reasoning": true,
242
216
  "tool_call": true,
@@ -254,9 +228,9 @@
254
228
  },
255
229
  "open_weights": true,
256
230
  "cost": {
257
- "input": 0,
258
- "output": 0,
259
- "cache_read": 0,
231
+ "input": 0.2,
232
+ "output": 1.1,
233
+ "cache_read": 0.03,
260
234
  "cache_write": 0
261
235
  },
262
236
  "limit": {
@@ -264,9 +238,9 @@
264
238
  "output": 98304
265
239
  }
266
240
  },
267
- "glm-5": {
268
- "id": "glm-5",
269
- "name": "GLM-5",
241
+ "glm-5-turbo": {
242
+ "id": "glm-5-turbo",
243
+ "name": "GLM-5-Turbo",
270
244
  "family": "glm",
271
245
  "attachment": false,
272
246
  "reasoning": true,
@@ -274,9 +248,10 @@
274
248
  "interleaved": {
275
249
  "field": "reasoning_content"
276
250
  },
251
+ "structured_output": true,
277
252
  "temperature": true,
278
- "release_date": "2026-02-11",
279
- "last_updated": "2026-02-11",
253
+ "release_date": "2026-03-16",
254
+ "last_updated": "2026-03-16",
280
255
  "modalities": {
281
256
  "input": [
282
257
  "text"
@@ -285,29 +260,60 @@
285
260
  "text"
286
261
  ]
287
262
  },
288
- "open_weights": true,
263
+ "open_weights": false,
289
264
  "cost": {
290
- "input": 1,
291
- "output": 3.2,
292
- "cache_read": 0.2,
265
+ "input": 1.2,
266
+ "output": 4,
267
+ "cache_read": 0.24,
293
268
  "cache_write": 0
294
269
  },
295
270
  "limit": {
296
- "context": 204800,
271
+ "context": 200000,
297
272
  "output": 131072
298
273
  }
299
274
  },
300
- "glm-4.5-air": {
301
- "id": "glm-4.5-air",
302
- "name": "GLM-4.5-Air",
303
- "family": "glm-air",
275
+ "glm-4.5v": {
276
+ "id": "glm-4.5v",
277
+ "name": "GLM-4.5V",
278
+ "family": "glm",
279
+ "attachment": true,
280
+ "reasoning": true,
281
+ "tool_call": true,
282
+ "temperature": true,
283
+ "knowledge": "2025-04",
284
+ "release_date": "2025-08-11",
285
+ "last_updated": "2025-08-11",
286
+ "modalities": {
287
+ "input": [
288
+ "text",
289
+ "image",
290
+ "video"
291
+ ],
292
+ "output": [
293
+ "text"
294
+ ]
295
+ },
296
+ "open_weights": true,
297
+ "cost": {
298
+ "input": 0.6,
299
+ "output": 1.8
300
+ },
301
+ "limit": {
302
+ "context": 64000,
303
+ "output": 16384
304
+ }
305
+ },
306
+ "glm-4.6": {
307
+ "id": "glm-4.6",
308
+ "name": "GLM-4.6",
309
+ "family": "glm",
304
310
  "attachment": false,
305
311
  "reasoning": true,
306
312
  "tool_call": true,
307
313
  "temperature": true,
308
314
  "knowledge": "2025-04",
309
- "release_date": "2025-07-28",
310
- "last_updated": "2025-07-28",
315
+ "release_date": "2025-09-30",
316
+ "last_updated": "2025-09-30",
311
317
  "modalities": {
312
318
  "input": [
313
319
  "text"
@@ -318,27 +324,27 @@
318
324
  },
319
325
  "open_weights": true,
320
326
  "cost": {
321
- "input": 0.2,
322
- "output": 1.1,
323
- "cache_read": 0.03,
327
+ "input": 0.6,
328
+ "output": 2.2,
329
+ "cache_read": 0.11,
324
330
  "cache_write": 0
325
331
  },
326
332
  "limit": {
327
- "context": 131072,
328
- "output": 98304
333
+ "context": 204800,
334
+ "output": 131072
329
335
  }
330
336
  },
331
- "glm-4.5v": {
332
- "id": "glm-4.5v",
333
- "name": "GLM-4.5V",
337
+ "glm-4.6v": {
338
+ "id": "glm-4.6v",
339
+ "name": "GLM-4.6V",
334
340
  "family": "glm",
335
341
  "attachment": true,
336
342
  "reasoning": true,
337
343
  "tool_call": true,
338
344
  "temperature": true,
339
345
  "knowledge": "2025-04",
340
- "release_date": "2025-08-11",
341
- "last_updated": "2025-08-11",
346
+ "release_date": "2025-12-08",
347
+ "last_updated": "2025-12-08",
342
348
  "modalities": {
343
349
  "input": [
344
350
  "text",
@@ -351,28 +357,56 @@
351
357
  },
352
358
  "open_weights": true,
353
359
  "cost": {
354
- "input": 0.6,
355
- "output": 1.8
360
+ "input": 0.3,
361
+ "output": 0.9
356
362
  },
357
363
  "limit": {
358
- "context": 64000,
359
- "output": 16384
364
+ "context": 128000,
365
+ "output": 32768
360
366
  }
361
367
  },
362
- "glm-4.7": {
363
- "id": "glm-4.7",
364
- "name": "GLM-4.7",
365
- "family": "glm",
368
+ "glm-4.5-flash": {
369
+ "id": "glm-4.5-flash",
370
+ "name": "GLM-4.5-Flash",
371
+ "family": "glm-flash",
366
372
  "attachment": false,
367
373
  "reasoning": true,
368
374
  "tool_call": true,
369
- "interleaved": {
370
- "field": "reasoning_content"
375
+ "temperature": true,
376
+ "knowledge": "2025-04",
377
+ "release_date": "2025-07-28",
378
+ "last_updated": "2025-07-28",
379
+ "modalities": {
380
+ "input": [
381
+ "text"
382
+ ],
383
+ "output": [
384
+ "text"
385
+ ]
371
386
  },
387
+ "open_weights": true,
388
+ "cost": {
389
+ "input": 0,
390
+ "output": 0,
391
+ "cache_read": 0,
392
+ "cache_write": 0
393
+ },
394
+ "limit": {
395
+ "context": 131072,
396
+ "output": 98304
397
+ }
398
+ },
399
+ "glm-4.7-flash": {
400
+ "id": "glm-4.7-flash",
401
+ "name": "GLM-4.7-Flash",
402
+ "family": "glm-flash",
403
+ "attachment": false,
404
+ "reasoning": true,
405
+ "tool_call": true,
372
406
  "temperature": true,
373
407
  "knowledge": "2025-04",
374
- "release_date": "2025-12-22",
375
- "last_updated": "2025-12-22",
408
+ "release_date": "2026-01-19",
409
+ "last_updated": "2026-01-19",
376
410
  "modalities": {
377
411
  "input": [
378
412
  "text"
@@ -383,13 +417,13 @@
383
417
  },
384
418
  "open_weights": true,
385
419
  "cost": {
386
- "input": 0.6,
387
- "output": 2.2,
388
- "cache_read": 0.11,
420
+ "input": 0,
421
+ "output": 0,
422
+ "cache_read": 0,
389
423
  "cache_write": 0
390
424
  },
391
425
  "limit": {
392
- "context": 204800,
426
+ "context": 200000,
393
427
  "output": 131072
394
428
  }
395
429
  }
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LLM::ActiveRecord
4
+ ##
5
+ # ActiveRecord integration for persisting {LLM::Context LLM::Context} state.
6
+ #
7
+ # This wrapper maps model columns onto provider selection, model selection,
8
+ # usage accounting, and serialized context data while leaving application-
9
+ # specific concerns such as credentials, associations, and UI shaping to
10
+ # the host app.
11
+ #
12
+ # Context state can be stored as a JSON string (`format: :string`, the
13
+ # default) or as a structured object (`format: :json` / `:jsonb`) for
14
+ # databases such as PostgreSQL that can persist JSON natively.
15
+ # `:json` and `:jsonb` expect a real JSON column type with ActiveRecord
16
+ # handling JSON typecasting for the model.
17
+ module ActsAsLLM
18
# Shared frozen hash used as the fallback for omitted option hashes.
EMPTY_HASH = {}.freeze

# Maps each usage counter to the column that stores it; by default
# every counter lives in a column of the same name.
DEFAULT_USAGE_COLUMNS =
  %i[input_tokens output_tokens total_tokens].to_h { |name| [name, name] }.freeze

# Baseline plugin options; values passed to `acts_as_llm` are merged
# over this hash.
DEFAULTS = {
  provider_column: :provider,
  model_column: :model,
  data_column: :data,
  format: :string,
  usage_columns: DEFAULT_USAGE_COLUMNS,
  provider: EMPTY_HASH,
  context: EMPTY_HASH
}.freeze
33
+
34
module Hooks
  ##
  # Runs when this module is extended onto a model class.
  #
  # Declares presence validations for the configured provider and
  # model columns, then mixes in {InstanceMethods} unless the model
  # already has it among its ancestors.
  #
  # @param [Class] model
  # @return [void]
  def self.extended(model)
    provider_column, model_column =
      model.llm_plugin_options.values_at(:provider_column, :model_column)
    model.validates(provider_column, model_column, presence: true)
    return if model.ancestors.include?(InstanceMethods)
    model.include(InstanceMethods)
  end
end
46
+
47
##
# Installs the `acts_as_llm` wrapper on an ActiveRecord model.
#
# The given options are merged over {DEFAULTS}, the usage column
# mapping is merged over {DEFAULT_USAGE_COLUMNS}, and the frozen result
# is stored in the `llm_plugin_options` class attribute (declared on
# first use). Finally the model is extended with {Hooks}.
#
# @param [Hash] options
# @option options [Symbol] :format
#   Storage format for the serialized context. Use `:string` for text
#   columns, or `:json` / `:jsonb` for structured JSON columns with
#   ActiveRecord JSON typecasting enabled.
# @return [void]
def acts_as_llm(options = EMPTY_HASH)
  settings = DEFAULTS.merge(options)
  usage = DEFAULT_USAGE_COLUMNS.merge(settings[:usage_columns] || EMPTY_HASH).freeze
  unless respond_to?(:llm_plugin_options)
    class_attribute :llm_plugin_options, instance_accessor: false, default: DEFAULTS
  end
  self.llm_plugin_options = settings.merge(usage_columns: usage).freeze
  extend Hooks
end
63
+
64
+ module InstanceMethods
65
##
# Forwards every argument to the stored context's `talk`, then
# flushes the record and hands back what `talk` returned.
# @see LLM::Context#talk
# @return [LLM::Response]
def talk(...)
  response = ctx.talk(...)
  flush
  response
end
72
+
73
##
# Forwards every argument to the stored context's `respond`
# (Responses API), then flushes the record and hands back what
# `respond` returned.
# @see LLM::Context#respond
# @return [LLM::Response]
def respond(...)
  response = ctx.respond(...)
  flush
  response
end
80
+
81
##
# Delegates to the stored context's `wait`, forwarding all
# arguments; the record is not flushed.
# @see LLM::Context#wait
# @return [Array<LLM::Function::Return>]
def wait(...)
  context = ctx
  context.wait(...)
end
88
+
89
##
# Delegates to the stored context's `call`, forwarding all
# arguments; the record is not flushed.
# @see LLM::Context#call
# @return [Object]
def call(...)
  context = ctx
  context.call(...)
end
96
+
97
##
# Messages held by the stored context.
# @see LLM::Context#messages
# @return [Array<LLM::Message>]
def messages = ctx.messages
103
+
104
##
# Model reported by the stored context.
# @note The bang keeps the ActiveRecord and Sequel wrappers aligned.
# @see LLM::Context#model
# @return [String]
def model! = ctx.model
111
+
112
##
# Functions held by the stored context.
# @see LLM::Context#functions
# @return [Array<LLM::Function>]
def functions = ctx.functions
118
+
119
##
# Cost reported by the stored context.
# @see LLM::Context#cost
# @return [LLM::Cost]
def cost = ctx.cost
125
+
126
##
# Context window reported by the stored context. Falls back to 0
# when the lookup raises LLM::NoSuchModelError or
# LLM::NoSuchRegistryError; any other error propagates.
# @see LLM::Context#context_window
# @return [Integer]
def context_window
  begin
    ctx.context_window
  rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
    0
  end
end
134
+
135
##
# Builds a usage object from the record's mapped usage columns,
# substituting 0 for any counter whose column value is nil.
# @return [LLM::Object]
def usage
  counts = %i[input_tokens output_tokens total_tokens].to_h do |name|
    [name, self[columns[name]] || 0]
  end
  LLM::Object.from(**counts)
end
145
+
146
+ private
147
+
148
##
# Returns the provider instance for this record, building it on the
# first call and reusing it afterwards.
#
# The provider name is read from the configured provider column and
# used as the name of the factory method on LLM; the plugin's
# `:provider` option (a Hash, or a Proc evaluated against the record)
# supplies its keyword arguments. All of that work happens inside the
# memoization, so a Proc option is evaluated only once per record
# instance rather than on every call.
#
# NOTE(review): the factory is looked up from a persisted column
# value via `LLM.method`, which can also reach non-public methods;
# confirm the column is constrained to known provider names.
# @return [LLM::Provider]
def llm
  @llm ||= begin
    provider = self[columns[:provider_column]]
    kwargs = resolve_options(self.class.llm_plugin_options[:provider])
    LLM.method(provider).call(**kwargs)
  end
end
157
+
158
##
# Returns the memoized context for this record.
#
# On first use a new LLM::Context is built from the provider and the
# plugin's `:context` options (nil values dropped, `:model` defaulting
# to the model column). When the data column holds a non-empty value,
# that value is passed to `restore` (re-dumped to a JSON string first
# for `:json` / `:jsonb`), and the result of `restore` is what gets
# memoized.
# @raise [ArgumentError] when stored data exists and the format is unknown
# @return [LLM::Context]
def ctx
  return @ctx if @ctx

  settings = self.class.llm_plugin_options
  params = resolve_options(settings[:context]).dup
  params[:model] ||= self[columns[:model_column]]
  context = LLM::Context.new(llm, params.compact)
  stored = self[columns[:data_column]]
  @ctx =
    if stored.nil? || stored == ""
      context
    else
      payload =
        case settings[:format]
        when :string
          stored
        when :json, :jsonb
          LLM.json.dump(stored)
        else
          raise ArgumentError, "Unknown format: #{settings[:format].inspect}"
        end
      context.restore(string: payload)
    end
end
179
+
180
##
# Writes the serialized context and the context's token counters
# to their mapped columns, then saves the record with `save!`.
# @return [void]
def flush
  payload = serialize_context(self.class.llm_plugin_options[:format])
  totals = ctx.usage
  attrs = {
    columns[:data_column] => payload,
    columns[:input_tokens] => totals.input_tokens,
    columns[:output_tokens] => totals.output_tokens,
    columns[:total_tokens] => totals.total_tokens
  }
  assign_attributes(attrs)
  save!
end
192
+
193
##
# Resolves a single configured option value.
#
# A Proc is evaluated in the context of this record, a Hash is
# returned as a shallow copy, and anything else is returned as-is.
# @param [Proc, Hash, Object] option
# @return [Object]
def resolve_option(option)
  return instance_exec(&option) if option.is_a?(Proc)
  return option.dup if option.is_a?(Hash)
  option
end
202
+
203
##
# Resolves an option that is expected to yield a Hash.
#
# A Proc or Hash is passed through {#resolve_option}; any other
# value (including nil) yields a new, unfrozen empty Hash.
# @param [Proc, Hash, Object] option
# @return [Hash]
def resolve_options(option)
  return resolve_option(option) if option.is_a?(Proc) || option.is_a?(Hash)
  EMPTY_HASH.dup
end
211
+
212
##
# Serializes the stored context for the data column.
#
# `:string` yields the context's `to_json`; `:json` and `:jsonb`
# yield its `to_h`.
# @param [Symbol] format
# @raise [ArgumentError] when the format is none of the above
# @return [String, Hash]
def serialize_context(format)
  return ctx.to_json if format == :string
  return ctx.to_h if %i[json jsonb].include?(format)
  raise ArgumentError, "Unknown format: #{format.inspect}"
end
219
+
220
##
# Flattens the plugin options into one frozen lookup of column names:
# the provider, model and data columns plus the three usage counter
# columns. Memoized per record instance.
# @return [Hash{Symbol => Symbol}]
def columns
  return @columns if @columns

  options = self.class.llm_plugin_options
  usage_columns = options[:usage_columns]
  core = %i[provider_column model_column data_column].to_h { |key| [key, options[key]] }
  counters = %i[input_tokens output_tokens total_tokens].to_h { |key| [key, usage_columns[key]] }
  @columns = core.merge(counters).freeze
end
234
+ end
235
+ end
236
+ end
237
+
238
# Make the `acts_as_llm` macro available on every ActiveRecord model class.
::ActiveRecord::Base.extend(LLM::ActiveRecord::ActsAsLLM)