llm.rb 5.1.0 → 5.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +33 -12
- data/data/deepseek.json +68 -0
- data/data/google.json +26 -26
- data/data/openai.json +55 -0
- data/lib/llm/context.rb +6 -2
- data/lib/llm/mcp.rb +15 -0
- data/lib/llm/message.rb +14 -5
- data/lib/llm/providers/deepseek/request_adapter/completion.rb +30 -7
- data/lib/llm/providers/deepseek.rb +3 -3
- data/lib/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8f9bdef0c733225e44dcf39d75e3397974122bfeb5e705a0797067242fd5c966
|
|
4
|
+
data.tar.gz: 567cc793e1e095e481abf5ef797a6fcb26a04faeed91855c234e531b78e3544a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3d7e026b308228787d2f6ead8de197f847b644f7ba5bd0a1d679270a66e5c0e48c74a7d9494a86613bd061a42c2fd56ff270f73f8f3a627bc213dd7d57de788d
|
|
7
|
+
data.tar.gz: 8586a02d0345e7259f80b32e688a0ad531e28e3d15c8519781647ee1f77f23b6ecdaa9b28ed40efdea138c138511c7c4f88986ed7d646fb562e9faf7e2db687f
|
data/CHANGELOG.md
CHANGED
|
@@ -2,8 +2,68 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
Changes since `v5.2.1`.
|
|
6
|
+
|
|
7
|
+
## v5.2.1
|
|
8
|
+
|
|
9
|
+
Changes since `v5.2.0`.
|
|
10
|
+
|
|
11
|
+
This release tightens the streamed queue fix from `v5.2.0` for concurrent
|
|
12
|
+
workloads. Request-local streams now stay bound long enough for `wait` to
|
|
13
|
+
drain queued work and then clear cleanly so later waits fall back to the
|
|
14
|
+
context's configured stream.
|
|
15
|
+
|
|
16
|
+
### Fix
|
|
17
|
+
|
|
18
|
+
* **Reset request-local streams after `wait` drains queued work** <br>
|
|
19
|
+
Keep per-call `stream:` bindings alive through `LLM::Context#wait` so
|
|
20
|
+
queued streamed tool work still resolves correctly, then clear the
|
|
21
|
+
request-local stream after the wait completes to avoid leaking it into
|
|
22
|
+
later turns.
|
|
23
|
+
|
|
24
|
+
## v5.2.0
|
|
25
|
+
|
|
5
26
|
Changes since `v5.1.0`.
|
|
6
27
|
|
|
28
|
+
This release adds current DeepSeek V4 support through refreshed provider
|
|
29
|
+
metadata, including `deepseek-v4-flash` and `deepseek-v4-pro`, while fixing
|
|
30
|
+
request-local queue handling for concurrent streamed workloads so `wait` and
|
|
31
|
+
interruption use the active per-call stream correctly.
|
|
32
|
+
|
|
33
|
+
### Change
|
|
34
|
+
|
|
35
|
+
* **Add `LLM::MCP#run` for scoped MCP client lifecycle** <br>
|
|
36
|
+
Add `LLM::MCP#run` so MCP clients can be started for the duration of a
|
|
37
|
+
block and then stopped automatically, which simplifies the usual
|
|
38
|
+
`start`/`stop` pattern in examples and application code.
|
|
39
|
+
|
|
40
|
+
* **Refresh provider model metadata** <br>
|
|
41
|
+
Add current DeepSeek and OpenAI model metadata to `data/` and update the
|
|
42
|
+
Google Gemma model entry to match the current provider naming.
|
|
43
|
+
|
|
44
|
+
### Fix
|
|
45
|
+
|
|
46
|
+
* **Reject unsupported DeepSeek multimodal prompt objects early** <br>
|
|
47
|
+
Raise `LLM::PromptError` for `image_url`, `local_file`, and
|
|
48
|
+
`remote_file` in DeepSeek chat requests instead of sending invalid
|
|
49
|
+
OpenAI-compatible payloads that the provider rejects at runtime.
|
|
50
|
+
|
|
51
|
+
* **Preserve DeepSeek reasoning content across tool turns** <br>
|
|
52
|
+
Replay `reasoning_content` when serializing prior assistant messages for
|
|
53
|
+
DeepSeek chat completions, so thinking-mode tool calls can continue into
|
|
54
|
+
follow-up requests without triggering invalid request errors.
|
|
55
|
+
|
|
56
|
+
* **Default DeepSeek to `deepseek-v4-flash`** <br>
|
|
57
|
+
Change `LLM::DeepSeek#default_model` to `deepseek-v4-flash` so new
|
|
58
|
+
contexts and default provider usage align with the current preferred chat
|
|
59
|
+
model.
|
|
60
|
+
|
|
61
|
+
* **Use per-call streams when waiting on streamed tool work** <br>
|
|
62
|
+
Track request-local streams bound through `talk(..., stream:)` and
|
|
63
|
+
`respond(..., stream:)` so `LLM::Context#wait` and interruption-aware
|
|
64
|
+
queue handling use the active stream instead of falling back to pending
|
|
65
|
+
function spawning.
|
|
66
|
+
|
|
7
67
|
## v5.1.0
|
|
8
68
|
|
|
9
69
|
Changes since `v5.0.0`.
|
data/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<p align="center">
|
|
5
5
|
<a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
|
|
6
6
|
<a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
|
|
7
|
-
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-5.1.0-green.svg?" alt="Version"></a>
|
|
7
|
+
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-5.2.1-green.svg?" alt="Version"></a>
|
|
8
8
|
</p>
|
|
9
9
|
|
|
10
10
|
## About
|
|
@@ -261,13 +261,17 @@ Remote MCP tools and prompts are not bolted on as a separate integration
|
|
|
261
261
|
stack. They adapt into the same tool and prompt path used by local tools,
|
|
262
262
|
skills, contexts, and agents.
|
|
263
263
|
|
|
264
|
+
Use `mcp.run do ... end` for scoped work where the client should start and
|
|
265
|
+
stop around one block. Use `mcp.start` and `mcp.stop` directly when you need
|
|
266
|
+
finer sequential control across several steps before shutting the client down.
|
|
267
|
+
|
|
264
268
|
```ruby
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
269
|
+
mcp = LLM::MCP.http(
|
|
270
|
+
url: "https://api.githubcopilot.com/mcp/",
|
|
271
|
+
headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
|
|
272
|
+
).persistent
|
|
273
|
+
mcp.run do
|
|
268
274
|
ctx = LLM::Context.new(llm, tools: mcp.tools)
|
|
269
|
-
ensure
|
|
270
|
-
mcp.stop
|
|
271
275
|
end
|
|
272
276
|
```
|
|
273
277
|
|
|
@@ -281,12 +285,17 @@ Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
|
|
|
281
285
|
twist.
|
|
282
286
|
|
|
283
287
|
```ruby
|
|
288
|
+
require "llm"
|
|
289
|
+
require "io/console"
|
|
290
|
+
|
|
291
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
284
292
|
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
285
293
|
worker = Thread.new do
|
|
286
294
|
ctx.talk("Write a very long essay about network protocols.")
|
|
287
295
|
rescue LLM::Interrupt
|
|
288
296
|
puts "Request was interrupted!"
|
|
289
297
|
end
|
|
298
|
+
|
|
290
299
|
STDIN.getch
|
|
291
300
|
ctx.interrupt!
|
|
292
301
|
worker.join
|
|
@@ -615,9 +624,10 @@ require "io/console"
|
|
|
615
624
|
|
|
616
625
|
llm = LLM.openai(key: ENV["KEY"])
|
|
617
626
|
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
618
|
-
|
|
619
627
|
worker = Thread.new do
|
|
620
628
|
ctx.talk("Write a very long essay about network protocols.")
|
|
629
|
+
rescue LLM::Interrupt
|
|
630
|
+
puts "Request was interrupted!"
|
|
621
631
|
end
|
|
622
632
|
|
|
623
633
|
STDIN.getch
|
|
@@ -695,7 +705,7 @@ puts ticket.talk("How do I rotate my API key?").content
|
|
|
695
705
|
|
|
696
706
|
#### MCP
|
|
697
707
|
|
|
698
|
-
This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
708
|
+
This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. It expects a GitHub token in `ENV["GITHUB_PAT"]`. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
699
709
|
|
|
700
710
|
```ruby
|
|
701
711
|
require "llm"
|
|
@@ -707,13 +717,24 @@ mcp = LLM::MCP.http(
|
|
|
707
717
|
headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
|
|
708
718
|
).persistent
|
|
709
719
|
|
|
710
|
-
|
|
711
|
-
|
|
720
|
+
mcp.start
|
|
721
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
722
|
+
ctx.talk("Pull information about my GitHub account.")
|
|
723
|
+
ctx.talk(ctx.call(:functions)) while ctx.functions.any?
|
|
724
|
+
mcp.stop
|
|
725
|
+
```
|
|
726
|
+
|
|
727
|
+
For scoped work, `mcp.run do ... end` is shorter and handles cleanup for you:
|
|
728
|
+
|
|
729
|
+
```ruby
|
|
730
|
+
mcp = LLM::MCP.http(
|
|
731
|
+
url: "https://api.githubcopilot.com/mcp/",
|
|
732
|
+
headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
|
|
733
|
+
).persistent
|
|
734
|
+
mcp.run do
|
|
712
735
|
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
713
736
|
ctx.talk("Pull information about my GitHub account.")
|
|
714
737
|
ctx.talk(ctx.call(:functions)) while ctx.functions.any?
|
|
715
|
-
ensure
|
|
716
|
-
mcp.stop
|
|
717
738
|
end
|
|
718
739
|
```
|
|
719
740
|
|
data/data/deepseek.json
CHANGED
|
@@ -70,6 +70,74 @@
|
|
|
70
70
|
"context": 128000,
|
|
71
71
|
"output": 64000
|
|
72
72
|
}
|
|
73
|
+
},
|
|
74
|
+
"deepseek-v4-flash": {
|
|
75
|
+
"id": "deepseek-v4-flash",
|
|
76
|
+
"name": "DeepSeek V4 Flash",
|
|
77
|
+
"family": "deepseek-flash",
|
|
78
|
+
"attachment": false,
|
|
79
|
+
"reasoning": true,
|
|
80
|
+
"tool_call": true,
|
|
81
|
+
"interleaved": {
|
|
82
|
+
"field": "reasoning_content"
|
|
83
|
+
},
|
|
84
|
+
"structured_output": true,
|
|
85
|
+
"temperature": true,
|
|
86
|
+
"knowledge": "2025-05",
|
|
87
|
+
"release_date": "2026-04-24",
|
|
88
|
+
"last_updated": "2026-04-24",
|
|
89
|
+
"modalities": {
|
|
90
|
+
"input": [
|
|
91
|
+
"text"
|
|
92
|
+
],
|
|
93
|
+
"output": [
|
|
94
|
+
"text"
|
|
95
|
+
]
|
|
96
|
+
},
|
|
97
|
+
"open_weights": true,
|
|
98
|
+
"cost": {
|
|
99
|
+
"input": 0.14,
|
|
100
|
+
"output": 0.28,
|
|
101
|
+
"cache_read": 0.028
|
|
102
|
+
},
|
|
103
|
+
"limit": {
|
|
104
|
+
"context": 1000000,
|
|
105
|
+
"output": 384000
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
"deepseek-v4-pro": {
|
|
109
|
+
"id": "deepseek-v4-pro",
|
|
110
|
+
"name": "DeepSeek V4 Pro",
|
|
111
|
+
"family": "deepseek-thinking",
|
|
112
|
+
"attachment": false,
|
|
113
|
+
"reasoning": true,
|
|
114
|
+
"tool_call": true,
|
|
115
|
+
"interleaved": {
|
|
116
|
+
"field": "reasoning_content"
|
|
117
|
+
},
|
|
118
|
+
"structured_output": true,
|
|
119
|
+
"temperature": true,
|
|
120
|
+
"knowledge": "2025-05",
|
|
121
|
+
"release_date": "2026-04-24",
|
|
122
|
+
"last_updated": "2026-04-24",
|
|
123
|
+
"modalities": {
|
|
124
|
+
"input": [
|
|
125
|
+
"text"
|
|
126
|
+
],
|
|
127
|
+
"output": [
|
|
128
|
+
"text"
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
"open_weights": true,
|
|
132
|
+
"cost": {
|
|
133
|
+
"input": 1.74,
|
|
134
|
+
"output": 3.48,
|
|
135
|
+
"cache_read": 0.145
|
|
136
|
+
},
|
|
137
|
+
"limit": {
|
|
138
|
+
"context": 1000000,
|
|
139
|
+
"output": 384000
|
|
140
|
+
}
|
|
73
141
|
}
|
|
74
142
|
}
|
|
75
143
|
}
|
data/data/google.json
CHANGED
|
@@ -1058,6 +1058,32 @@
|
|
|
1058
1058
|
"output": 8192
|
|
1059
1059
|
}
|
|
1060
1060
|
},
|
|
1061
|
+
"gemma-4-26b-a4b-it": {
|
|
1062
|
+
"id": "gemma-4-26b-a4b-it",
|
|
1063
|
+
"name": "Gemma 4 26B",
|
|
1064
|
+
"family": "gemma",
|
|
1065
|
+
"attachment": false,
|
|
1066
|
+
"reasoning": true,
|
|
1067
|
+
"tool_call": true,
|
|
1068
|
+
"structured_output": true,
|
|
1069
|
+
"temperature": true,
|
|
1070
|
+
"release_date": "2026-04-02",
|
|
1071
|
+
"last_updated": "2026-04-02",
|
|
1072
|
+
"modalities": {
|
|
1073
|
+
"input": [
|
|
1074
|
+
"text",
|
|
1075
|
+
"image"
|
|
1076
|
+
],
|
|
1077
|
+
"output": [
|
|
1078
|
+
"text"
|
|
1079
|
+
]
|
|
1080
|
+
},
|
|
1081
|
+
"open_weights": true,
|
|
1082
|
+
"limit": {
|
|
1083
|
+
"context": 256000,
|
|
1084
|
+
"output": 8192
|
|
1085
|
+
}
|
|
1086
|
+
},
|
|
1061
1087
|
"gemini-2.5-flash-lite": {
|
|
1062
1088
|
"id": "gemini-2.5-flash-lite",
|
|
1063
1089
|
"name": "Gemini 2.5 Flash Lite",
|
|
@@ -1093,32 +1119,6 @@
|
|
|
1093
1119
|
"output": 65536
|
|
1094
1120
|
}
|
|
1095
1121
|
},
|
|
1096
|
-
"gemma-4-26b-it": {
|
|
1097
|
-
"id": "gemma-4-26b-it",
|
|
1098
|
-
"name": "Gemma 4 26B",
|
|
1099
|
-
"family": "gemma",
|
|
1100
|
-
"attachment": false,
|
|
1101
|
-
"reasoning": true,
|
|
1102
|
-
"tool_call": true,
|
|
1103
|
-
"structured_output": true,
|
|
1104
|
-
"temperature": true,
|
|
1105
|
-
"release_date": "2026-04-02",
|
|
1106
|
-
"last_updated": "2026-04-02",
|
|
1107
|
-
"modalities": {
|
|
1108
|
-
"input": [
|
|
1109
|
-
"text",
|
|
1110
|
-
"image"
|
|
1111
|
-
],
|
|
1112
|
-
"output": [
|
|
1113
|
-
"text"
|
|
1114
|
-
]
|
|
1115
|
-
},
|
|
1116
|
-
"open_weights": true,
|
|
1117
|
-
"limit": {
|
|
1118
|
-
"context": 256000,
|
|
1119
|
-
"output": 8192
|
|
1120
|
-
}
|
|
1121
|
-
},
|
|
1122
1122
|
"gemini-2.5-flash-image-preview": {
|
|
1123
1123
|
"id": "gemini-2.5-flash-image-preview",
|
|
1124
1124
|
"name": "Gemini 2.5 Flash Image (Preview)",
|
data/data/openai.json
CHANGED
|
@@ -195,6 +195,61 @@
|
|
|
195
195
|
"output": 16384
|
|
196
196
|
}
|
|
197
197
|
},
|
|
198
|
+
"gpt-5.5": {
|
|
199
|
+
"id": "gpt-5.5",
|
|
200
|
+
"name": "GPT-5.5",
|
|
201
|
+
"family": "gpt",
|
|
202
|
+
"attachment": true,
|
|
203
|
+
"reasoning": true,
|
|
204
|
+
"tool_call": true,
|
|
205
|
+
"structured_output": true,
|
|
206
|
+
"temperature": false,
|
|
207
|
+
"knowledge": "2025-12-01",
|
|
208
|
+
"release_date": "2026-04-23",
|
|
209
|
+
"last_updated": "2026-04-23",
|
|
210
|
+
"modalities": {
|
|
211
|
+
"input": [
|
|
212
|
+
"text",
|
|
213
|
+
"image",
|
|
214
|
+
"pdf"
|
|
215
|
+
],
|
|
216
|
+
"output": [
|
|
217
|
+
"text"
|
|
218
|
+
]
|
|
219
|
+
},
|
|
220
|
+
"open_weights": false,
|
|
221
|
+
"cost": {
|
|
222
|
+
"input": 5,
|
|
223
|
+
"output": 30,
|
|
224
|
+
"cache_read": 0.5,
|
|
225
|
+
"context_over_200k": {
|
|
226
|
+
"input": 10,
|
|
227
|
+
"output": 45,
|
|
228
|
+
"cache_read": 1
|
|
229
|
+
}
|
|
230
|
+
},
|
|
231
|
+
"limit": {
|
|
232
|
+
"context": 1050000,
|
|
233
|
+
"input": 920000,
|
|
234
|
+
"output": 130000
|
|
235
|
+
},
|
|
236
|
+
"experimental": {
|
|
237
|
+
"modes": {
|
|
238
|
+
"fast": {
|
|
239
|
+
"cost": {
|
|
240
|
+
"input": 12.5,
|
|
241
|
+
"output": 75,
|
|
242
|
+
"cache_read": 1.25
|
|
243
|
+
},
|
|
244
|
+
"provider": {
|
|
245
|
+
"body": {
|
|
246
|
+
"service_tier": "priority"
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
},
|
|
198
253
|
"gpt-5-mini": {
|
|
199
254
|
"id": "gpt-5-mini",
|
|
200
255
|
"name": "GPT-5 Mini",
|
data/lib/llm/context.rb
CHANGED
|
@@ -295,7 +295,6 @@ module LLM
|
|
|
295
295
|
# ractor work, in that order.
|
|
296
296
|
# @return [Array<LLM::Function::Return>]
|
|
297
297
|
def wait(strategy)
|
|
298
|
-
stream = @params[:stream]
|
|
299
298
|
if LLM::Stream === stream && !stream.queue.empty?
|
|
300
299
|
@queue = stream.queue
|
|
301
300
|
@queue.wait(strategy)
|
|
@@ -306,6 +305,7 @@ module LLM
|
|
|
306
305
|
end
|
|
307
306
|
ensure
|
|
308
307
|
@queue = nil
|
|
308
|
+
@stream = nil
|
|
309
309
|
end
|
|
310
310
|
|
|
311
311
|
##
|
|
@@ -461,6 +461,7 @@ module LLM
|
|
|
461
461
|
|
|
462
462
|
def bind!(stream, model, tools)
|
|
463
463
|
return unless LLM::Stream === stream
|
|
464
|
+
@stream = stream
|
|
464
465
|
stream.extra[:ctx] = self
|
|
465
466
|
stream.extra[:tracer] = tracer
|
|
466
467
|
stream.extra[:model] = model
|
|
@@ -469,10 +470,13 @@ module LLM
|
|
|
469
470
|
|
|
470
471
|
def queue
|
|
471
472
|
return @queue if @queue
|
|
472
|
-
stream = @params[:stream]
|
|
473
473
|
stream.queue if LLM::Stream === stream
|
|
474
474
|
end
|
|
475
475
|
|
|
476
|
+
def stream
|
|
477
|
+
@stream || @params[:stream]
|
|
478
|
+
end
|
|
479
|
+
|
|
476
480
|
def load_skills(skills)
|
|
477
481
|
[*skills].map { LLM::Skill.load(_1).to_tool(self) }
|
|
478
482
|
end
|
data/lib/llm/mcp.rb
CHANGED
|
@@ -103,6 +103,21 @@ class LLM::MCP
|
|
|
103
103
|
nil
|
|
104
104
|
end
|
|
105
105
|
|
|
106
|
+
##
|
|
107
|
+
# Starts the MCP client for the duration of a block and then stops it.
|
|
108
|
+
# @yield Runs with the MCP client started
|
|
109
|
+
# @raise [LocalJumpError]
|
|
110
|
+
# When called without a block
|
|
111
|
+
# @raise [StandardError]
|
|
112
|
+
# Propagates errors raised by {#start}, the block itself, or {#stop}
|
|
113
|
+
# @return [void]
|
|
114
|
+
def run
|
|
115
|
+
start
|
|
116
|
+
yield
|
|
117
|
+
ensure
|
|
118
|
+
stop
|
|
119
|
+
end
|
|
120
|
+
|
|
106
121
|
##
|
|
107
122
|
# Configures an HTTP MCP transport to use a persistent connection pool
|
|
108
123
|
# via the optional dependency [Net::HTTP::Persistent](https://github.com/drbrain/net-http-persistent)
|
data/lib/llm/message.rb
CHANGED
|
@@ -33,11 +33,15 @@ module LLM
|
|
|
33
33
|
# Returns a Hash representation of the message.
|
|
34
34
|
# @return [Hash]
|
|
35
35
|
def to_h
|
|
36
|
-
{
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
{
|
|
37
|
+
role:,
|
|
38
|
+
content:,
|
|
39
|
+
reasoning_content:,
|
|
40
|
+
compaction: extra.compaction,
|
|
41
|
+
tools: extra.tool_calls&.map { LLM::Object === _1 ? _1.to_h : _1 },
|
|
42
|
+
usage:,
|
|
43
|
+
original_tool_calls: extra.original_tool_calls
|
|
44
|
+
}.compact.then { preserve_nil_content(_1) }
|
|
41
45
|
end
|
|
42
46
|
|
|
43
47
|
##
|
|
@@ -208,6 +212,11 @@ module LLM
|
|
|
208
212
|
|
|
209
213
|
private
|
|
210
214
|
|
|
215
|
+
def preserve_nil_content(hash)
|
|
216
|
+
hash[:content] = content if content.nil?
|
|
217
|
+
hash
|
|
218
|
+
end
|
|
219
|
+
|
|
211
220
|
def tool_calls
|
|
212
221
|
@tool_calls ||= LLM::Object.from(extra.tool_calls || [])
|
|
213
222
|
end
|
data/lib/llm/providers/deepseek/request_adapter/completion.rb
CHANGED
|
@@ -19,7 +19,7 @@ module LLM::DeepSeek::RequestAdapter
|
|
|
19
19
|
if Hash === message
|
|
20
20
|
{role: message[:role], content: adapt_content(message[:content])}
|
|
21
21
|
elsif message.tool_call?
|
|
22
|
-
|
|
22
|
+
wrap(content: nil, tool_calls: message.extra[:original_tool_calls])
|
|
23
23
|
else
|
|
24
24
|
adapt_message
|
|
25
25
|
end
|
|
@@ -30,25 +30,34 @@ module LLM::DeepSeek::RequestAdapter
|
|
|
30
30
|
|
|
31
31
|
def adapt_content(content)
|
|
32
32
|
case content
|
|
33
|
+
when LLM::Object
|
|
34
|
+
adapt_object(content)
|
|
33
35
|
when String
|
|
34
|
-
content.to_s
|
|
36
|
+
[{type: :text, text: content.to_s}]
|
|
35
37
|
when LLM::Message
|
|
36
38
|
adapt_content(content.content)
|
|
37
39
|
when LLM::Function::Return
|
|
38
40
|
throw(:abort, {role: "tool", tool_call_id: content.id, content: LLM.json.dump(content.value)})
|
|
39
|
-
when LLM::Object
|
|
40
|
-
prompt_error!(content)
|
|
41
41
|
else
|
|
42
42
|
prompt_error!(content)
|
|
43
43
|
end
|
|
44
44
|
end
|
|
45
45
|
|
|
46
|
+
def adapt_object(object)
|
|
47
|
+
case object.kind
|
|
48
|
+
when :image_url, :local_file, :remote_file
|
|
49
|
+
prompt_error!(object)
|
|
50
|
+
else
|
|
51
|
+
prompt_error!(object)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
46
55
|
def adapt_message
|
|
47
56
|
case content
|
|
48
57
|
when Array
|
|
49
58
|
adapt_array
|
|
50
59
|
else
|
|
51
|
-
|
|
60
|
+
wrap(content: adapt_content(content))
|
|
52
61
|
end
|
|
53
62
|
end
|
|
54
63
|
|
|
@@ -58,13 +67,13 @@ module LLM::DeepSeek::RequestAdapter
|
|
|
58
67
|
elsif returns.any?
|
|
59
68
|
returns.map { {role: "tool", tool_call_id: _1.id, content: LLM.json.dump(_1.value)} }
|
|
60
69
|
else
|
|
61
|
-
|
|
70
|
+
wrap(content: content.flat_map { adapt_content(_1) })
|
|
62
71
|
end
|
|
63
72
|
end
|
|
64
73
|
|
|
65
74
|
def prompt_error!(object)
|
|
66
75
|
if LLM::Object === object
|
|
67
|
-
raise LLM::PromptError, "The given LLM::Object with kind '#{
|
|
76
|
+
raise LLM::PromptError, "The given LLM::Object with kind '#{object.kind}' is not " \
|
|
68
77
|
"supported by the DeepSeek API"
|
|
69
78
|
else
|
|
70
79
|
raise LLM::PromptError, "The given object (an instance of #{object.class}) " \
|
|
@@ -72,8 +81,22 @@ module LLM::DeepSeek::RequestAdapter
|
|
|
72
81
|
end
|
|
73
82
|
end
|
|
74
83
|
|
|
84
|
+
def wrap(content:, tool_calls: nil)
|
|
85
|
+
{
|
|
86
|
+
role: message.role,
|
|
87
|
+
content:,
|
|
88
|
+
tool_calls: tool_calls&.map { LLM::Object === _1 ? _1.to_h : _1 },
|
|
89
|
+
reasoning_content: message.reasoning_content
|
|
90
|
+
}.compact.then { preserve_nil_content(_1) }
|
|
91
|
+
end
|
|
92
|
+
|
|
75
93
|
def message = @message
|
|
76
94
|
def content = message.content
|
|
77
95
|
def returns = content.grep(LLM::Function::Return)
|
|
96
|
+
|
|
97
|
+
def preserve_nil_content(hash)
|
|
98
|
+
hash[:content] = content if content.nil?
|
|
99
|
+
hash
|
|
100
|
+
end
|
|
78
101
|
end
|
|
79
102
|
end
|
data/lib/llm/providers/deepseek.rb
CHANGED
|
@@ -15,7 +15,7 @@ module LLM
|
|
|
15
15
|
#
|
|
16
16
|
# llm = LLM.deepseek(key: ENV["KEY"])
|
|
17
17
|
# ctx = LLM::Context.new(llm)
|
|
18
|
-
# ctx.talk
|
|
18
|
+
# ctx.talk "Hello"
|
|
19
19
|
# ctx.messages.select(&:assistant?).each { print "[#{_1.role}]", _1.content, "\n" }
|
|
20
20
|
class DeepSeek < OpenAI
|
|
21
21
|
require_relative "deepseek/request_adapter"
|
|
@@ -73,10 +73,10 @@ module LLM
|
|
|
73
73
|
|
|
74
74
|
##
|
|
75
75
|
# Returns the default model for chat completions
|
|
76
|
-
# @see https://api-docs.deepseek.com/quick_start/pricing deepseek-
|
|
76
|
+
# @see https://api-docs.deepseek.com/quick_start/pricing deepseek-v4-flash
|
|
77
77
|
# @return [String]
|
|
78
78
|
def default_model
|
|
79
|
-
"deepseek-
|
|
79
|
+
"deepseek-v4-flash"
|
|
80
80
|
end
|
|
81
81
|
end
|
|
82
82
|
end
|
data/lib/llm/version.rb
CHANGED