open_router_enhanced 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Gemfile.lock +24 -25
- data/README.md +151 -1228
- data/docs/observability.md +3 -0
- data/docs/plugins.md +183 -0
- data/docs/streaming.md +18 -3
- data/docs/structured_outputs.md +466 -146
- data/lib/open_router/client.rb +50 -4
- data/lib/open_router/version.rb +1 -1
- data/lib/open_router.rb +6 -0
- metadata +3 -2
data/docs/observability.md
CHANGED
|
@@ -104,12 +104,15 @@ end
|
|
|
104
104
|
client.on(:on_healing) do |healing_data|
|
|
105
105
|
if healing_data[:healed]
|
|
106
106
|
puts "Successfully healed JSON response"
|
|
107
|
+
puts "Attempts: #{healing_data[:attempts]}"
|
|
107
108
|
else
|
|
108
109
|
puts "JSON healing failed: #{healing_data[:error]}"
|
|
109
110
|
end
|
|
110
111
|
end
|
|
111
112
|
```
|
|
112
113
|
|
|
114
|
+
**Note**: For detailed information about when auto-healing triggers, how it works, and configuration options, see the [Structured Outputs documentation](structured_outputs.md#json-auto-healing).
|
|
115
|
+
|
|
113
116
|
### 4. Streaming Observability
|
|
114
117
|
Enhanced streaming support with detailed event callbacks:
|
|
115
118
|
|
data/docs/plugins.md
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# OpenRouter Plugins
|
|
2
|
+
|
|
3
|
+
OpenRouter provides plugins that extend model capabilities. The gem supports all OpenRouter plugins and, by default, automatically enables the response-healing plugin for non-streaming structured-output requests.
|
|
4
|
+
|
|
5
|
+
## Available Plugins
|
|
6
|
+
|
|
7
|
+
| Plugin | ID | Description |
|
|
8
|
+
|--------|-----|-------------|
|
|
9
|
+
| Response Healing | `response-healing` | Fixes malformed JSON responses |
|
|
10
|
+
| Web Search | `web-search` | Augments responses with real-time web search |
|
|
11
|
+
| PDF Inputs | `pdf-inputs` | Parses and extracts content from PDF files |
|
|
12
|
+
|
|
13
|
+
## Basic Usage
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
# Specify plugins in your request
|
|
17
|
+
response = client.complete(
|
|
18
|
+
messages,
|
|
19
|
+
model: "openai/gpt-4o-mini",
|
|
20
|
+
plugins: [{ id: "web-search" }]
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Multiple plugins
|
|
24
|
+
response = client.complete(
|
|
25
|
+
messages,
|
|
26
|
+
model: "openai/gpt-4o-mini",
|
|
27
|
+
plugins: [
|
|
28
|
+
{ id: "web-search" },
|
|
29
|
+
{ id: "pdf-inputs" }
|
|
30
|
+
]
|
|
31
|
+
)
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Response Healing Plugin
|
|
35
|
+
|
|
36
|
+
The response-healing plugin fixes common JSON formatting issues server-side:
|
|
37
|
+
|
|
38
|
+
- Missing brackets, commas, and quotes
|
|
39
|
+
- Trailing commas
|
|
40
|
+
- Markdown-wrapped JSON
|
|
41
|
+
- Text mixed with JSON
|
|
42
|
+
- Unquoted object keys
|
|
43
|
+
|
|
44
|
+
### Automatic Activation
|
|
45
|
+
|
|
46
|
+
The gem **automatically adds** the response-healing plugin when:
|
|
47
|
+
1. Using structured outputs (`response_format` is set)
|
|
48
|
+
2. Not streaming
|
|
49
|
+
3. `auto_native_healing` is enabled (default: true)
|
|
50
|
+
|
|
51
|
+
```ruby
|
|
52
|
+
# Response-healing is automatically added here
|
|
53
|
+
response = client.complete(
|
|
54
|
+
messages,
|
|
55
|
+
model: "openai/gpt-4o-mini",
|
|
56
|
+
response_format: schema
|
|
57
|
+
)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Disable Automatic Healing
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
# Via configuration
|
|
64
|
+
OpenRouter.configure do |config|
|
|
65
|
+
config.auto_native_healing = false
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Via environment variable
|
|
69
|
+
# OPENROUTER_AUTO_NATIVE_HEALING=false
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Manual Control
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
# Explicitly add response-healing
|
|
76
|
+
response = client.complete(
|
|
77
|
+
messages,
|
|
78
|
+
model: "openai/gpt-4o-mini",
|
|
79
|
+
plugins: [{ id: "response-healing" }],
|
|
80
|
+
response_format: { type: "json_object" }
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Disable for a specific request (when auto is enabled)
|
|
84
|
+
response = client.complete(
|
|
85
|
+
messages,
|
|
86
|
+
model: "openai/gpt-4o-mini",
|
|
87
|
+
plugins: [{ id: "response-healing", enabled: false }],
|
|
88
|
+
response_format: schema
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Limitations
|
|
93
|
+
|
|
94
|
+
- **Non-streaming only**: Does not work with `stream: proc`
|
|
95
|
+
- **Syntax only**: Fixes JSON syntax, not schema conformance
|
|
96
|
+
- **Truncation issues**: May fail if the response was cut off by `max_tokens`
|
|
97
|
+
|
|
98
|
+
For schema validation failures, use the gem's [client-side auto-healing](structured_outputs.md#json-auto-healing-client-side).
|
|
99
|
+
|
|
100
|
+
## Web Search Plugin
|
|
101
|
+
|
|
102
|
+
Augments model responses with real-time web search results.
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
response = client.complete(
|
|
106
|
+
[{ role: "user", content: "What are the latest AI developments?" }],
|
|
107
|
+
model: "openai/gpt-4o-mini",
|
|
108
|
+
plugins: [{ id: "web-search" }]
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**Shortcut**: Append `:online` to the model ID:
|
|
113
|
+
|
|
114
|
+
```ruby
|
|
115
|
+
response = client.complete(
|
|
116
|
+
messages,
|
|
117
|
+
model: "openai/gpt-4o-mini:online" # Enables web-search
|
|
118
|
+
)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## PDF Inputs Plugin
|
|
122
|
+
|
|
123
|
+
Enables models to process PDF file content.
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
response = client.complete(
|
|
127
|
+
[{ role: "user", content: "Summarize this PDF: [pdf content]" }],
|
|
128
|
+
model: "openai/gpt-4o-mini",
|
|
129
|
+
plugins: [{ id: "pdf-inputs" }]
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Plugin Configuration Options
|
|
134
|
+
|
|
135
|
+
Plugins can accept additional configuration:
|
|
136
|
+
|
|
137
|
+
```ruby
|
|
138
|
+
# Enable/disable a plugin explicitly
|
|
139
|
+
plugins: [{ id: "response-healing", enabled: true }]
|
|
140
|
+
|
|
141
|
+
# Disable a default plugin for one request
|
|
142
|
+
plugins: [{ id: "response-healing", enabled: false }]
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Prediction Parameter (Latency Optimization)
|
|
146
|
+
|
|
147
|
+
The `prediction` parameter reduces latency by providing the model with an expected output:
|
|
148
|
+
|
|
149
|
+
```ruby
|
|
150
|
+
response = client.complete(
|
|
151
|
+
[{ role: "user", content: "What is the capital of France?" }],
|
|
152
|
+
model: "openai/gpt-4o",
|
|
153
|
+
prediction: { type: "content", content: "The capital of France is Paris." }
|
|
154
|
+
)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**When to use**:
|
|
158
|
+
- Code completion with predictable boilerplate
|
|
159
|
+
- Template filling where most content is known
|
|
160
|
+
- Minor corrections/refinements to existing text
|
|
161
|
+
|
|
162
|
+
**How it works**: Instead of generating from scratch, the model confirms or refines your prediction, which is faster when the prediction is accurate.
|
|
163
|
+
|
|
164
|
+
## Best Practices
|
|
165
|
+
|
|
166
|
+
1. **Use native healing for structured outputs**: It's free and adds <1ms latency
|
|
167
|
+
2. **Don't combine response-healing with streaming**: It won't work
|
|
168
|
+
3. **Check model compatibility**: Not all models support all plugins
|
|
169
|
+
4. **Monitor costs**: Web search may add to request cost as well as response latency
|
|
170
|
+
|
|
171
|
+
## Comparison: Native vs Client-Side Healing
|
|
172
|
+
|
|
173
|
+
| Aspect | Native (Plugin) | Client-Side (Gem) |
|
|
174
|
+
|--------|-----------------|-------------------|
|
|
175
|
+
| Location | Server-side | Client-side |
|
|
176
|
+
| Cost | Free | API call per attempt |
|
|
177
|
+
| Latency | <1ms | Full LLM call |
|
|
178
|
+
| Fixes syntax | Yes | Yes |
|
|
179
|
+
| Fixes schema | No | Yes |
|
|
180
|
+
| Streaming | No | Yes |
|
|
181
|
+
| Auto-enabled | For non-streaming structured outputs (when `auto_native_healing` is enabled) | When `auto_heal_responses = true` |
|
|
182
|
+
|
|
183
|
+
**Recommendation**: Use both! Native healing catches 80%+ of issues for free. Client-side healing handles the rest and validates against your schema.
|
data/docs/streaming.md
CHANGED
|
@@ -214,7 +214,7 @@ end
|
|
|
214
214
|
|
|
215
215
|
## Structured Outputs with Streaming
|
|
216
216
|
|
|
217
|
-
Streaming works seamlessly with structured outputs
|
|
217
|
+
Streaming works seamlessly with structured outputs. The response is streamed in real time, then validated and parsed after accumulation completes.
|
|
218
218
|
|
|
219
219
|
```ruby
|
|
220
220
|
# Define schema
|
|
@@ -225,18 +225,33 @@ user_schema = OpenRouter::Schema.define("user") do
|
|
|
225
225
|
end
|
|
226
226
|
|
|
227
227
|
# Stream with structured output
|
|
228
|
+
# IMPORTANT: accumulate_response must be true for structured outputs
|
|
228
229
|
response = streaming_client.stream_complete(
|
|
229
230
|
[{ role: "user", content: "Create a user: John Doe, 30, john@example.com" }],
|
|
230
231
|
model: "openai/gpt-4o",
|
|
231
232
|
response_format: user_schema,
|
|
232
|
-
accumulate_response: true
|
|
233
|
+
accumulate_response: true # Required for structured_output access
|
|
233
234
|
)
|
|
234
235
|
|
|
235
|
-
# Access structured output after streaming
|
|
236
|
+
# Access structured output after streaming completes
|
|
236
237
|
user_data = response.structured_output
|
|
237
238
|
puts "User: #{user_data['name']}, Age: #{user_data['age']}"
|
|
238
239
|
```
|
|
239
240
|
|
|
241
|
+
### How Structured Outputs Work with Streaming
|
|
242
|
+
|
|
243
|
+
1. **During Streaming**: Content chunks are streamed and displayed in real time
|
|
244
|
+
2. **After Accumulation**: The complete response is validated against your schema
|
|
245
|
+
3. **Auto-Healing**: If enabled and needed, healing occurs after streaming completes
|
|
246
|
+
4. **Validation**: Schema validation happens on the accumulated response
|
|
247
|
+
|
|
248
|
+
**Important Notes:**
|
|
249
|
+
- You must set `accumulate_response: true` to use `response.structured_output`
|
|
250
|
+
- Auto-healing (if configured) happens after streaming completes, not during streaming
|
|
251
|
+
- The `on_finish` callback receives the final, validated response
|
|
252
|
+
|
|
253
|
+
For detailed information on auto-healing, native vs forced outputs, and troubleshooting, see the [Structured Outputs documentation](structured_outputs.md).
|
|
254
|
+
|
|
240
255
|
## Configuration Options
|
|
241
256
|
|
|
242
257
|
The streaming client accepts all the same configuration options as the regular client:
|