rlm-rb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -2
- data/README.md +157 -55
- data/examples/plain_ruby_invoice_extraction.rb +85 -0
- data/lib/rlm/code_extractor.rb +125 -0
- data/lib/rlm/file.rb +1 -1
- data/lib/rlm/lm/mock.rb +45 -0
- data/lib/rlm/lm/ruby_llm.rb +99 -0
- data/lib/rlm/predict.rb +18 -9
- data/lib/rlm/prompt_builder.rb +199 -0
- data/lib/rlm/runtime/bridge.rb +146 -0
- data/lib/rlm/runtime/signature_registry.rb +75 -0
- data/lib/rlm/runtime.rb +352 -0
- data/lib/rlm/sandbox/unsafe_in_process.rb +116 -0
- data/lib/rlm/signature/dspy.rb +155 -0
- data/lib/rlm/signature.rb +76 -0
- data/lib/rlm/trace.rb +2 -0
- data/lib/rlm/version.rb +1 -1
- data/lib/rlm.rb +9 -0
- metadata +66 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 287e62dff4524b7ff81b1df336ee22d086dec01a9b16d82047ccb6d3cb6adce2
|
|
4
|
+
data.tar.gz: e3f19ce47df41aa2ac26d284f1cc13741aaff2703a9b0004a662107798c555ca
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b34d4334b16262fa6e4a8bfb32168998fc89a3e77c47cc413356ff1f316b9186d88d25298eca0014637fd82098d2c713c41af69c0fa26e7983e436f633c708b1
|
|
7
|
+
data.tar.gz: a3136dfd0a13da006b8b719a1565af16cb1692b1eda272a174944608c01660d990260af50cf0d36284d6c846c7606317f343a3d3fdc02b76199a4bad0e326cf8
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.0] - 2026-05-15
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Shipped `examples/plain_ruby_invoice_extraction.rb` as an opt-in live plain Ruby smoke example for real RubyLLM
|
|
15
|
+
and dspy adapters.
|
|
16
|
+
- `RLM::Lm::RubyLLM` provider adapter for root and sub-LM calls through RubyLLM.
|
|
17
|
+
- `RLM::Signature::Dspy` adapter for wrapping dspy.rb signatures behind the existing RLM signature protocol.
|
|
18
|
+
- `RLM::Signature.coerce_output` hook for normalizing parsed final output before validation.
|
|
19
|
+
- Optional `usage` payloads on `:root_lm_called` and `:sub_lm_called` trace events for adapters that expose token
|
|
20
|
+
and cost metadata.
|
|
21
|
+
- `RLM::CodeExtractor` for strict `<rlm-code>` / `<rlm-final>` response parsing.
|
|
22
|
+
- `RLM::Lm::Mock` for deterministic runtime-spine tests.
|
|
23
|
+
- `RLM::PromptBuilder` for deterministic strict prompt construction from signatures, inputs, context
|
|
24
|
+
manifests, and limits.
|
|
25
|
+
- `RLM::Runtime::Bridge` for sandbox-exposed `predict`, `tool`, `submit`, `read_file`,
|
|
26
|
+
`list_files`, and `log` runtime services.
|
|
27
|
+
- `RLM::Signature` protocol helpers for runtime-independent signature validation.
|
|
28
|
+
- `RLM::Sandbox::UnsafeInProcess` for dev/test-only runtime-spine integration tests.
|
|
29
|
+
- `RLM::Runtime` mock execution loop with prompt building, LM calls, code/final extraction,
|
|
30
|
+
sandbox execution, recursive subcalls, validation, budget policies, and `RLM::Result` output.
|
|
31
|
+
- `RLM::Predict#call` now delegates to the runtime spine.
|
|
32
|
+
- Budget enforcement expanded to `max_sub_lm_calls`, `max_tool_calls`, `max_cost_cents`, and `max_runtime_seconds`.
|
|
33
|
+
- Budget policies are honored: `:fail`, `:needs_review`, and conservative `:return_partial` when a valid submitted
|
|
34
|
+
output already exists.
|
|
35
|
+
- `trace_store` is forwarded into runtime as a best-effort callable hook receiving the terminal `RLM::Result`.
|
|
36
|
+
- `RLM::ToolError` is preserved through sandbox execution and reported as `status: :tool_error`.
|
|
37
|
+
- Trace event completeness: `:budget_checked` recorded at all budget checks, `:run_failed` recorded on all failure paths.
|
|
38
|
+
- PromptBuilder v0.2 contract: signature description, input/output fields, available helpers, safety instructions.
|
|
39
|
+
- Parse failures are deterministic and fail-closed (repair attempts are deferred to a future milestone).
|
|
40
|
+
- Sandbox cleanup proven across all failure modes (success, validation, parse, provider, budget, sandbox errors).
|
|
41
|
+
- `RLM::Sandbox::UnsafeInProcess` serializes process-global stream capture with a mutex while remaining dev/test-only
|
|
42
|
+
and unsuitable for production isolation.
|
|
43
|
+
|
|
44
|
+
### Changed
|
|
45
|
+
|
|
46
|
+
- Ruby compatibility now requires Ruby `>= 3.3.0` because dspy.rb support is part of the plain Ruby milestone.
|
|
47
|
+
- Runtime final-output validation now runs after signature-level output coercion.
|
|
48
|
+
|
|
49
|
+
### Fixed
|
|
50
|
+
|
|
51
|
+
- Unknown RubyLLM provider costs are recorded as `cost_known: false`, contribute `0` cents for that call, and do not
|
|
52
|
+
crash cost accounting.
|
|
53
|
+
|
|
10
54
|
## [0.1.0] - 2026-05-12
|
|
11
55
|
|
|
12
56
|
Skeleton release. Establishes the public types, configuration surface, sandbox
|
|
@@ -28,9 +72,8 @@ v0.2.
|
|
|
28
72
|
- `RLM::Tool` base class with category DSL.
|
|
29
73
|
- `RLM::Predict` skeleton (`#call` raises `NotImplementedError` until the runtime loop lands).
|
|
30
74
|
|
|
31
|
-
### Not yet implemented (tracked for
|
|
75
|
+
### Not yet implemented (tracked for future milestones)
|
|
32
76
|
|
|
33
|
-
- Runtime execution loop, code extractor, runtime bridge, recursive `predict(...)`.
|
|
34
77
|
- RubyLLM root/sub-LM adapters.
|
|
35
78
|
- dspy.rb signature adapter and output validation.
|
|
36
79
|
- `RLM::Sandbox::Subprocess` backend.
|
data/README.md
CHANGED
|
@@ -3,16 +3,19 @@
|
|
|
3
3
|
[Gem Version](https://badge.fury.io/rb/rlm-rb)
|
|
4
4
|
[CI](https://github.com/dpaluy/rlm/actions/workflows/ci.yml)
|
|
5
5
|
|
|
6
|
-
Recursive Language Models for Ruby
|
|
6
|
+
Recursive Language Models for Ruby.
|
|
7
7
|
|
|
8
|
-
RLM.rb is a Ruby
|
|
9
|
-
It
|
|
10
|
-
for typed signatures
|
|
11
|
-
recursive sub-LM calls, typed final output, budget
|
|
8
|
+
RLM.rb is a Ruby runtime for typed, sandbox-oriented, auditable AI jobs over large application context.
|
|
9
|
+
It integrates with [RubyLLM](https://github.com/crmne/ruby_llm) for provider access and
|
|
10
|
+
[dspy.rb](https://github.com/vicentereig/dspy.rb) for typed signatures. The current plain Ruby milestone includes the
|
|
11
|
+
recursive execution spine: prompt loop, file and context mounting, recursive sub-LM calls, typed final output, budget
|
|
12
|
+
controls, trace events, a RubyLLM LM adapter, a dspy signature adapter, and a minimal trace persistence hook.
|
|
12
13
|
|
|
13
|
-
> **Status:
|
|
14
|
-
>
|
|
15
|
-
>
|
|
14
|
+
> **Status: Plain Ruby adapter milestone.** The released gem is v0.2.0. It includes `RLM::Lm::RubyLLM`,
|
|
15
|
+
> `RLM::Signature::Dspy`, `RLM::Lm::Mock`, `RLM::Sandbox::UnsafeInProcess`, budget enforcement and budget policies,
|
|
16
|
+
> trace events, recursive `predict`, prompt building, and a best-effort `trace_store` callable hook.
|
|
17
|
+
> Rails integration, subprocess/container sandboxing, tools, skills, cache, telemetry, and evals remain future
|
|
18
|
+
> milestones. `UnsafeInProcess` is dev/test-only and executes generated code in the host Ruby process.
|
|
16
19
|
|
|
17
20
|
## Why
|
|
18
21
|
|
|
@@ -25,6 +28,9 @@ typed LLM functions only when needed, and returns validated Ruby objects with a
|
|
|
25
28
|
|
|
26
29
|
## Install
|
|
27
30
|
|
|
31
|
+
RLM.rb requires Ruby 3.3 or newer. Ruby 3.2 and older are not supported because dspy.rb is mandatory for the plain
|
|
32
|
+
Ruby adapter milestone.
|
|
33
|
+
|
|
28
34
|
Add the gem to your Gemfile:
|
|
29
35
|
|
|
30
36
|
```ruby
|
|
@@ -41,9 +47,8 @@ gem install rlm-rb
|
|
|
41
47
|
|
|
42
48
|
```ruby
|
|
43
49
|
RLM.configure do |config|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# config.sub_lm = RubyLLM.chat(model: "openai/gpt-5-mini")
|
|
50
|
+
config.root_lm = RLM::Lm::RubyLLM.new(model: "gpt-5-mini")
|
|
51
|
+
config.sub_lm = RLM::Lm::RubyLLM.new(model: "gpt-5-mini")
|
|
47
52
|
|
|
48
53
|
config.sandbox = RLM::Sandbox::Mock.new
|
|
49
54
|
|
|
@@ -58,14 +63,20 @@ RLM.configure do |config|
|
|
|
58
63
|
end
|
|
59
64
|
```
|
|
60
65
|
|
|
61
|
-
|
|
66
|
+
`RLM::Lm::RubyLLM` creates a fresh `RubyLLM.chat` for each runtime LM call. That keeps RLM prompts standalone and
|
|
67
|
+
prevents conversation history from leaking between root and sub-model calls.
|
|
68
|
+
|
|
69
|
+
## Plain Ruby API
|
|
62
70
|
|
|
63
71
|
```ruby
|
|
72
|
+
require "dspy"
|
|
73
|
+
require "rlm"
|
|
74
|
+
|
|
64
75
|
class InvoiceExtraction < DSPy::Signature
|
|
65
76
|
description "Extract normalized invoice fields from a vendor invoice."
|
|
66
77
|
|
|
67
78
|
input do
|
|
68
|
-
const :
|
|
79
|
+
const :invoice_text, String
|
|
69
80
|
const :vendor_id, Integer
|
|
70
81
|
end
|
|
71
82
|
|
|
@@ -73,29 +84,110 @@ class InvoiceExtraction < DSPy::Signature
|
|
|
73
84
|
const :vendor_name, String
|
|
74
85
|
const :invoice_number, String
|
|
75
86
|
const :total_cents, Integer
|
|
76
|
-
const :confidence, Float
|
|
77
|
-
const :needs_review, T::Boolean
|
|
78
87
|
end
|
|
79
88
|
end
|
|
80
89
|
|
|
90
|
+
RLM.configure do |config|
|
|
91
|
+
config.root_lm = RLM::Lm::RubyLLM.new(model: "gpt-5-mini")
|
|
92
|
+
config.sub_lm = RLM::Lm::RubyLLM.new(model: "gpt-5-mini")
|
|
93
|
+
config.sandbox = RLM::Sandbox::UnsafeInProcess.new # dev/test only
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
signature = RLM::Signature::Dspy.new(InvoiceExtraction)
|
|
97
|
+
|
|
81
98
|
result = RLM.predict(
|
|
82
|
-
|
|
99
|
+
signature,
|
|
83
100
|
input: {
|
|
84
|
-
|
|
101
|
+
invoice_text: "Vendor: Acme\nInvoice: INV-001\nTotal: $100.00",
|
|
85
102
|
vendor_id: 123
|
|
86
103
|
},
|
|
87
|
-
max_iterations:
|
|
88
|
-
max_llm_calls: 30,
|
|
89
|
-
max_cost_cents: 150
|
|
104
|
+
limits: RLM::Limits.new(max_iterations: 8, max_llm_calls: 25)
|
|
90
105
|
)
|
|
91
106
|
|
|
92
|
-
result.output
|
|
93
|
-
|
|
107
|
+
result.output
|
|
108
|
+
# => { vendor_name: "Acme", invoice_number: "INV-001", total_cents: 10000 }
|
|
109
|
+
|
|
110
|
+
result.trace.events.find { |event| event[:type] == :root_lm_called }[:payload][:usage]
|
|
111
|
+
# => { model_id: "...", input_tokens: ..., output_tokens: ..., cost_cents: ..., cost_known: true }
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Usage metadata is recorded on `:root_lm_called` and `:sub_lm_called` trace events when an adapter exposes it. It is not
|
|
115
|
+
duplicated onto `RLM::Result` in this milestone. RubyLLM cost helpers can return `nil` when model pricing is unknown;
|
|
116
|
+
RLM records `cost_known: false`, contributes `0` cents for that call, and therefore cannot enforce cost limits for calls whose provider cost is unknown.
|
|
117
|
+
|
|
118
|
+
## Run a Live Plain Ruby Example
|
|
119
|
+
|
|
120
|
+
The gem ships one opt-in live example at `examples/plain_ruby_invoice_extraction.rb`. By default it exits before
|
|
121
|
+
provider credential checks, LM configuration, or `RLM.predict`, even if provider credentials are already present:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
bundle exec ruby examples/plain_ruby_invoice_extraction.rb
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
To run the live path, configure provider credentials and opt in explicitly:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
RLM_RUN_LIVE_EXAMPLE=1 OPENAI_API_KEY="$OPENAI_API_KEY" \
|
|
131
|
+
bundle exec ruby examples/plain_ruby_invoice_extraction.rb
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
The example uses `RLM::Lm::RubyLLM` for root and sub-LM calls, wraps a real `DSPy::Signature` with
|
|
135
|
+
`RLM::Signature::Dspy`, calls the public `RLM.predict(...)` API, and prints result status, typed output, trace id, cost,
|
|
136
|
+
and usage payloads when RubyLLM exposes them. Set `RLM_EXAMPLE_MODEL` and `RLM_EXAMPLE_SUB_MODEL` to override the
|
|
137
|
+
default model.
|
|
138
|
+
|
|
139
|
+
The live example uses `RLM::Sandbox::UnsafeInProcess`, which is dev/test-only and runs generated Ruby code in the host
|
|
140
|
+
process. Rails integration, subprocess/container sandboxing, tools, skills, evals, telemetry, and production execution
|
|
141
|
+
examples remain future milestones.
|
|
142
|
+
|
|
143
|
+
## Mock Runtime API
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
class InvoiceExtraction
|
|
147
|
+
def self.name = "InvoiceExtraction"
|
|
148
|
+
def self.description = "Extract normalized invoice fields from a vendor invoice."
|
|
149
|
+
def self.input_fields = { invoice_pdf: :file, vendor_id: :integer }
|
|
150
|
+
def self.output_fields = { vendor_name: :string, invoice_number: :string, total_cents: :integer }
|
|
151
|
+
def self.validate_input(input) = input.key?(:vendor_id) ? [] : ["vendor_id is required"]
|
|
152
|
+
def self.validate_output(output) = output.key?(:vendor_name) ? [] : ["vendor_name is required"]
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Mock LM for testing (no provider needed)
|
|
156
|
+
lm = RLM::Lm::Mock.new(responses: ['<rlm-final>{"vendor_name":"Acme","invoice_number":"INV-001","total_cents":10000}</rlm-final>'])
|
|
157
|
+
|
|
158
|
+
result = RLM.predict(
|
|
159
|
+
InvoiceExtraction,
|
|
160
|
+
input: { vendor_id: 123 },
|
|
161
|
+
lm: lm,
|
|
162
|
+
sandbox: RLM::Sandbox::UnsafeInProcess.new, # dev/test only: executes in host process
|
|
163
|
+
limits: RLM::Limits.new(max_iterations: 8, max_llm_calls: 25)
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
result.output # { "vendor_name" => "Acme", ... }
|
|
167
|
+
result.trace # full event stream
|
|
94
168
|
result.cost_cents # accumulated cost
|
|
95
|
-
result.status # :completed, :
|
|
169
|
+
result.status # :completed, :budget_exceeded, :failed_validation, ...
|
|
96
170
|
```
|
|
97
171
|
|
|
98
|
-
##
|
|
172
|
+
## dspy Signature Adapter
|
|
173
|
+
|
|
174
|
+
`RLM::Signature::Dspy` wraps a `DSPy::Signature` class behind RLM's internal signature protocol:
|
|
175
|
+
|
|
176
|
+
- `description`
|
|
177
|
+
- `input_fields`
|
|
178
|
+
- `output_fields`
|
|
179
|
+
- `validate_input`
|
|
180
|
+
- `validate_output`
|
|
181
|
+
- `coerce_output`
|
|
182
|
+
|
|
183
|
+
The adapter derives fields and simple validation from dspy JSON schema metadata. Output coercion normalizes parsed
|
|
184
|
+
JSON/hash output to schema keys before validation.
|
|
185
|
+
|
|
186
|
+
## Rails
|
|
187
|
+
|
|
188
|
+
Rails integration is not yet implemented. Rails remains a v2 milestone tracked in `docs/postponed-issues.md`.
|
|
189
|
+
|
|
190
|
+
## What's Implemented
|
|
99
191
|
|
|
100
192
|
| Component | Status |
|
|
101
193
|
|-----------|--------|
|
|
@@ -106,26 +198,34 @@ result.status # :completed, :needs_review, :budget_exceeded, ...
|
|
|
106
198
|
| `RLM::Trace` with NDJSON / JSON export | Ready |
|
|
107
199
|
| `RLM::Result` with full status enum | Ready |
|
|
108
200
|
| `RLM::Sandbox::Base` interface + `Mock` backend | Ready |
|
|
201
|
+
| `RLM::Sandbox::UnsafeInProcess` | Ready for dev/test only; executes in host process and mutates global streams during serialized capture |
|
|
109
202
|
| `RLM::Tool` base class with category DSL | Ready |
|
|
110
203
|
| Error hierarchy | Ready |
|
|
111
|
-
| `RLM::Predict`
|
|
112
|
-
|
|
|
113
|
-
|
|
|
114
|
-
|
|
|
115
|
-
| `RLM::
|
|
116
|
-
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
204
|
+
| `RLM::Predict#call` | Delegates to `RLM::Runtime` |
|
|
205
|
+
| `RLM::Runtime` mock loop | Ready (with `RLM::Lm::Mock`) |
|
|
206
|
+
| `RLM::PromptBuilder` | Ready (v0.2 contract) |
|
|
207
|
+
| `RLM::CodeExtractor` | Ready |
|
|
208
|
+
| `RLM::Runtime::Bridge` | Ready for runtime-owned subcalls, tools, submission, file reads, and logging |
|
|
209
|
+
| Budget enforcement and policies (`max_llm_calls`, `max_sub_lm_calls`, `max_tool_calls`, `max_iterations`, `max_cost_cents`, `max_runtime_seconds`, `on_budget_exceeded`) | Ready |
|
|
210
|
+
| `trace_store` callable hook | Ready (best-effort; receives terminal `RLM::Result`) |
|
|
211
|
+
| Recursive `predict` + depth limit | Ready |
|
|
212
|
+
| `RLM::Lm::RubyLLM` provider adapter | Ready |
|
|
213
|
+
| `RLM::Signature::Dspy` signature adapter | Ready |
|
|
214
|
+
| Trace usage metadata for RubyLLM calls | Ready |
|
|
215
|
+
| `RLM::Sandbox::Subprocess` | Future milestone |
|
|
216
|
+
| Rails Railtie, generator, migrations, ActiveStorage adapter | Future milestone |
|
|
217
|
+
|
|
218
|
+
The table above reflects the implementation status of the plain Ruby adapter milestone released as v0.2.0.
|
|
219
|
+
|
|
220
|
+
## Rails setup (intended v2 milestone)
|
|
121
221
|
|
|
122
222
|
The Rails integration is not yet implemented, but the intended setup is:
|
|
123
223
|
|
|
124
224
|
```ruby
|
|
125
225
|
# config/initializers/rlm.rb
|
|
126
226
|
RLM.configure do |config|
|
|
127
|
-
config.root_lm = RubyLLM.
|
|
128
|
-
config.sub_lm
|
|
227
|
+
config.root_lm = RLM::Lm::RubyLLM.new(model: Rails.application.credentials.dig(:rlm, :root_model))
|
|
228
|
+
config.sub_lm = RLM::Lm::RubyLLM.new(model: Rails.application.credentials.dig(:rlm, :sub_model))
|
|
129
229
|
|
|
130
230
|
config.sandbox = RLM::Sandbox::Subprocess.new # development
|
|
131
231
|
# config.sandbox = RLM::Sandbox::Docker.new # production (v0.4)
|
|
@@ -173,9 +273,6 @@ rescue RLM::ToolError => e
|
|
|
173
273
|
rescue RLM::ParseError => e
|
|
174
274
|
# Root LM response could not be parsed into <rlm-code>/<rlm-final>.
|
|
175
275
|
raise
|
|
176
|
-
rescue RLM::NoProgressError => e
|
|
177
|
-
# The model emitted no new progress across iterations.
|
|
178
|
-
raise
|
|
179
276
|
rescue RLM::ConfigurationError => e
|
|
180
277
|
# Missing signature, missing root LM, invalid sandbox, etc.
|
|
181
278
|
raise
|
|
@@ -186,33 +283,38 @@ end
|
|
|
186
283
|
```
|
|
187
284
|
|
|
188
285
|
Soft failures land on `result.status` instead of raising. Inspect `result.success?`, `result.needs_review?`,
|
|
189
|
-
`result.failed?`, and `result.validation_errors` to branch.
|
|
286
|
+
`result.failed?`, and `result.validation_errors` to branch. Budget handling honors `limits.on_budget_exceeded`:
|
|
287
|
+
`:fail` returns `:budget_exceeded`, `:needs_review` returns `:needs_review`, and `:return_partial` returns
|
|
288
|
+
`:needs_review` only when a valid submitted output already exists; otherwise it fails as `:budget_exceeded`.
|
|
190
289
|
|
|
191
290
|
| Status | Predicate | Meaning |
|
|
192
291
|
|--------|-----------|---------|
|
|
193
292
|
| `:completed` | `success?` | Output valid, ready to use. |
|
|
194
|
-
| `:needs_review` | `needs_review?` |
|
|
195
|
-
| `:failed_validation` | `failed?` | Output invalid after
|
|
196
|
-
| `:budget_exceeded` | `failed?` | Hit a hard limit
|
|
293
|
+
| `:needs_review` | `needs_review?` | Budget policy requested review, optionally with a valid submitted partial output. |
|
|
294
|
+
| `:failed_validation` | `failed?` | Output invalid after validation. |
|
|
295
|
+
| `:budget_exceeded` | `failed?` | Hit a hard limit with `:fail`, or `:return_partial` had no valid submitted output. |
|
|
197
296
|
| `:sandbox_error` | `failed?` | Sandbox violation or crash. |
|
|
198
297
|
| `:tool_error` | `failed?` | Tool raised or returned invalid output. |
|
|
199
298
|
| `:provider_error` | `failed?` | RubyLLM provider failure. |
|
|
200
299
|
| `:aborted` | `failed?` | Run cancelled by caller. |
|
|
201
300
|
|
|
202
|
-
## Production safety
|
|
301
|
+
## Production safety
|
|
203
302
|
|
|
204
|
-
-
|
|
205
|
-
-
|
|
206
|
-
|
|
207
|
-
-
|
|
303
|
+
- `RLM::Sandbox::UnsafeInProcess` executes generated code in the host Ruby process. It is dev/test-only and unsafe.
|
|
304
|
+
- `UnsafeInProcess` captures `$stdout`/`$stderr` by mutating process-global streams; capture is serialized with a mutex,
|
|
305
|
+
but the sandbox remains unsuitable for production and should not be treated as concurrency-safe isolation.
|
|
306
|
+
- The subprocess sandbox is a future milestone for local development.
|
|
307
|
+
- Production deployments should use a container sandbox or remote isolated runner (future milestone).
|
|
308
|
+
- Generated code must not execute inside the host Ruby process in production. The codebase will hold this invariant.
|
|
309
|
+
- Mounted files are data, not instructions; generated code should treat file contents as untrusted input.
|
|
208
310
|
|
|
209
311
|
## Development
|
|
210
312
|
|
|
211
313
|
```bash
|
|
212
|
-
bundle install
|
|
213
|
-
|
|
214
|
-
bundle exec rubocop
|
|
215
|
-
bundle exec rake
|
|
314
|
+
zsh -lc 'source ~/.zshrc && eval "$(mise activate zsh)" && bundle install'
|
|
315
|
+
zsh -lc 'source ~/.zshrc && eval "$(mise activate zsh)" && bundle exec rake test'
|
|
316
|
+
zsh -lc 'source ~/.zshrc && eval "$(mise activate zsh)" && bundle exec rubocop'
|
|
317
|
+
zsh -lc 'source ~/.zshrc && eval "$(mise activate zsh)" && bundle exec rake'
|
|
216
318
|
```
|
|
217
319
|
|
|
218
320
|
## Contributing
|
|
@@ -221,10 +323,10 @@ Issues and pull requests welcome at https://github.com/dpaluy/rlm.
|
|
|
221
323
|
|
|
222
324
|
## API reference
|
|
223
325
|
|
|
224
|
-
RLM.rb
|
|
326
|
+
RLM.rb integrates with these upstream libraries. For provider or signature details, go to source:
|
|
225
327
|
|
|
226
|
-
- [RubyLLM](https://github.com/crmne/ruby_llm), [
|
|
227
|
-
- [dspy.rb](https://github.com/vicentereig/dspy.rb), [Signatures guide](https://
|
|
328
|
+
- [RubyLLM](https://github.com/crmne/ruby_llm), [chat guide](https://rubyllm.com/chat/) for provider, chat, token, and cost APIs.
|
|
329
|
+
- [dspy.rb](https://github.com/vicentereig/dspy.rb), [Signatures guide](https://oss.vicente.services/dspy.rb/core-concepts/signatures/) for typed input/output contracts.
|
|
228
330
|
- The [Recursive Language Models](https://github.com/alexzhang13/rlm) reference implementation and the
|
|
229
331
|
[DSPy RLM module](https://dspy.ai/api/modules/RLM/) for the underlying idea.
|
|
230
332
|
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "dspy"
|
|
5
|
+
require "json"
|
|
6
|
+
require "rlm"
|
|
7
|
+
|
|
8
|
+
class InvoiceExtraction < DSPy::Signature
|
|
9
|
+
description "Extract normalized invoice fields from a vendor invoice."
|
|
10
|
+
|
|
11
|
+
input do
|
|
12
|
+
const :invoice_text, String
|
|
13
|
+
const :vendor_id, Integer
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
output do
|
|
17
|
+
const :vendor_name, String
|
|
18
|
+
const :invoice_number, String
|
|
19
|
+
const :total_cents, Integer
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def live_example_enabled?
|
|
24
|
+
ENV["RLM_RUN_LIVE_EXAMPLE"] == "1"
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def provider_configured?
|
|
28
|
+
!ENV["OPENAI_API_KEY"].to_s.empty?
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def print_skipped_message
|
|
32
|
+
puts "Skipped live RLM example."
|
|
33
|
+
puts "Set RLM_RUN_LIVE_EXAMPLE=1 and OPENAI_API_KEY to run a real RubyLLM provider call."
|
|
34
|
+
puts "Optional: set RLM_EXAMPLE_MODEL and RLM_EXAMPLE_SUB_MODEL to override the default model."
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def usage_events(result)
|
|
38
|
+
result.trace.events.select { |event| %i[root_lm_called sub_lm_called].include?(event[:type]) }
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
unless live_example_enabled?
|
|
42
|
+
print_skipped_message
|
|
43
|
+
exit 0
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
unless provider_configured?
|
|
47
|
+
warn "RLM_RUN_LIVE_EXAMPLE=1 is set, but OPENAI_API_KEY is missing."
|
|
48
|
+
warn "Configure provider credentials before running the live example."
|
|
49
|
+
exit 1
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
root_model = ENV.fetch("RLM_EXAMPLE_MODEL", "gpt-5-mini")
|
|
53
|
+
sub_model = ENV.fetch("RLM_EXAMPLE_SUB_MODEL", root_model)
|
|
54
|
+
|
|
55
|
+
RLM.configure do |config|
|
|
56
|
+
config.root_lm = RLM::Lm::RubyLLM.new(model: root_model)
|
|
57
|
+
config.sub_lm = RLM::Lm::RubyLLM.new(model: sub_model)
|
|
58
|
+
|
|
59
|
+
# Dev/test only: UnsafeInProcess runs generated Ruby code in this host process.
|
|
60
|
+
config.sandbox = RLM::Sandbox::UnsafeInProcess.new
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
signature = RLM::Signature::Dspy.new(InvoiceExtraction)
|
|
64
|
+
|
|
65
|
+
result = RLM.predict(
|
|
66
|
+
signature,
|
|
67
|
+
input: {
|
|
68
|
+
invoice_text: "Vendor: Acme Supplies\nInvoice: INV-001\nTotal: $100.00",
|
|
69
|
+
vendor_id: 123
|
|
70
|
+
},
|
|
71
|
+
limits: RLM::Limits.new(max_iterations: 8, max_llm_calls: 25, max_recursion_depth: 1)
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
puts "status: #{result.status}"
|
|
75
|
+
puts "trace_id: #{result.trace.id}"
|
|
76
|
+
puts "cost_cents: #{result.cost_cents}"
|
|
77
|
+
puts "output:"
|
|
78
|
+
puts JSON.pretty_generate(result.output)
|
|
79
|
+
|
|
80
|
+
usage_events(result).each do |event|
|
|
81
|
+
next unless event[:payload][:usage]
|
|
82
|
+
|
|
83
|
+
puts "#{event[:type]} usage:"
|
|
84
|
+
puts JSON.pretty_generate(event[:payload][:usage])
|
|
85
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module RLM
|
|
6
|
+
class CodeExtractor
|
|
7
|
+
CODE_OPEN = "<rlm-code>"
|
|
8
|
+
CODE_CLOSE = "</rlm-code>"
|
|
9
|
+
FINAL_OPEN = "<rlm-final>"
|
|
10
|
+
FINAL_CLOSE = "</rlm-final>"
|
|
11
|
+
KNOWN_TAG_PATTERN = %r{</?rlm-(?:code|final)>}
|
|
12
|
+
TYPES = %i[code final].freeze
|
|
13
|
+
|
|
14
|
+
class Result
|
|
15
|
+
attr_reader :type, :content
|
|
16
|
+
|
|
17
|
+
def initialize(type:, content:)
|
|
18
|
+
raise ArgumentError, "Unknown code extraction result type: #{type.inspect}" unless TYPES.include?(type)
|
|
19
|
+
|
|
20
|
+
@type = type
|
|
21
|
+
@content = content
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def code?
|
|
25
|
+
type == :code
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def final?
|
|
29
|
+
type == :final
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def to_h
|
|
33
|
+
{
|
|
34
|
+
type: type,
|
|
35
|
+
content: content
|
|
36
|
+
}
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def self.extract(response)
|
|
41
|
+
new.extract(response)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def extract(response)
|
|
45
|
+
raise ParseError, "response must be a String" unless response.is_a?(String)
|
|
46
|
+
|
|
47
|
+
tags = scan_tags(response)
|
|
48
|
+
raise ParseError, "response must contain one rlm-code or rlm-final block" if tags.empty?
|
|
49
|
+
|
|
50
|
+
type = block_type_for(tags)
|
|
51
|
+
block = extract_block(response, tags, type)
|
|
52
|
+
|
|
53
|
+
Result.new(type: type, content: parse_content(type, block))
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
private
|
|
57
|
+
|
|
58
|
+
def extract_block(response, tags, type)
|
|
59
|
+
open_tag, close_tag = tags_for(type)
|
|
60
|
+
opening, closing = matching_tags(tags, open_tag, close_tag)
|
|
61
|
+
raise ParseError, "#{close_tag} must appear after #{open_tag}" if closing[:begin] < opening[:end]
|
|
62
|
+
|
|
63
|
+
reject_non_whitespace_outside_block!(response, opening, closing)
|
|
64
|
+
content = response[opening[:end]...closing[:begin]]
|
|
65
|
+
reject_nested_tags!(content)
|
|
66
|
+
content
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def matching_tags(tags, open_tag, close_tag)
|
|
70
|
+
open_tags = tags.select { |tag| tag[:text] == open_tag }
|
|
71
|
+
close_tags = tags.select { |tag| tag[:text] == close_tag }
|
|
72
|
+
|
|
73
|
+
raise ParseError, "response must contain exactly one #{open_tag} tag" unless open_tags.one?
|
|
74
|
+
raise ParseError, "response must contain exactly one #{close_tag} tag" unless close_tags.one?
|
|
75
|
+
|
|
76
|
+
[open_tags.first, close_tags.first]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def scan_tags(response)
|
|
80
|
+
response.to_enum(:scan, KNOWN_TAG_PATTERN).map do
|
|
81
|
+
match = Regexp.last_match
|
|
82
|
+
{ text: match[0], begin: match.begin(0), end: match.end(0) }
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def block_type_for(tags)
|
|
87
|
+
has_code = tags.any? { |tag| [CODE_OPEN, CODE_CLOSE].include?(tag[:text]) }
|
|
88
|
+
has_final = tags.any? { |tag| [FINAL_OPEN, FINAL_CLOSE].include?(tag[:text]) }
|
|
89
|
+
|
|
90
|
+
raise ParseError, "response must not mix rlm-code and rlm-final blocks" if has_code && has_final
|
|
91
|
+
|
|
92
|
+
has_code ? :code : :final
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def tags_for(type)
|
|
96
|
+
case type
|
|
97
|
+
when :code then [CODE_OPEN, CODE_CLOSE]
|
|
98
|
+
when :final then [FINAL_OPEN, FINAL_CLOSE]
|
|
99
|
+
else raise ParseError, "unknown block type: #{type.inspect}"
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def reject_non_whitespace_outside_block!(response, opening, closing)
|
|
104
|
+
before = response[0...opening[:begin]]
|
|
105
|
+
after = response[closing[:end]..]
|
|
106
|
+
return if before.match?(/\A\s*\z/) && after.match?(/\A\s*\z/)
|
|
107
|
+
|
|
108
|
+
raise ParseError, "response must contain only one rlm block and surrounding whitespace"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def reject_nested_tags!(content)
|
|
112
|
+
return unless content.match?(KNOWN_TAG_PATTERN)
|
|
113
|
+
|
|
114
|
+
raise ParseError, "rlm blocks must not contain nested rlm tags"
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def parse_content(type, content)
|
|
118
|
+
return content if type == :code
|
|
119
|
+
|
|
120
|
+
JSON.parse(content)
|
|
121
|
+
rescue JSON::ParserError => e
|
|
122
|
+
raise ParseError, "invalid JSON in rlm-final block: #{e.message}"
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
data/lib/rlm/file.rb
CHANGED
|
@@ -85,7 +85,7 @@ module RLM
|
|
|
85
85
|
when :path then ::File.read(source[:path])
|
|
86
86
|
when :text, :io then source[:text]
|
|
87
87
|
when :active_storage then source[:blob].download
|
|
88
|
-
else raise
|
|
88
|
+
else raise ConfigurationError, "Unknown file source kind: #{source[:kind].inspect}"
|
|
89
89
|
end
|
|
90
90
|
end
|
|
91
91
|
|
data/lib/rlm/lm/mock.rb
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RLM
|
|
4
|
+
module Lm
|
|
5
|
+
class Mock
|
|
6
|
+
attr_reader :prompts, :cost_cents
|
|
7
|
+
|
|
8
|
+
def initialize(responses:, cost_cents: 0)
|
|
9
|
+
@responses = Array(responses).dup.freeze
|
|
10
|
+
raise ArgumentError, "responses must not be empty" if @responses.empty?
|
|
11
|
+
|
|
12
|
+
@cost_cents_per_call = cost_cents
|
|
13
|
+
@cost_cents = 0
|
|
14
|
+
@prompts = []
|
|
15
|
+
@index = 0
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def call(prompt:, **)
|
|
19
|
+
raise ProviderError, "prompt must be a String" unless prompt.is_a?(String)
|
|
20
|
+
raise ProviderError, "mock LM responses exhausted" if exhausted?
|
|
21
|
+
|
|
22
|
+
prompts << prompt
|
|
23
|
+
@cost_cents += @cost_cents_per_call
|
|
24
|
+
|
|
25
|
+
response = @responses.fetch(@index)
|
|
26
|
+
@index += 1
|
|
27
|
+
response
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def call_count
|
|
31
|
+
prompts.length
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def last_prompt
|
|
35
|
+
prompts.last
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
def exhausted?
|
|
41
|
+
@index >= @responses.length
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|