kobako 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +1 -1
- data/README.md +205 -59
- data/data/kobako.wasm +0 -0
- data/ext/kobako/Cargo.toml +2 -2
- data/ext/kobako/src/wasm/cache.rs +15 -7
- data/ext/kobako/src/wasm/dispatch.rs +88 -36
- data/ext/kobako/src/wasm/host_state.rs +298 -55
- data/ext/kobako/src/wasm/instance.rs +477 -160
- data/ext/kobako/src/wasm.rs +20 -5
- data/lib/kobako/capture.rb +12 -10
- data/lib/kobako/codec/decoder.rb +3 -4
- data/lib/kobako/codec/encoder.rb +1 -1
- data/lib/kobako/codec/error.rb +3 -2
- data/lib/kobako/codec/factory.rb +24 -17
- data/lib/kobako/codec/utils.rb +105 -12
- data/lib/kobako/codec.rb +2 -1
- data/lib/kobako/errors.rb +22 -10
- data/lib/kobako/handle.rb +62 -0
- data/lib/kobako/handle_table.rb +119 -0
- data/lib/kobako/invocation.rb +143 -0
- data/lib/kobako/outcome/panic.rb +2 -2
- data/lib/kobako/outcome.rb +61 -24
- data/lib/kobako/rpc/dispatcher.rb +30 -28
- data/lib/kobako/rpc/envelope.rb +10 -10
- data/lib/kobako/rpc/fault.rb +4 -3
- data/lib/kobako/rpc/namespace.rb +3 -3
- data/lib/kobako/rpc/server.rb +23 -33
- data/lib/kobako/rpc/wire_error.rb +23 -0
- data/lib/kobako/sandbox.rb +211 -136
- data/lib/kobako/sandbox_options.rb +73 -0
- data/lib/kobako/snippet/binary.rb +30 -0
- data/lib/kobako/snippet/source.rb +28 -0
- data/lib/kobako/snippet/table.rb +174 -0
- data/lib/kobako/snippet.rb +20 -0
- data/lib/kobako/usage.rb +41 -0
- data/lib/kobako/version.rb +1 -1
- data/lib/kobako.rb +1 -0
- data/sig/kobako/codec/factory.rbs +1 -1
- data/sig/kobako/codec/utils.rbs +10 -0
- data/sig/kobako/errors.rbs +3 -0
- data/sig/kobako/handle.rbs +19 -0
- data/sig/kobako/handle_table.rbs +23 -0
- data/sig/kobako/invocation.rbs +25 -0
- data/sig/kobako/outcome.rbs +1 -1
- data/sig/kobako/rpc/dispatcher.rbs +7 -7
- data/sig/kobako/rpc/envelope.rbs +3 -3
- data/sig/kobako/rpc/server.rbs +1 -7
- data/sig/kobako/rpc/wire_error.rbs +6 -0
- data/sig/kobako/sandbox.rbs +22 -17
- data/sig/kobako/sandbox_options.rbs +32 -0
- data/sig/kobako/snippet/binary.rbs +12 -0
- data/sig/kobako/snippet/source.rbs +13 -0
- data/sig/kobako/snippet/table.rbs +36 -0
- data/sig/kobako/snippet.rbs +4 -0
- data/sig/kobako/usage.rbs +11 -0
- data/sig/kobako/wasm.rbs +5 -1
- metadata +21 -5
- data/lib/kobako/rpc/handle.rb +0 -38
- data/lib/kobako/rpc/handle_table.rb +0 -107
- data/sig/kobako/rpc/handle.rbs +0 -19
- data/sig/kobako/rpc/handle_table.rbs +0 -25
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb91cd11e954d6388b7d6c19be8b9fc77548fa1ea9d57b75f1afc7c0d450a36b
|
|
4
|
+
data.tar.gz: f84463e4b30e2ae5cb1e7d09a7c55345a419afd442613a1eb6b080682263587f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d622978cf22e2b30dbf8674275bbaaf39d0de68962709b40b67194d0521ca3d1e991e9a4e59853e634c111fc852d4719864c2990f2baaf35e1395efa3f67b63a
|
|
7
|
+
data.tar.gz: 3f828b5374841d0bcb8136a7c1aa078668c05c3673c794c50f86de9b1aee0bd915d090e022061b3e9636abb2dfb85a0b3bf1a7bbacff968635d9ed01c5f21edd
|
data/Cargo.lock
CHANGED
data/README.md
CHANGED
|
@@ -7,8 +7,8 @@ The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby an
|
|
|
7
7
|
```
|
|
8
8
|
Host process Wasm guest
|
|
9
9
|
┌──────────────────────┐ ┌──────────────────────┐
|
|
10
|
-
│ Kobako::Sandbox │
|
|
11
|
-
│ │
|
|
10
|
+
│ Kobako::Sandbox │ ─eval─▶ │ mruby interpreter │
|
|
11
|
+
│ │ ─run──▶ │ │
|
|
12
12
|
│ Services │ ◀──RPC─ │ KV::Lookup.call(k) │
|
|
13
13
|
│ KV::Lookup │ ─resp─▶ │ │
|
|
14
14
|
│ │ │ │
|
|
@@ -21,14 +21,18 @@ The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby an
|
|
|
21
21
|
|
|
22
22
|
## Features
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
-
|
|
24
|
+
| Feature | Description |
|
|
25
|
+
|---|---|
|
|
26
|
+
| In-process Wasm sandbox | No subprocess, no container. Both invocation verbs (`Sandbox#eval` for ad-hoc source, `Sandbox#run` for entrypoint dispatch) are synchronous Ruby calls. |
|
|
27
|
+
| Per-invocation caps | Every invocation enforces a wall-clock `timeout` (default 60 s) and a per-invocation linear-memory `memory_limit` (default 1 MiB); exhaustion raises `Kobako::TimeoutError` / `Kobako::MemoryLimitError`. |
|
|
28
|
+
| Capability injection via Services | Guest scripts can only call Ruby objects you explicitly `bind` under a two-level `Namespace::Member` path. |
|
|
29
|
+
| Preloaded snippets | `Sandbox#preload` registers source or RITE bytecode for setup-once dispatch via `Sandbox#run(:Entrypoint, *args, **kwargs)`. |
|
|
30
|
+
| Capability Handles | Services may return stateful host objects; the guest receives an opaque `Kobako::Handle` proxy it can use as the target of follow-up RPC calls, with no way to dereference it. `Sandbox#run` also accepts non-wire-representable Ruby objects as args and auto-wraps them into Handles, so the guest can use any host object the script needs. |
|
|
31
|
+
| Three-class error taxonomy | Every failure is exactly one of `TrapError`, `SandboxError`, or `ServiceError`, so you can route errors without inspecting messages. |
|
|
32
|
+
| Per-invocation state reset | Handles issued during one invocation are invalidated before the next; Service bindings and preloaded snippets remain. |
|
|
33
|
+
| Separated stdout / stderr capture | Guest writes to `$stdout` / `$stderr` are buffered per-channel (1 MiB default cap, configurable); overflow is clipped and reported by `#stdout_truncated?` / `#stderr_truncated?`. |
|
|
34
|
+
| Per-invocation usage readout | `Sandbox#usage` returns the most recent invocation's `wall_time` (Float seconds spent inside the wasm guest) and `memory_peak` (high-water `memory.grow` delta in bytes), populated on every outcome including `TrapError`, for budget diagnostics. |
|
|
35
|
+
| Curated mruby stdlib | Core extensions plus `mruby-onig-regexp` for full Onigmo `Regexp` support; no mrbgem with I/O, network, or syscall access is bundled. |
|
|
32
36
|
|
|
33
37
|
## Requirements
|
|
34
38
|
|
|
@@ -53,7 +57,7 @@ require "kobako"
|
|
|
53
57
|
|
|
54
58
|
sandbox = Kobako::Sandbox.new
|
|
55
59
|
|
|
56
|
-
result = sandbox.
|
|
60
|
+
result = sandbox.eval(<<~RUBY)
|
|
57
61
|
1 + 2
|
|
58
62
|
RUBY
|
|
59
63
|
|
|
@@ -73,14 +77,14 @@ sandbox = Kobako::Sandbox.new
|
|
|
73
77
|
sandbox.define(:KV).bind(:Lookup, ->(key) { redis.get(key) })
|
|
74
78
|
sandbox.define(:Log).bind(:Sink, ->(msg) { logger.info(msg) })
|
|
75
79
|
|
|
76
|
-
sandbox.
|
|
80
|
+
sandbox.eval(<<~RUBY)
|
|
77
81
|
Log::Sink.call("starting")
|
|
78
82
|
KV::Lookup.call("user_42")
|
|
79
83
|
RUBY
|
|
80
84
|
# => "..." (the redis value)
|
|
81
85
|
```
|
|
82
86
|
|
|
83
|
-
Names must match the Ruby constant pattern `/\A[A-Z]\w*\z/`. Services declared before the first
|
|
87
|
+
Names must match the Ruby constant pattern `/\A[A-Z]\w*\z/`. Services declared before the first invocation remain active across subsequent invocations; `define` after the first invocation (`#eval` or `#run`) raises `ArgumentError`.
|
|
84
88
|
|
|
85
89
|
### Keyword arguments
|
|
86
90
|
|
|
@@ -89,18 +93,18 @@ Keyword keys travel as Symbols and reach the host method as keyword arguments:
|
|
|
89
93
|
```ruby
|
|
90
94
|
sandbox.define(:Geo).bind(:Lookup, ->(name:, region:) { "#{region}/#{name}" })
|
|
91
95
|
|
|
92
|
-
sandbox.
|
|
96
|
+
sandbox.eval('Geo::Lookup.call(name: "alice", region: "us")')
|
|
93
97
|
# => "us/alice"
|
|
94
98
|
```
|
|
95
99
|
|
|
96
|
-
## Per-
|
|
100
|
+
## Per-invocation caps
|
|
97
101
|
|
|
98
|
-
Each Sandbox enforces a wall-clock timeout and a guest linear-memory cap on every `#run
|
|
102
|
+
Each Sandbox enforces a wall-clock timeout and a guest linear-memory cap on every invocation (`#eval` or `#run`). Both default to safe values; pass `nil` to `timeout` or `memory_limit` to disable that cap. The output caps (`stdout_limit` / `stderr_limit`) cannot be disabled — pass a large Integer instead.
|
|
99
103
|
|
|
100
104
|
```ruby
|
|
101
105
|
sandbox = Kobako::Sandbox.new(
|
|
102
106
|
timeout: 5.0, # seconds, default 60.0
|
|
103
|
-
memory_limit: 10 * 1024 * 1024, # bytes, default
|
|
107
|
+
memory_limit: 10 * 1024 * 1024, # bytes, default 1 MiB
|
|
104
108
|
stdout_limit: 64 * 1024, # bytes, default 1 MiB
|
|
105
109
|
stderr_limit: 64 * 1024
|
|
106
110
|
)
|
|
@@ -109,20 +113,37 @@ sandbox = Kobako::Sandbox.new(
|
|
|
109
113
|
| Cap | Raises (subclass of `TrapError`) | Default |
|
|
110
114
|
|----------------|------------------------------------|----------|
|
|
111
115
|
| `timeout` | `Kobako::TimeoutError` | 60.0 s |
|
|
112
|
-
| `memory_limit` | `Kobako::MemoryLimitError` |
|
|
116
|
+
| `memory_limit` | `Kobako::MemoryLimitError` | 1 MiB |
|
|
113
117
|
| `stdout_limit` | output silently clipped at cap | 1 MiB |
|
|
114
118
|
| `stderr_limit` | output silently clipped at cap | 1 MiB |
|
|
115
119
|
|
|
116
|
-
The timeout deadline is absolute wall-clock from
|
|
120
|
+
The timeout deadline is absolute wall-clock from invocation entry and is checked at guest Wasm safepoints. Long-running host Service callbacks still consume wall-clock time but do not themselves trap — the next guest safepoint will trap immediately on return if the deadline has passed.
|
|
121
|
+
|
|
122
|
+
`memory_limit` is scoped to the **per-invocation linear-memory delta** — the budget covers how far the current `#eval` / `#run` may grow linear memory (via `memory.grow`) past the size observed at invocation entry. The mruby image's initial allocation and prior invocations' high-water mark are folded into that entry baseline, so a Sandbox reused across many invocations does not silently accumulate against a global budget.
|
|
123
|
+
|
|
124
|
+
The 1 MiB default targets lightweight dynamic RPC workloads — short scripts that orchestrate Service calls, return small structured values, or replace a tool-calling layer in an AI Agent's Code Mode dispatch. Bump `memory_limit` when scripts compose multi-hundred-KiB strings, hold large composite return values, or run computations that allocate substantial intermediate state. Because the cap resets every invocation, multi-call patterns on one Sandbox do not need a budget that covers their cumulative footprint — only the largest single invocation's working set.
|
|
125
|
+
|
|
126
|
+
To see how much of the cap an invocation actually consumed, read `Sandbox#usage` after the call. It returns a `Kobako::Usage` value object with `wall_time` (Float seconds the guest export call spent inside wasmtime, aligned with the `timeout` accounting) and `memory_peak` (Integer high-water `memory.grow` delta in bytes, aligned with the `memory_limit` accounting). The fields are populated on every outcome, including the `TrapError` branches, so you can read them after rescuing a trap to diagnose which budget the failing invocation chewed through.
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
sandbox = Kobako::Sandbox.new(timeout: 1.0, memory_limit: 4 * 1024 * 1024)
|
|
130
|
+
|
|
131
|
+
begin
|
|
132
|
+
sandbox.eval("'x' * 5_000_000")
|
|
133
|
+
rescue Kobako::MemoryLimitError
|
|
134
|
+
sandbox.usage.memory_peak # => the largest delta accepted before the trap
|
|
135
|
+
sandbox.usage.wall_time # => seconds spent before the cap fired
|
|
136
|
+
end
|
|
137
|
+
```
|
|
117
138
|
|
|
118
139
|
## Capturing stdout and stderr
|
|
119
140
|
|
|
120
|
-
Guest output is captured into per-
|
|
141
|
+
Guest output is captured into per-invocation buffers and exposed independently from the return value. The buffers cover the full Ruby IO surface — `puts`, `print`, `printf`, `p`, `<<`, and writes through `$stdout` / `$stderr` — all routed through the host-captured WASI pipe.
|
|
121
142
|
|
|
122
143
|
```ruby
|
|
123
144
|
sandbox = Kobako::Sandbox.new
|
|
124
145
|
|
|
125
|
-
result = sandbox.
|
|
146
|
+
result = sandbox.eval(<<~RUBY)
|
|
126
147
|
puts "hello"
|
|
127
148
|
warn "be careful"
|
|
128
149
|
42
|
|
@@ -133,24 +154,24 @@ sandbox.stdout # => "hello\n"
|
|
|
133
154
|
sandbox.stderr # => "be careful\n"
|
|
134
155
|
```
|
|
135
156
|
|
|
136
|
-
Each
|
|
157
|
+
Each invocation clears the buffers at start. Output past the per-channel cap is clipped at the cap boundary — the invocation still returns normally, the bytes carry no truncation sentinel, and `#stdout_truncated?` / `#stderr_truncated?` flip to `true`.
|
|
137
158
|
|
|
138
159
|
```ruby
|
|
139
160
|
sandbox = Kobako::Sandbox.new(stdout_limit: 64 * 1024)
|
|
140
|
-
sandbox.
|
|
161
|
+
sandbox.eval('puts "a" * 100_000')
|
|
141
162
|
sandbox.stdout.bytesize # => 65_536
|
|
142
163
|
sandbox.stdout_truncated? # => true
|
|
143
164
|
```
|
|
144
165
|
|
|
145
166
|
## Error handling
|
|
146
167
|
|
|
147
|
-
Every `#run` either returns a value or raises exactly one of three classes:
|
|
168
|
+
Every invocation (`#eval` or `#run`) either returns a value or raises exactly one of three classes:
|
|
148
169
|
|
|
149
170
|
```ruby
|
|
150
171
|
begin
|
|
151
|
-
sandbox.
|
|
172
|
+
sandbox.eval(script)
|
|
152
173
|
rescue Kobako::TrapError => e
|
|
153
|
-
# Wasm engine fault OR per-
|
|
174
|
+
# Wasm engine fault OR per-invocation cap exhaustion:
|
|
154
175
|
# - Kobako::TimeoutError (wall-clock timeout)
|
|
155
176
|
# - Kobako::MemoryLimitError (memory_limit exceeded)
|
|
156
177
|
# - Kobako::TrapError (engine crash / wire-violation fallback)
|
|
@@ -166,13 +187,17 @@ end
|
|
|
166
187
|
|
|
167
188
|
`SandboxError` and `ServiceError` carry structured fields (`origin`, `klass`, `backtrace_lines`, `details`) when the guest produced a panic envelope. Named subclasses:
|
|
168
189
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
190
|
+
| Class | Parent | Trigger |
|
|
191
|
+
|----------------------------------------|--------------------|------------------------------------------------------------------------------------------|
|
|
192
|
+
| `Kobako::TimeoutError` | `TrapError` | Per-invocation `timeout` exhausted |
|
|
193
|
+
| `Kobako::MemoryLimitError` | `TrapError` | Per-invocation `memory_limit` exhausted |
|
|
194
|
+
| `Kobako::ServiceError::Disconnected` | `ServiceError` | RPC target Handle has been invalidated |
|
|
195
|
+
| `Kobako::HandleTableExhausted` | `SandboxError` | Per-invocation Handle counter reached its 2³¹ − 1 cap |
|
|
196
|
+
| `Kobako::BytecodeError` | `SandboxError` | `#preload(binary:)` payload failed RITE structural validation at first invocation replay |
|
|
172
197
|
|
|
173
198
|
## Capability Handles
|
|
174
199
|
|
|
175
|
-
When a Service returns a stateful host object (anything beyond `nil` / Boolean / Integer / Float / String / Symbol / Array / Hash), the wire layer transparently allocates an opaque Handle. The guest receives a `Kobako::
|
|
200
|
+
When a Service returns a stateful host object (anything beyond `nil` / Boolean / Integer / Float / String / Symbol / Array / Hash), the wire layer transparently allocates an opaque Handle. The guest receives a `Kobako::Handle` proxy it can use as the target of further RPC calls — but cannot dereference, forge from an integer, or smuggle across invocations.
|
|
176
201
|
|
|
177
202
|
```ruby
|
|
178
203
|
class Greeter
|
|
@@ -182,57 +207,178 @@ end
|
|
|
182
207
|
|
|
183
208
|
sandbox.define(:Factory).bind(:Make, ->(name) { Greeter.new(name) })
|
|
184
209
|
|
|
185
|
-
sandbox.
|
|
186
|
-
g = Factory::Make.call("Bob") # g is a Kobako::
|
|
210
|
+
sandbox.eval(<<~RUBY)
|
|
211
|
+
g = Factory::Make.call("Bob") # g is a Kobako::Handle proxy
|
|
187
212
|
g.greet # second RPC, routed to the Greeter
|
|
188
213
|
RUBY
|
|
189
214
|
# => "hi, Bob"
|
|
190
215
|
```
|
|
191
216
|
|
|
192
|
-
|
|
217
|
+
`Sandbox#run` accepts non-wire-representable host objects as args / kwargs values too: the host walks the argument tree, wraps every non-wire leaf through the same Handle path, and the guest sees a `Kobako::Handle` proxy in its place. This lets you pass framework objects (a Rack `env` Hash containing an `IO`-like body, an ActiveRecord model, an enumerator) into the entrypoint without first marshalling them into primitives.
|
|
218
|
+
|
|
219
|
+
```ruby
|
|
220
|
+
require "stringio"
|
|
221
|
+
|
|
222
|
+
sandbox = Kobako::Sandbox.new
|
|
223
|
+
sandbox.preload(code: "Echo = ->(body) { body.read.upcase }", name: :Echo)
|
|
224
|
+
|
|
225
|
+
sandbox.run(:Echo, StringIO.new("hello world"))
|
|
226
|
+
# => "HELLO WORLD"
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Handles are scoped to a single invocation — a Handle obtained in invocation N is invalid in invocation N+1, even on the same Sandbox.
|
|
193
230
|
|
|
194
231
|
## Setup-once, run-many
|
|
195
232
|
|
|
196
|
-
A single Sandbox can serve many
|
|
233
|
+
A single Sandbox can serve many invocations. Service bindings and preloaded snippets persist; capability state (Handles, stdout, stderr) resets between invocations.
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
───────────── setup phase (mutable) ─────────────
|
|
237
|
+
|
|
238
|
+
sandbox = Kobako::Sandbox.new
|
|
239
|
+
sandbox.define(:KV).bind(:Lookup, ...)
|
|
240
|
+
sandbox.preload(code: ..., name: :Adder)
|
|
241
|
+
sandbox.preload(code: ..., name: :Greeter)
|
|
242
|
+
|
|
243
|
+
│
|
|
244
|
+
▼
|
|
245
|
+
|
|
246
|
+
═════════════════ seal point ═════════════════
|
|
247
|
+
First #eval or #run freezes the Service registry
|
|
248
|
+
and snippet table. Further define / preload now
|
|
249
|
+
raise ArgumentError.
|
|
250
|
+
|
|
251
|
+
│
|
|
252
|
+
▼
|
|
253
|
+
|
|
254
|
+
──────────────── invocation N ───────────────────
|
|
255
|
+
|
|
256
|
+
1. allocate fresh mrb_state
|
|
257
|
+
|
|
258
|
+
2. replay snippets (in insertion order):
|
|
259
|
+
:Adder → defines Adder
|
|
260
|
+
:Greeter → defines Greeter
|
|
261
|
+
|
|
262
|
+
3. dispatch: eval(source) or run(:Target, *args)
|
|
263
|
+
|
|
264
|
+
4. return value to host
|
|
265
|
+
|
|
266
|
+
5. discard mrb_state; reset per-invocation state:
|
|
267
|
+
· Handles invalidated
|
|
268
|
+
· stdout / stderr buffers cleared
|
|
269
|
+
· memory delta zeroed
|
|
270
|
+
|
|
271
|
+
Services + snippets persist; invocation N+1 repeats.
|
|
272
|
+
```
|
|
197
273
|
|
|
198
274
|
```ruby
|
|
199
275
|
sandbox = Kobako::Sandbox.new
|
|
200
276
|
sandbox.define(:Data).bind(:Fetch, ->(id) { records[id] })
|
|
201
277
|
|
|
202
|
-
sandbox.
|
|
203
|
-
sandbox.
|
|
278
|
+
sandbox.eval('Data::Fetch.call("a")') # => "..."
|
|
279
|
+
sandbox.eval('Data::Fetch.call("b")') # => "..." (same bindings, fresh state)
|
|
204
280
|
```
|
|
205
281
|
|
|
206
282
|
For workloads that must be isolated from each other (e.g., one Sandbox per tenant, per student submission), construct a fresh `Kobako::Sandbox` per scope. wasmtime's Engine and the compiled Module are cached at process scope, so additional Sandboxes amortize cold-start cost automatically.
|
|
207
283
|
|
|
284
|
+
## Preloaded snippets and entrypoint dispatch
|
|
285
|
+
|
|
286
|
+
`Sandbox#preload` registers named mruby snippets that replay against the fresh `mrb_state` before every invocation; `Sandbox#run(:Target, *args, **kwargs)` dispatches into a top-level `Object` constant defined by those snippets and returns the value of `Target.call(*args, **kwargs)`. Together they cover setup-once / dispatch-many workloads where the same logic is exercised across many requests.
|
|
287
|
+
|
|
288
|
+
```ruby
|
|
289
|
+
sandbox = Kobako::Sandbox.new
|
|
290
|
+
sandbox.preload(code: "Adder = ->(a, b) { a + b }", name: :Adder)
|
|
291
|
+
sandbox.preload(code: 'Greeter = ->(name:) { "hello, #{name}" }', name: :Greeter)
|
|
292
|
+
|
|
293
|
+
sandbox.run(:Adder, 2, 3) # => 5
|
|
294
|
+
sandbox.run(:Greeter, name: "world") # => "hello, world"
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
`#preload` accepts two payload forms:
|
|
298
|
+
|
|
299
|
+
| Form | Signature | Snippet name source | Validation timing |
|
|
300
|
+
|----------|----------------------------------------|-------------------------------------|------------------------------------------------------------------------------------------|
|
|
301
|
+
| Source | `preload(code: "...", name: :Const)` | The `name:` keyword | Trial-compiled at preload time; compile errors raise immediately |
|
|
302
|
+
| Bytecode | `preload(binary: bytes)` | Read from the bytecode's `debug_info` | Structural validation runs at first invocation; failure raises `Kobako::BytecodeError` |
|
|
303
|
+
|
|
304
|
+
The source form trial-compiles each snippet against a fresh `mrb_state` at preload time, so compile errors surface immediately at the `#preload` call. The bytecode form treats `binary:` as opaque bytes and defers RITE version / body validation to the first invocation's replay, because that is when the payload loads into a fresh `mrb_state`. Bytecode compiled without `debug_info` (`mrbc` without `-g`) is still accepted — only its backtrace frames are omitted, while exception class, message, and `origin` attribution are preserved.
|
|
305
|
+
|
|
306
|
+
Snippets replay in insertion order, so later snippets can reference constants defined by earlier ones. The snippet table is sealed by the first invocation alongside Service registration; additional `#preload` calls after the first `#eval` or `#run` raise `ArgumentError`.
|
|
307
|
+
|
|
308
|
+
```
|
|
309
|
+
per-invocation replay (every #eval / #run, snippets in insertion order):
|
|
310
|
+
|
|
311
|
+
fresh mrb_state
|
|
312
|
+
│
|
|
313
|
+
├──▶ replay :Adder (defines Adder)
|
|
314
|
+
│
|
|
315
|
+
├──▶ replay :Greeter (defines Greeter)
|
|
316
|
+
│
|
|
317
|
+
└──▶ eval(source) -or- run(:Target, *args, **kwargs)
|
|
318
|
+
│
|
|
319
|
+
▼
|
|
320
|
+
return value, then mrb_state discarded
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
`#run` resolves `target` (Symbol or String, normalized to Symbol) only as a top-level `Object` constant — `::`-segmented names and lowercase forms fail at host pre-flight with `ArgumentError`. A `Kobako::SandboxError` surfaces when the constant is missing or does not respond to `#call`.
|
|
324
|
+
|
|
325
|
+
### Choosing between source and bytecode
|
|
326
|
+
|
|
327
|
+
Use the **source form** when snippets are authored in your repo or generated at boot — compile errors land at the `#preload` call so a misbehaving snippet fails fast at setup time, and no separate `mrbc` toolchain is needed. The trial-compile happens once per snippet (~2.5 µs per snippet) and is paid at preload, not on the request hot path.
|
|
328
|
+
|
|
329
|
+
Use the **bytecode form** when snippets ship as build artifacts from a pipeline that runs `mrbc` separately — for example, when source bodies should not be embedded in the running process, when you want a build step that compiles and packages snippets ahead of release, or when you want `Exception#backtrace` frames attributed to the bytecode's `debug_info` filename rather than a host-supplied `name:` keyword. Structural validation (RITE version, body integrity) is deferred to the first invocation, so a malformed bytecode payload surfaces as `Kobako::BytecodeError` on the first `#eval` or `#run`, not at `#preload`.
|
|
330
|
+
|
|
331
|
+
Both forms behave identically at dispatch time and replay through the same per-invocation path, so the choice between them is about your build / distribution pipeline and where you want errors to land, not about runtime cost.
|
|
332
|
+
|
|
208
333
|
## Performance
|
|
209
334
|
|
|
210
|
-
|
|
335
|
+
Order-of-magnitude figures for capacity planning on macOS arm64, Ruby 3.4.7, YJIT off. Absolute values vary by hardware but the ratios are stable across machines. Detailed numbers and methodology live in [`benchmark/README.md`](benchmark/README.md).
|
|
211
336
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
|
215
|
-
|
|
216
|
-
|
|
|
217
|
-
|
|
|
218
|
-
|
|
|
219
|
-
|
|
|
220
|
-
|
|
|
221
|
-
|
|
|
222
|
-
|
|
|
223
|
-
| 1 000
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
337
|
+
### Lifecycle costs
|
|
338
|
+
|
|
339
|
+
| Phase | Cost |
|
|
340
|
+
|-------------------------------------------------------------|-------------------------------------------------|
|
|
341
|
+
| First `Sandbox.new` in a fresh process (Engine + Module JIT) | ~600 ms one-time |
|
|
342
|
+
| Subsequent `Sandbox.new` (Engine cache warm) | ~130 µs |
|
|
343
|
+
| Reusing a Sandbox for one `#eval("nil")` | ~135 µs |
|
|
344
|
+
| Fresh `Sandbox.new` per request | ~275 µs (≈ +140 µs vs reuse) |
|
|
345
|
+
| Warm `#run(:Entrypoint, ...)` dispatch | ~165 µs |
|
|
346
|
+
| Per-RPC cost amortized inside one invocation | ~6.6 µs (1 000 RPCs in one `#eval` ≈ 6.6 ms) |
|
|
347
|
+
| 100 000-iteration integer XOR loop in mruby | ~43 ms |
|
|
348
|
+
| 1 000 Onigmo `Regexp =~` matches | ~3 µs each |
|
|
349
|
+
|
|
350
|
+
The ~600 ms cold start dominates the first Sandbox in a process — wasmtime JIT-compiles the precompiled `kobako.wasm` Module and the result is cached at process scope. Construct one Sandbox at boot before serving requests so the JIT cost lands off the hot path.
|
|
351
|
+
|
|
352
|
+
### Memory budget
|
|
353
|
+
|
|
354
|
+
| Allocation | Cost |
|
|
355
|
+
|---------------------------------------------|----------------------------------------------------------------------------|
|
|
356
|
+
| Process RSS after first `Sandbox.new` | ~165-195 MB (one-time engine + module + first instance) |
|
|
357
|
+
| Per additional Sandbox | ~580 KB (Wasm instance + linear memory + WASI capture pipes) |
|
|
358
|
+
| 1 000 isolated tenants in one process | ~765 MB total |
|
|
359
|
+
|
|
360
|
+
Use these as upper-bound budgets for capacity planning, not lower bounds — actual RSS shifts ~30% with host process load and macOS allocator state.
|
|
361
|
+
|
|
362
|
+
### Choosing your pattern
|
|
363
|
+
|
|
364
|
+
When the script is ad-hoc (LLM-generated, untrusted user input) and only runs once, use `Sandbox#eval(source)`. Per-invocation cost is ~135 µs of setup plus the script's own runtime; mruby parses the source on every call.
|
|
365
|
+
|
|
366
|
+
When you have a fixed set of entrypoints exercised many times — a stable AI Agent tool-call protocol, a plug-in registry loaded at boot, a small library of host-side commands — preload the entrypoints via `Sandbox#preload(code:, name:)` once at setup and dispatch via `Sandbox#run(:Target, *args, **kwargs)`. The mruby source compile (~2.5 µs per snippet) lands once at preload, not on every request, and warm dispatch costs ~165 µs.
|
|
367
|
+
|
|
368
|
+
Mind the snippet replay cost. Every preloaded snippet replays into a fresh `mrb_state` before **every** invocation, whether the invocation is `#eval` or `#run`, at ~7-9 µs per snippet per invocation. Preloading 8 helpers adds ~60 µs to every subsequent invocation; preloading 64 helpers adds ~565 µs. Keep the snippet count proportionate to how often the helpers are actually used — preloading rarely-touched helpers is more expensive than inlining or re-eval'ing them.
|
|
369
|
+
|
|
370
|
+
For tenant isolation between mutually untrusted scopes, construct a fresh `Kobako::Sandbox` per scope. Per-request construction costs ~140 µs over reuse plus ~580 KB of RSS — comfortably affordable for 1 000+ isolated tenants in one Sidekiq / Puma worker. Reuse a Sandbox when all requests share one trust scope; isolate when scripts come from many.
|
|
371
|
+
|
|
372
|
+
### Concurrency
|
|
373
|
+
|
|
374
|
+
`ext/` does not release the GVL during wasmtime execution, so wasm work is GVL-serialized: aggregate throughput across N Threads stays around 7-8k `#eval`/s regardless of N. Ruby-side `#eval` setup can still overlap, so a short `#eval` running while another Thread is in a long `#eval` is slowed by ~2× (not 10×) — host-side synchronization yields the GVL and the contending Thread interleaves. Mixed short / long workloads in one process do not deadlock.
|
|
375
|
+
|
|
376
|
+
### Regression gate
|
|
377
|
+
|
|
378
|
+
A +10% regression on any of the five SPEC-mandated benchmarks (cold_start, RPC roundtrip, codec, mruby VM, HandleTable) blocks release. The full per-suite breakdown is in [`benchmark/README.md`](benchmark/README.md).
|
|
233
379
|
|
|
234
380
|
```bash
|
|
235
|
-
bundle exec rake bench # five gated regression benchmarks (≤ 1 MiB payloads
|
|
381
|
+
bundle exec rake bench # five gated regression benchmarks (~5-8 min, ≤ 1 MiB payloads)
|
|
236
382
|
```
|
|
237
383
|
|
|
238
384
|
## Development
|
data/data/kobako.wasm
CHANGED
|
Binary file
|
data/ext/kobako/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kobako"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
authors = ["Aotokitsuruya <contact@aotoki.me>"]
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -29,7 +29,7 @@ wasmtime = { version = "44.0.1", default-features = false, features = [
|
|
|
29
29
|
"wat",
|
|
30
30
|
] }
|
|
31
31
|
# wasmtime-wasi provides WASI preview1 support for routing guest stdout/stderr
|
|
32
|
-
# into in-memory buffers (
|
|
32
|
+
# into in-memory buffers (docs/behavior.md §B-04). The `p1` feature enables the
|
|
33
33
|
# WasiCtxBuilder + preview1 adapter which wires fd 1/2 to pipes. We omit
|
|
34
34
|
# `p2` (component-model) and `p0`/`p3` (async) because kobako runs
|
|
35
35
|
# synchronous sandboxes only.
|
|
@@ -34,7 +34,7 @@ static SHARED_ENGINE: OnceLock<WtEngine> = OnceLock::new();
|
|
|
34
34
|
static MODULE_CACHE: OnceLock<Mutex<HashMap<PathBuf, WtModule>>> = OnceLock::new();
|
|
35
35
|
|
|
36
36
|
/// Ticker cadence for the process-singleton epoch ticker. Bounds the
|
|
37
|
-
/// granularity of the
|
|
37
|
+
/// granularity of the docs/behavior.md B-01 wall-clock timeout: the
|
|
38
38
|
/// `epoch_deadline_callback` fires once per tick (`Continue(1)`), so the
|
|
39
39
|
/// trap can lag the deadline by at most one tick under nominal
|
|
40
40
|
/// scheduling. 10 ms keeps the lag small enough that it does not skew
|
|
@@ -53,8 +53,8 @@ const EPOCH_TICK: Duration = Duration::from_millis(10);
|
|
|
53
53
|
/// instructions.
|
|
54
54
|
///
|
|
55
55
|
/// Also enables `epoch_interruption(true)` so every Store can install an
|
|
56
|
-
/// `epoch_deadline_callback` for the per-run wall-clock cap
|
|
57
|
-
/// B-01, E-19). The first call spawns the process-singleton ticker
|
|
56
|
+
/// `epoch_deadline_callback` for the per-run wall-clock cap
|
|
57
|
+
/// (docs/behavior.md B-01, E-19). The first call spawns the process-singleton ticker
|
|
58
58
|
/// thread that drives `engine.increment_epoch()` at [`EPOCH_TICK`]
|
|
59
59
|
/// cadence; subsequent calls reuse the same engine and ticker.
|
|
60
60
|
pub(crate) fn shared_engine() -> Result<&'static WtEngine, MagnusError> {
|
|
@@ -112,16 +112,24 @@ pub(crate) fn cached_module(path: &Path) -> Result<WtModule, MagnusError> {
|
|
|
112
112
|
return Err(MagnusError::new(
|
|
113
113
|
ruby.get_inner(&MODULE_NOT_BUILT_ERROR),
|
|
114
114
|
format!(
|
|
115
|
-
"
|
|
115
|
+
"Sandbox runtime not found at {}; run `bundle exec rake wasm:build` to build it",
|
|
116
116
|
path.display()
|
|
117
117
|
),
|
|
118
118
|
));
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
-
let bytes =
|
|
122
|
-
|
|
121
|
+
let bytes = fs::read(path).map_err(|e| {
|
|
122
|
+
wasm_err(
|
|
123
|
+
&ruby,
|
|
124
|
+
format!(
|
|
125
|
+
"failed to read Sandbox runtime at {}: {}",
|
|
126
|
+
path.display(),
|
|
127
|
+
e
|
|
128
|
+
),
|
|
129
|
+
)
|
|
130
|
+
})?;
|
|
123
131
|
let module = WtModule::new(shared_engine()?, &bytes)
|
|
124
|
-
.map_err(|e| wasm_err(&ruby, format!("compile
|
|
132
|
+
.map_err(|e| wasm_err(&ruby, format!("failed to compile Sandbox runtime: {}", e)))?;
|
|
125
133
|
cache
|
|
126
134
|
.lock()
|
|
127
135
|
.expect("module cache mutex poisoned")
|