kobako 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -0
- data/CHANGELOG.md +44 -0
- data/Cargo.lock +1 -1
- data/README.md +89 -205
- data/data/kobako.wasm +0 -0
- data/ext/kobako/Cargo.toml +1 -1
- data/ext/kobako/src/lib.rs +4 -2
- data/ext/kobako/src/{wasm → runtime}/cache.rs +12 -16
- data/ext/kobako/src/runtime/capture.rs +91 -0
- data/ext/kobako/src/runtime/config.rs +26 -0
- data/ext/kobako/src/runtime/dispatch.rs +211 -0
- data/ext/kobako/src/runtime/exports.rs +51 -0
- data/ext/kobako/src/runtime/guest_mem.rs +228 -0
- data/ext/kobako/src/{wasm/host_state.rs → runtime/invocation.rs} +94 -86
- data/ext/kobako/src/runtime/trap.rs +134 -0
- data/ext/kobako/src/runtime.rs +782 -0
- data/ext/kobako/src/snapshot.rs +110 -0
- data/lib/kobako/capture.rb +11 -16
- data/lib/kobako/catalog/handles.rb +107 -0
- data/lib/kobako/catalog/namespaces.rb +100 -0
- data/lib/kobako/{snippet/table.rb → catalog/snippets.rb} +37 -62
- data/lib/kobako/catalog.rb +18 -0
- data/lib/kobako/codec/decoder.rb +13 -5
- data/lib/kobako/codec/factory.rb +12 -12
- data/lib/kobako/codec/utils.rb +83 -59
- data/lib/kobako/codec.rb +6 -3
- data/lib/kobako/errors.rb +45 -28
- data/lib/kobako/fault.rb +40 -0
- data/lib/kobako/handle.rb +4 -6
- data/lib/kobako/namespace.rb +67 -0
- data/lib/kobako/outcome.rb +31 -35
- data/lib/kobako/runtime.rb +30 -0
- data/lib/kobako/sandbox.rb +88 -72
- data/lib/kobako/sandbox_options.rb +6 -9
- data/lib/kobako/snapshot.rb +40 -0
- data/lib/kobako/snippet/binary.rb +6 -7
- data/lib/kobako/snippet/source.rb +8 -8
- data/lib/kobako/snippet.rb +7 -9
- data/lib/kobako/transport/dispatcher.rb +195 -0
- data/lib/kobako/{rpc/wire_error.rb → transport/error.rb} +7 -6
- data/lib/kobako/transport/request.rb +79 -0
- data/lib/kobako/transport/response.rb +69 -0
- data/lib/kobako/transport/run.rb +141 -0
- data/lib/kobako/transport/yield.rb +91 -0
- data/lib/kobako/transport/yielder.rb +108 -0
- data/lib/kobako/transport.rb +24 -0
- data/lib/kobako/version.rb +1 -1
- data/lib/kobako.rb +4 -4
- data/release-please-config.json +24 -0
- data/sig/kobako/capture.rbs +0 -2
- data/sig/kobako/catalog/handles.rbs +19 -0
- data/sig/kobako/catalog/namespaces.rbs +17 -0
- data/sig/kobako/{snippet/table.rbs → catalog/snippets.rbs} +2 -11
- data/sig/kobako/{rpc.rbs → catalog.rbs} +1 -1
- data/sig/kobako/codec/decoder.rbs +2 -1
- data/sig/kobako/codec/factory.rbs +2 -2
- data/sig/kobako/codec/utils.rbs +7 -5
- data/sig/kobako/errors.rbs +7 -7
- data/sig/kobako/fault.rbs +19 -0
- data/sig/kobako/handle.rbs +2 -3
- data/sig/kobako/namespace.rbs +19 -0
- data/sig/kobako/outcome.rbs +2 -2
- data/sig/kobako/runtime.rbs +23 -0
- data/sig/kobako/sandbox.rbs +5 -8
- data/sig/kobako/snapshot.rbs +15 -0
- data/sig/kobako/transport/dispatcher.rbs +34 -0
- data/sig/kobako/transport/error.rbs +6 -0
- data/sig/kobako/transport/request.rbs +32 -0
- data/sig/kobako/transport/response.rbs +30 -0
- data/sig/kobako/transport/run.rbs +27 -0
- data/sig/kobako/transport/yield.rbs +34 -0
- data/sig/kobako/transport/yielder.rbs +24 -0
- data/sig/kobako/transport.rbs +4 -0
- metadata +48 -30
- data/ext/kobako/src/wasm/dispatch.rs +0 -162
- data/ext/kobako/src/wasm/instance.rs +0 -873
- data/ext/kobako/src/wasm.rs +0 -126
- data/lib/kobako/handle_table.rb +0 -119
- data/lib/kobako/invocation.rb +0 -143
- data/lib/kobako/rpc/dispatcher.rb +0 -171
- data/lib/kobako/rpc/envelope.rb +0 -118
- data/lib/kobako/rpc/fault.rb +0 -41
- data/lib/kobako/rpc/namespace.rb +0 -74
- data/lib/kobako/rpc/server.rb +0 -146
- data/lib/kobako/rpc.rb +0 -11
- data/lib/kobako/wasm.rb +0 -25
- data/sig/kobako/handle_table.rbs +0 -23
- data/sig/kobako/invocation.rbs +0 -25
- data/sig/kobako/rpc/dispatcher.rbs +0 -33
- data/sig/kobako/rpc/envelope.rbs +0 -51
- data/sig/kobako/rpc/fault.rbs +0 -20
- data/sig/kobako/rpc/namespace.rbs +0 -24
- data/sig/kobako/rpc/server.rbs +0 -31
- data/sig/kobako/rpc/wire_error.rbs +0 -6
- data/sig/kobako/wasm.rbs +0 -41
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e6bac8a7b2fd8ff003b41057940921c0cb8357c8e97e66d235eb22b3d361ebd4
|
|
4
|
+
data.tar.gz: 54a8945f945db0062a35700078fa195461c74d760ca7d34c0b72409b544f5eb9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 99754b3a49329e4faa3e7d96f793bf749d31c6ef0c1739c34db7ce8cef7df250af847efc2e5fb0a4c5f280e6d4c0b0eba0694b3a815e232aa5ac5a242ea805ab
|
|
7
|
+
data.tar.gz: c7a70f38b8e8a1342a7623cb51cf99bf31dc5aaa2929f4eaa921054c90a8a17815c271617f6c1c4f72b305ab7ceeaa88077dd152583f8566ffe51e8a09de9fee
|
data/.release-please-manifest.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{".":"0.6.0"}
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.6.0](https://github.com/elct9620/kobako/compare/v0.5.0...v0.6.0) (2026-05-28)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **bench:** gate against a committed anchor baseline ([ed8b30e](https://github.com/elct9620/kobako/commit/ed8b30e0940736cbabcca18227590d07c3bf94d3))
|
|
9
|
+
* **handle:** restore guest-returned Capability Handles to host objects (B-37) ([092815d](https://github.com/elct9620/kobako/commit/092815d610d3595db82b406d4b67880c84f11900))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
### Bug Fixes
|
|
13
|
+
|
|
14
|
+
* **bench:** harden the gate guards and split judgment from the runner ([d6eaae2](https://github.com/elct9620/kobako/commit/d6eaae2de44d14a4735fbb544da712c659144a86))
|
|
15
|
+
* **ci:** chain release.yml from release-please via workflow_call ([711665d](https://github.com/elct9620/kobako/commit/711665d29a8c8445b1e26ca08e4b0efc5b24982c))
|
|
16
|
+
* **handle:** don't restore a Handle broken out of a guest block (B-37) ([ea25ab9](https://github.com/elct9620/kobako/commit/ea25ab9793f376f15e8d668077ad58f8d67e5a63))
|
|
17
|
+
|
|
18
|
+
## [0.5.0](https://github.com/elct9620/kobako/compare/v0.4.0...v0.5.0) (2026-05-27)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
### Features
|
|
22
|
+
|
|
23
|
+
* **abi:** add `__kobako_yield_to_block` skeleton + host re-entry channel ([555eb4b](https://github.com/elct9620/kobako/commit/555eb4bf578c3c4397ba2c0d105c0d3ca687e23c))
|
|
24
|
+
* **abi:** classify RBreak via ci_break_index for B-25 / E-21 ([32668a0](https://github.com/elct9620/kobako/commit/32668a033e2f959700acadadfbc41388ed72a2dd))
|
|
25
|
+
* **abi:** wire `__kobako_yield_to_block` to real `mrb_yield_argv` ([35aeac8](https://github.com/elct9620/kobako/commit/35aeac8700254d1500f5be837a72c56984a7ebfa))
|
|
26
|
+
* **bench:** add noise-aware release gate, report mean alongside median ([0cfaebc](https://github.com/elct9620/kobako/commit/0cfaebc2afadfae81e3d00441273da70e396d7a5))
|
|
27
|
+
* **bench:** add yield round-trip suite as gated benchmark [#6](https://github.com/elct9620/kobako/issues/6) ([315f923](https://github.com/elct9620/kobako/commit/315f923caa89bcd8752a611525da68ae53ae092f))
|
|
28
|
+
* **catalog:** introduce empty Kobako::Catalog namespace ([8af8c54](https://github.com/elct9620/kobako/commit/8af8c54c72e5e5193555bcc2e86072d4a4d8176d))
|
|
29
|
+
* **ext:** enforce the 16 MiB single-dispatch payload cap on host boundaries ([c80e281](https://github.com/elct9620/kobako/commit/c80e281e0810640c60d93174beddd49a31c34182))
|
|
30
|
+
* **guest:** capture guest blocks via `n*&` argspec + LIFO BLOCK_STACK ([aa55556](https://github.com/elct9620/kobako/commit/aa55556aab23c159078d0ba0ea47ed878b26e89d))
|
|
31
|
+
* **rpc:** build block proxy for guest-supplied yield blocks ([b6d6cf7](https://github.com/elct9620/kobako/commit/b6d6cf7f5ca857f55aafea62631b243f688c61a6))
|
|
32
|
+
* **rpc:** catch/throw + frame invalidator close B-25 / B-28 / E-23 ([3b21f25](https://github.com/elct9620/kobako/commit/3b21f252fafdd2070f3953460509e24a0e643d88))
|
|
33
|
+
* **transport:** introduce empty Kobako::Transport namespace ([85cda26](https://github.com/elct9620/kobako/commit/85cda268000490f521424339bec1664d0b33478b))
|
|
34
|
+
* **wire:** add `block_given` field to Request envelope ([30e004f](https://github.com/elct9620/kobako/commit/30e004fa8f00739e68883889c5225c98cf9521fe))
|
|
35
|
+
* **wire:** add YieldResponse envelope codec on both sides ([4592567](https://github.com/elct9620/kobako/commit/459256784af616d70738ffd0f56c3b15244b3e7c))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
### Bug Fixes
|
|
39
|
+
|
|
40
|
+
* **bench:** restore renamed class references so rake bench runs ([76140cc](https://github.com/elct9620/kobako/commit/76140cc99922973fc305aab6ba727a832ddbe7ba))
|
|
41
|
+
* **ext:** GC-root the dispatch Proc via a pinning mark on Kobako::Runtime ([f31bd07](https://github.com/elct9620/kobako/commit/f31bd071201b5fed7376bd13b876f103d6c6a5d6))
|
|
42
|
+
* **ext:** raise SandboxError, not TrapError, when #run envelope alloc fails ([a1981fe](https://github.com/elct9620/kobako/commit/a1981fea7438090a76758147e7e84543e9d96968))
|
|
43
|
+
* **transport:** fill E-xx placeholder and drop BLOCK_RESEARCH citations ([816ff80](https://github.com/elct9620/kobako/commit/816ff804535196036bec01fcd980e25036211b80))
|
|
44
|
+
* **wasm:** reject unrepresentable guest return values instead of stringifying ([c3fd069](https://github.com/elct9620/kobako/commit/c3fd0698cb168b55502fb86065406caf9a7744e1))
|
data/Cargo.lock
CHANGED
data/README.md
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
# Kobako
|
|
2
2
|
|
|
3
|
+
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/elct9620/kobako)
|
|
4
|
+
|
|
3
5
|
Kobako is a Ruby gem that embeds a Wasm-isolated mruby interpreter inside your application, so you can execute untrusted Ruby scripts (LLM-generated code, user formulas, student submissions, third-party plugins) in-process without giving them access to host memory, files, network, or credentials.
|
|
4
6
|
|
|
5
|
-
The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby and
|
|
7
|
+
The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby and a Transport proxy. The only way a guest script can reach the outside world is through Host App-declared **Services** — named Ruby objects you explicitly inject into the sandbox; the guest sees each one as a proxy that forwards calls back to the host over the Transport wire.
|
|
6
8
|
|
|
7
9
|
```
|
|
8
10
|
Host process Wasm guest
|
|
9
11
|
┌──────────────────────┐ ┌──────────────────────┐
|
|
10
12
|
│ Kobako::Sandbox │ ─eval─▶ │ mruby interpreter │
|
|
11
13
|
│ │ ─run──▶ │ │
|
|
12
|
-
│ Services │
|
|
14
|
+
│ Services │ ◀─call─ │ KV::Lookup.call(k) │
|
|
13
15
|
│ KV::Lookup │ ─resp─▶ │ │
|
|
14
16
|
│ │ │ │
|
|
15
17
|
│ stdout / stderr buf │ ◀─pipe─ │ puts / warn │
|
|
@@ -19,21 +21,6 @@ The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby an
|
|
|
19
21
|
trusted untrusted
|
|
20
22
|
```
|
|
21
23
|
|
|
22
|
-
## Features
|
|
23
|
-
|
|
24
|
-
| Feature | Description |
|
|
25
|
-
|---|---|
|
|
26
|
-
| In-process Wasm sandbox | No subprocess, no container. Both invocation verbs (`Sandbox#eval` for ad-hoc source, `Sandbox#run` for entrypoint dispatch) are synchronous Ruby calls. |
|
|
27
|
-
| Per-invocation caps | Every invocation enforces a wall-clock `timeout` (default 60 s) and a per-invocation linear-memory `memory_limit` (default 1 MiB); exhaustion raises `Kobako::TimeoutError` / `Kobako::MemoryLimitError`. |
|
|
28
|
-
| Capability injection via Services | Guest scripts can only call Ruby objects you explicitly `bind` under a two-level `Namespace::Member` path. |
|
|
29
|
-
| Preloaded snippets | `Sandbox#preload` registers source or RITE bytecode for setup-once dispatch via `Sandbox#run(:Entrypoint, *args, **kwargs)`. |
|
|
30
|
-
| Capability Handles | Services may return stateful host objects; the guest receives an opaque `Kobako::Handle` proxy it can use as the target of follow-up RPC calls, with no way to dereference it. `Sandbox#run` also accepts non-wire-representable Ruby objects as args and auto-wraps them into Handles, so the guest can use any host object the script needs. |
|
|
31
|
-
| Three-class error taxonomy | Every failure is exactly one of `TrapError`, `SandboxError`, or `ServiceError`, so you can route errors without inspecting messages. |
|
|
32
|
-
| Per-invocation state reset | Handles issued during one invocation are invalidated before the next; Service bindings and preloaded snippets remain. |
|
|
33
|
-
| Separated stdout / stderr capture | Guest writes to `$stdout` / `$stderr` are buffered per-channel (1 MiB default cap, configurable); overflow is clipped and reported by `#stdout_truncated?` / `#stderr_truncated?`. |
|
|
34
|
-
| Per-invocation usage readout | `Sandbox#usage` returns the most recent invocation's `wall_time` (Float seconds spent inside the wasm guest) and `memory_peak` (high-water `memory.grow` delta in bytes), populated on every outcome including `TrapError`, for budget diagnostics. |
|
|
35
|
-
| Curated mruby stdlib | Core extensions plus `mruby-onig-regexp` for full Onigmo `Regexp` support; no mrbgem with I/O, network, or syscall access is bundled. |
|
|
36
|
-
|
|
37
24
|
## Requirements
|
|
38
25
|
|
|
39
26
|
- **Ruby ≥ 3.3.0**
|
|
@@ -61,88 +48,75 @@ result = sandbox.eval(<<~RUBY)
|
|
|
61
48
|
1 + 2
|
|
62
49
|
RUBY
|
|
63
50
|
|
|
64
|
-
result
|
|
65
|
-
sandbox.stdout # => ""
|
|
51
|
+
result # => 3
|
|
66
52
|
```
|
|
67
53
|
|
|
68
54
|
The script executes inside the Wasm guest. It cannot read your filesystem, open sockets, or touch your `ENV`.
|
|
69
55
|
|
|
70
|
-
##
|
|
56
|
+
## Usage
|
|
71
57
|
|
|
72
|
-
|
|
58
|
+
### Injecting Services
|
|
59
|
+
|
|
60
|
+
Declare a Namespace, then `bind` any Ruby object as a Member; the guest reaches it as a `<Namespace>::<Member>` proxy and invokes its public methods through the Transport wire. See [`docs/behavior.md`](docs/behavior.md) B-07..B-12.
|
|
73
61
|
|
|
74
62
|
```ruby
|
|
75
|
-
|
|
63
|
+
class User
|
|
64
|
+
attr_reader :name
|
|
76
65
|
|
|
77
|
-
|
|
78
|
-
|
|
66
|
+
def initialize(name:)
|
|
67
|
+
@name = name
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
sandbox.define(:Project).bind(:User, User.new(name: "alice"))
|
|
72
|
+
sandbox.define(:KV) .bind(:Lookup, ->(key) { redis.get(key) })
|
|
79
73
|
|
|
80
74
|
sandbox.eval(<<~RUBY)
|
|
81
|
-
|
|
82
|
-
KV::Lookup.call("user_42")
|
|
75
|
+
Project::User.name # => "alice"
|
|
76
|
+
KV::Lookup.call("user_42") # => "..."
|
|
83
77
|
RUBY
|
|
84
|
-
# => "..." (the redis value)
|
|
85
78
|
```
|
|
86
79
|
|
|
87
|
-
Names must match
|
|
80
|
+
Names must match `/\A[A-Z]\w*\z/`. Symbol kwargs travel transparently to the host method's keyword arguments. The registry seals at the first invocation; later `#define` raises `ArgumentError`.
|
|
88
81
|
|
|
89
|
-
###
|
|
82
|
+
### Yielding to guest blocks
|
|
90
83
|
|
|
91
|
-
|
|
84
|
+
A Service method can accept a guest-supplied block via `&blk` and `yield` into it. The block body runs inside the Wasm guest; `break` / `next` / exceptions follow normal Ruby semantics, scoped to the single dispatch. See [`docs/behavior.md`](docs/behavior.md) B-23..B-30.
|
|
92
85
|
|
|
93
86
|
```ruby
|
|
94
|
-
sandbox.define(:
|
|
87
|
+
sandbox.define(:Seq).bind(:Map, ->(items, &blk) { items.map(&blk) })
|
|
95
88
|
|
|
96
|
-
sandbox.eval('
|
|
97
|
-
# =>
|
|
89
|
+
sandbox.eval('Seq::Map.call([1, 2, 3]) { |x| x * 2 }')
|
|
90
|
+
# => [2, 4, 6]
|
|
98
91
|
```
|
|
99
92
|
|
|
100
|
-
|
|
93
|
+
### Per-invocation caps
|
|
101
94
|
|
|
102
|
-
Each
|
|
95
|
+
Each invocation enforces a wall-clock `timeout` and a per-invocation linear-memory `memory_limit`; exhaustion raises a `TrapError` subclass. Pass `nil` to `timeout` / `memory_limit` to disable that cap. Read [`Sandbox#usage`](lib/kobako/sandbox.rb) after the call — populated on every outcome including traps — for actual consumption ([`docs/behavior.md`](docs/behavior.md) B-35).
|
|
103
96
|
|
|
104
97
|
```ruby
|
|
105
98
|
sandbox = Kobako::Sandbox.new(
|
|
106
|
-
timeout: 5.0,
|
|
107
|
-
memory_limit: 10 * 1024 * 1024, # bytes,
|
|
108
|
-
stdout_limit: 64 * 1024,
|
|
99
|
+
timeout: 5.0, # seconds, default 60.0
|
|
100
|
+
memory_limit: 10 * 1024 * 1024, # bytes, default 1 MiB
|
|
101
|
+
stdout_limit: 64 * 1024, # bytes, default 1 MiB
|
|
109
102
|
stderr_limit: 64 * 1024
|
|
110
103
|
)
|
|
111
104
|
```
|
|
112
105
|
|
|
113
|
-
| Cap | Raises
|
|
114
|
-
|
|
115
|
-
| `timeout` | `Kobako::TimeoutError`
|
|
116
|
-
| `memory_limit` | `Kobako::MemoryLimitError`
|
|
117
|
-
| `stdout_limit` | output
|
|
118
|
-
| `stderr_limit` | output
|
|
119
|
-
|
|
120
|
-
The timeout deadline is absolute wall-clock from invocation entry and is checked at guest Wasm safepoints. Long-running host Service callbacks still consume wall-clock time but do not themselves trap — the next guest safepoint will trap immediately on return if the deadline has passed.
|
|
121
|
-
|
|
122
|
-
`memory_limit` is scoped to the **per-invocation linear-memory delta** — the budget covers how much the current `#eval` / `#run` may grow `memory.grow` past the size observed at invocation entry. The mruby image's initial allocation and prior invocations' high-water mark are folded into that entry baseline, so a Sandbox reused across many invocations does not silently accumulate against a global budget.
|
|
106
|
+
| Cap | Raises | Default |
|
|
107
|
+
|----------------|----------------------------|---------|
|
|
108
|
+
| `timeout` | `Kobako::TimeoutError` | 60.0 s |
|
|
109
|
+
| `memory_limit` | `Kobako::MemoryLimitError` | 1 MiB |
|
|
110
|
+
| `stdout_limit` | output clipped (no raise) | 1 MiB |
|
|
111
|
+
| `stderr_limit` | output clipped (no raise) | 1 MiB |
|
|
123
112
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
To see how much of the cap an invocation actually consumed, read `Sandbox#usage` after the call. It returns a `Kobako::Usage` value object with `wall_time` (Float seconds the guest export call spent inside wasmtime, aligned with the `timeout` accounting) and `memory_peak` (Integer high-water `memory.grow` delta in bytes, aligned with the `memory_limit` accounting). The fields are populated on every outcome, including the `TrapError` branches, so you can read them after rescuing a trap to diagnose which budget the failing invocation chewed through.
|
|
127
|
-
|
|
128
|
-
```ruby
|
|
129
|
-
sandbox = Kobako::Sandbox.new(timeout: 1.0, memory_limit: 4 * 1024 * 1024)
|
|
130
|
-
|
|
131
|
-
begin
|
|
132
|
-
sandbox.eval("'x' * 5_000_000")
|
|
133
|
-
rescue Kobako::MemoryLimitError
|
|
134
|
-
sandbox.usage.memory_peak # => the largest delta accepted before the trap
|
|
135
|
-
sandbox.usage.wall_time # => seconds spent before the cap fired
|
|
136
|
-
end
|
|
137
|
-
```
|
|
113
|
+
`memory_limit` covers the per-invocation `memory.grow` delta from the entry baseline, so a Sandbox reused across invocations does not silently accumulate against a global budget.
|
|
138
114
|
|
|
139
|
-
|
|
115
|
+
### Capturing stdout / stderr
|
|
140
116
|
|
|
141
|
-
Guest
|
|
117
|
+
Guest writes through `puts` / `print` / `p` / `$stdout` / `$stderr` are buffered per-channel and exposed independently of the return value ([`docs/behavior.md`](docs/behavior.md) B-04). Buffers clear at the start of each invocation; overflow is clipped at the cap and flagged by `#stdout_truncated?` / `#stderr_truncated?`.
|
|
142
118
|
|
|
143
119
|
```ruby
|
|
144
|
-
sandbox = Kobako::Sandbox.new
|
|
145
|
-
|
|
146
120
|
result = sandbox.eval(<<~RUBY)
|
|
147
121
|
puts "hello"
|
|
148
122
|
warn "be careful"
|
|
@@ -154,50 +128,34 @@ sandbox.stdout # => "hello\n"
|
|
|
154
128
|
sandbox.stderr # => "be careful\n"
|
|
155
129
|
```
|
|
156
130
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
```ruby
|
|
160
|
-
sandbox = Kobako::Sandbox.new(stdout_limit: 64 * 1024)
|
|
161
|
-
sandbox.eval('puts "a" * 100_000')
|
|
162
|
-
sandbox.stdout.bytesize # => 65_536
|
|
163
|
-
sandbox.stdout_truncated? # => true
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
## Error handling
|
|
131
|
+
### Error handling
|
|
167
132
|
|
|
168
|
-
Every invocation
|
|
133
|
+
Every invocation either returns a value or raises exactly one of three classes, so you can route faults without inspecting messages. The full taxonomy lives in [`lib/kobako/errors.rb`](lib/kobako/errors.rb).
|
|
169
134
|
|
|
170
135
|
```ruby
|
|
171
136
|
begin
|
|
172
137
|
sandbox.eval(script)
|
|
173
|
-
rescue Kobako::TrapError
|
|
174
|
-
# Wasm engine fault
|
|
175
|
-
|
|
176
|
-
#
|
|
177
|
-
|
|
178
|
-
# The
|
|
179
|
-
rescue Kobako::ServiceError => e
|
|
180
|
-
# A Service call failed and the script did not rescue it.
|
|
181
|
-
# Treat like any other downstream-service failure in your app.
|
|
182
|
-
rescue Kobako::SandboxError => e
|
|
183
|
-
# The script itself raised, failed to compile, or produced an
|
|
184
|
-
# unrepresentable value. A script-level fault, not infrastructure.
|
|
138
|
+
rescue Kobako::TrapError
|
|
139
|
+
# Wasm engine fault or cap exhaustion. Discard the Sandbox.
|
|
140
|
+
rescue Kobako::ServiceError
|
|
141
|
+
# A host Service call failed and the script did not rescue it.
|
|
142
|
+
rescue Kobako::SandboxError
|
|
143
|
+
# The script raised, failed to compile, or returned an unrepresentable value.
|
|
185
144
|
end
|
|
186
145
|
```
|
|
187
146
|
|
|
188
|
-
|
|
147
|
+
| Class | Parent | Trigger |
|
|
148
|
+
|---------------------------------|----------------|------------------------------------------------------|
|
|
149
|
+
| `Kobako::TimeoutError` | `TrapError` | Per-invocation `timeout` exhausted |
|
|
150
|
+
| `Kobako::MemoryLimitError` | `TrapError` | Per-invocation `memory_limit` exhausted |
|
|
151
|
+
| `Kobako::HandlerExhaustedError` | `SandboxError` | Handle counter reached its 2³¹ − 1 cap |
|
|
152
|
+
| `Kobako::BytecodeError` | `SandboxError` | `#preload(binary:)` failed RITE validation at replay |
|
|
189
153
|
|
|
190
|
-
|
|
191
|
-
|----------------------------------------|--------------------|------------------------------------------------------------------------------------------|
|
|
192
|
-
| `Kobako::TimeoutError` | `TrapError` | Per-invocation `timeout` exhausted |
|
|
193
|
-
| `Kobako::MemoryLimitError` | `TrapError` | Per-invocation `memory_limit` exhausted |
|
|
194
|
-
| `Kobako::ServiceError::Disconnected` | `ServiceError` | RPC target Handle has been invalidated |
|
|
195
|
-
| `Kobako::HandleTableExhausted` | `SandboxError` | Per-invocation Handle counter reached its 2³¹ − 1 cap |
|
|
196
|
-
| `Kobako::BytecodeError` | `SandboxError` | `#preload(binary:)` payload failed RITE structural validation at first invocation replay |
|
|
154
|
+
`SandboxError` and `ServiceError` carry structured `origin` / `klass` / `backtrace_lines` / `details` fields when the guest produced a panic envelope.
|
|
197
155
|
|
|
198
|
-
|
|
156
|
+
### Capability Handles
|
|
199
157
|
|
|
200
|
-
|
|
158
|
+
A non-wire-representable host object — returned from a Service (B-14), passed to `#run` (B-34), or handed back from the guest (B-37) — crosses the boundary as an opaque `Kobako::Handle` proxy and is restored to the original object before host code sees it; any other unrepresentable value raises `Kobako::SandboxError`. Handles are scoped to a single invocation ([`docs/behavior.md`](docs/behavior.md) B-13..B-21, B-34, B-37).
|
|
201
159
|
|
|
202
160
|
```ruby
|
|
203
161
|
class Greeter
|
|
@@ -207,30 +165,15 @@ end
|
|
|
207
165
|
|
|
208
166
|
sandbox.define(:Factory).bind(:Make, ->(name) { Greeter.new(name) })
|
|
209
167
|
|
|
210
|
-
sandbox.eval(
|
|
211
|
-
|
|
212
|
-
g.greet # second RPC, routed to the Greeter
|
|
213
|
-
RUBY
|
|
214
|
-
# => "hi, Bob"
|
|
215
|
-
```
|
|
216
|
-
|
|
217
|
-
`Sandbox#run` accepts non-wire-representable host objects as args / kwargs values too: the host walks the argument tree, wraps every non-wire leaf through the same Handle path, and the guest sees a `Kobako::Handle` proxy in its place. This lets you pass framework objects (a Rack `env` Hash containing an `IO`-like body, an active record, an enumerator) into the entrypoint without first marshalling them into primitives.
|
|
218
|
-
|
|
219
|
-
```ruby
|
|
220
|
-
require "stringio"
|
|
221
|
-
|
|
222
|
-
sandbox = Kobako::Sandbox.new
|
|
223
|
-
sandbox.preload(code: "Echo = ->(body) { body.read.upcase }", name: :Echo)
|
|
224
|
-
|
|
225
|
-
sandbox.run(:Echo, StringIO.new("hello world"))
|
|
226
|
-
# => "HELLO WORLD"
|
|
168
|
+
sandbox.eval('Factory::Make.call("Bob").greet') # => "hi, Bob" (Handle round-trip inside guest)
|
|
169
|
+
sandbox.eval('Factory::Make.call("Bob")') # => #<Greeter @name="Bob"> (B-37 restoration)
|
|
227
170
|
```
|
|
228
171
|
|
|
229
|
-
|
|
172
|
+
A `break` value from a guest block is the one exception: it unwinds back to the guest Member call rather than to host code, so a Handle in it stays a Handle — restoring would just re-wrap the same object into a new id on the return trip.
|
|
230
173
|
|
|
231
|
-
|
|
174
|
+
### Setup-once, run-many
|
|
232
175
|
|
|
233
|
-
|
|
176
|
+
One Sandbox serves many invocations. Service bindings and preloaded snippets persist across calls; capability state (Handles, stdout, stderr, memory delta) resets between them.
|
|
234
177
|
|
|
235
178
|
```
|
|
236
179
|
───────────── setup phase (mutable) ─────────────
|
|
@@ -271,40 +214,21 @@ A single Sandbox can serve many invocations. Service bindings and preloaded snip
|
|
|
271
214
|
Services + snippets persist; invocation N+1 repeats.
|
|
272
215
|
```
|
|
273
216
|
|
|
274
|
-
|
|
275
|
-
sandbox = Kobako::Sandbox.new
|
|
276
|
-
sandbox.define(:Data).bind(:Fetch, ->(id) { records[id] })
|
|
277
|
-
|
|
278
|
-
sandbox.eval('Data::Fetch.call("a")') # => "..."
|
|
279
|
-
sandbox.eval('Data::Fetch.call("b")') # => "..." (same bindings, fresh state)
|
|
280
|
-
```
|
|
217
|
+
For workloads that must be isolated from each other (one Sandbox per tenant, per student submission, per agent session), construct a fresh `Kobako::Sandbox` per scope — wasmtime's Engine and the compiled Module are cached at process scope, so additional Sandboxes amortize cold-start cost automatically.
|
|
281
218
|
|
|
282
|
-
|
|
219
|
+
### Preloaded snippets and entrypoint dispatch
|
|
283
220
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
`Sandbox#preload` registers named mruby snippets that replay against the fresh `mrb_state` before every invocation; `Sandbox#run(:Target, *args, **kwargs)` dispatches into a top-level `Object` constant defined by those snippets and returns the value of `Target.call(*args, **kwargs)`. Together they cover setup-once / dispatch-many workloads where the same logic is exercised across many requests.
|
|
221
|
+
`Sandbox#preload` registers named mruby snippets that replay against the fresh `mrb_state` before every invocation; `Sandbox#run(:Target, *args, **kwargs)` dispatches into a top-level `Object` constant defined by those snippets ([`docs/behavior.md`](docs/behavior.md) B-31..B-33).
|
|
287
222
|
|
|
288
223
|
```ruby
|
|
289
224
|
sandbox = Kobako::Sandbox.new
|
|
290
|
-
sandbox.preload(code: "Adder
|
|
225
|
+
sandbox.preload(code: "Adder = ->(a, b) { a + b }", name: :Adder)
|
|
291
226
|
sandbox.preload(code: 'Greeter = ->(name:) { "hello, #{name}" }', name: :Greeter)
|
|
292
227
|
|
|
293
|
-
sandbox.run(:Adder, 2, 3)
|
|
294
|
-
sandbox.run(:Greeter, name: "world")
|
|
228
|
+
sandbox.run(:Adder, 2, 3) # => 5
|
|
229
|
+
sandbox.run(:Greeter, name: "world") # => "hello, world"
|
|
295
230
|
```
|
|
296
231
|
|
|
297
|
-
`#preload` accepts two payload forms:
|
|
298
|
-
|
|
299
|
-
| Form | Signature | Snippet name source | Validation timing |
|
|
300
|
-
|----------|----------------------------------------|-------------------------------------|------------------------------------------------------------------------------------------|
|
|
301
|
-
| Source | `preload(code: "...", name: :Const)` | The `name:` keyword | Trial-compiled at preload time; compile errors raise immediately |
|
|
302
|
-
| Bytecode | `preload(binary: bytes)` | Read from the bytecode's `debug_info` | Structural validation runs at first invocation; failure raises `Kobako::BytecodeError` |
|
|
303
|
-
|
|
304
|
-
The source form trial-compiles each snippet against a fresh `mrb_state` at preload time, so compile errors surface immediately at the `#preload` call. The bytecode form treats `binary:` as opaque bytes and defers RITE version / body validation to the first invocation's replay, because that is when the payload loads into a fresh `mrb_state`. Bytecode compiled without `debug_info` (`mrbc` without `-g`) is still accepted — only its backtrace frames are omitted, while exception class, message, and `origin` attribution are preserved.
|
|
305
|
-
|
|
306
|
-
Snippets replay in insertion order, so later snippets can reference constants defined by earlier ones. The snippet table is sealed by the first invocation alongside Service registration; additional `#preload` calls after the first `#eval` or `#run` raise `ArgumentError`.
|
|
307
|
-
|
|
308
232
|
```
|
|
309
233
|
per-invocation replay (every #eval / #run, snippets in insertion order):
|
|
310
234
|
|
|
@@ -320,65 +244,33 @@ Snippets replay in insertion order, so later snippets can reference constants de
|
|
|
320
244
|
return value, then mrb_state discarded
|
|
321
245
|
```
|
|
322
246
|
|
|
323
|
-
`#
|
|
324
|
-
|
|
325
|
-
### Choosing between source and bytecode
|
|
326
|
-
|
|
327
|
-
Use the **source form** when snippets are authored in your repo or generated at boot — compile errors land at the `#preload` call so a misbehaving snippet fails fast at setup time, and no separate `mrbc` toolchain is needed. The trial-compile happens once per snippet (~2.5 µs per snippet) and is paid at preload, not on the request hot path.
|
|
247
|
+
`#preload` accepts two payload forms:
|
|
328
248
|
|
|
329
|
-
|
|
249
|
+
| Form | Signature | Snippet name source | Validation timing |
|
|
250
|
+
|----------|--------------------------------------|---------------------------------------|----------------------------------------------------------------------------|
|
|
251
|
+
| Source | `preload(code: "...", name: :Const)` | The `name:` keyword | Trial-compiled at preload; compile errors raise immediately |
|
|
252
|
+
| Bytecode | `preload(binary: bytes)` | Read from the bytecode's `debug_info` | Deferred to first invocation; failure raises `Kobako::BytecodeError` |
|
|
330
253
|
|
|
331
|
-
|
|
254
|
+
Use the source form for snippets authored in your repo (compile errors fail fast at `#preload`); use the bytecode form when snippets ship as build artifacts from a separate `mrbc` pipeline. Both replay through the same per-invocation path.
|
|
332
255
|
|
|
333
256
|
## Performance
|
|
334
257
|
|
|
335
|
-
Order-of-magnitude figures
|
|
336
|
-
|
|
337
|
-
### Lifecycle costs
|
|
338
|
-
|
|
339
|
-
| Phase | Cost |
|
|
340
|
-
|-------------------------------------------------------------|-------------------------------------------------|
|
|
341
|
-
| First `Sandbox.new` in a fresh process (Engine + Module JIT) | ~600 ms one-time |
|
|
342
|
-
| Subsequent `Sandbox.new` (Engine cache warm) | ~130 µs |
|
|
343
|
-
| Reusing a Sandbox for one `#eval("nil")` | ~135 µs |
|
|
344
|
-
| Fresh `Sandbox.new` per request | ~275 µs (≈ +140 µs vs reuse) |
|
|
345
|
-
| Warm `#run(:Entrypoint, ...)` dispatch | ~165 µs |
|
|
346
|
-
| Per-RPC cost amortized inside one invocation | ~6.6 µs (1 000 RPCs in one `#eval` ≈ 6.6 ms) |
|
|
347
|
-
| 100 000-iteration integer XOR loop in mruby | ~43 ms |
|
|
348
|
-
| 1 000 Onigmo `Regexp =~` matches | ~3 µs each |
|
|
258
|
+
Order-of-magnitude figures on macOS arm64, Ruby 3.4.7, YJIT off. Absolute values vary by hardware but ratios are stable across machines. Full numbers, methodology, and the +10%-regression gate live in [`benchmark/README.md`](benchmark/README.md).
|
|
349
259
|
|
|
350
|
-
|
|
260
|
+
| Phase | Cost |
|
|
261
|
+
|--------------------------------------------------------------|-----------------------|
|
|
262
|
+
| First `Sandbox.new` in a fresh process (Engine + Module JIT) | ~600 ms one-time |
|
|
263
|
+
| Subsequent `Sandbox.new` (Engine cache warm) | ~125 µs |
|
|
264
|
+
| Warm `#eval("nil")` on a reused Sandbox | ~135 µs |
|
|
265
|
+
| Warm `#run(:Entrypoint, ...)` dispatch | ~165 µs |
|
|
266
|
+
| Service call amortized inside one invocation | ~6.7 µs |
|
|
267
|
+
| Snippet replay per invocation | ~7-9 µs each |
|
|
268
|
+
| Per additional Sandbox (RSS) | ~570 KB |
|
|
351
269
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
| Allocation | Cost |
|
|
355
|
-
|---------------------------------------------|----------------------------------------------------------------------------|
|
|
356
|
-
| Process RSS after first `Sandbox.new` | ~165-195 MB (one-time engine + module + first instance) |
|
|
357
|
-
| Per additional Sandbox | ~580 KB (Wasm instance + linear memory + WASI capture pipes) |
|
|
358
|
-
| 1 000 isolated tenants in one process | ~765 MB total |
|
|
359
|
-
|
|
360
|
-
Use these as upper-bound budgets for capacity planning, not lower bounds — actual RSS shifts ~30% with host process load and macOS allocator state.
|
|
361
|
-
|
|
362
|
-
### Choosing your pattern
|
|
363
|
-
|
|
364
|
-
When the script is ad-hoc (LLM-generated, untrusted user input) and only runs once, use `Sandbox#eval(source)`. Per-invocation cost is ~135 µs of setup plus the script's own runtime; mruby parses the source on every call.
|
|
365
|
-
|
|
366
|
-
When you have a fixed set of entrypoints exercised many times — a stable AI Agent tool-call protocol, a plug-in registry loaded at boot, a small library of host-side commands — preload the entrypoints via `Sandbox#preload(code:, name:)` once at setup and dispatch via `Sandbox#run(:Target, *args, **kwargs)`. The mruby source compile (~2.5 µs per snippet) lands once at preload, not on every request, and warm dispatch costs ~165 µs.
|
|
367
|
-
|
|
368
|
-
Mind the snippet replay cost. Every preloaded snippet replays into a fresh `mrb_state` before **every** invocation, whether the invocation is `#eval` or `#run`, at ~7-9 µs per snippet per invocation. Preloading 8 helpers adds ~60 µs to every subsequent invocation; preloading 64 helpers adds ~565 µs. Keep the snippet count proportionate to how often the helpers are actually used — preloading rarely-touched helpers is more expensive than inlining or re-eval'ing them.
|
|
369
|
-
|
|
370
|
-
For tenant isolation between mutually untrusted scopes, construct a fresh `Kobako::Sandbox` per scope. Per-request construction costs ~140 µs over reuse plus ~580 KB of RSS — comfortably affordable for 1 000+ isolated tenants in one Sidekiq / Puma worker. Reuse a Sandbox when all requests share one trust scope; isolate when scripts come from many.
|
|
371
|
-
|
|
372
|
-
### Concurrency
|
|
373
|
-
|
|
374
|
-
`ext/` does not release the GVL during wasmtime execution, so wasm work is GVL-serialized: aggregate throughput across N Threads stays around 7-8k `#eval`/s regardless of N. Ruby-side `#eval` setup can still overlap, so a short `#eval` running while another Thread is in a long `#eval` is slowed by ~2× (not 10×) — host-side synchronization yields the GVL and the contending Thread interleaves. Mixed short / long workloads in one process do not deadlock.
|
|
375
|
-
|
|
376
|
-
### Regression gate
|
|
377
|
-
|
|
378
|
-
A +10% regression on any of the five SPEC-mandated benchmarks (cold_start, RPC roundtrip, codec, mruby VM, HandleTable) blocks release. Full per-suite breakdown in [`benchmark/README.md`](benchmark/README.md).
|
|
270
|
+
Construct one Sandbox at boot so the ~600 ms JIT cost lands off the request hot path. `ext/` does not release the GVL during wasmtime execution, so wasm work is GVL-serialized: aggregate throughput stays around 7-8k `#eval`/s regardless of Thread count, though Ruby-side `#eval` setup still overlaps. A +10% regression on any of the six SPEC-mandated benchmarks blocks release.
|
|
379
271
|
|
|
380
272
|
```bash
|
|
381
|
-
bundle exec rake bench
|
|
273
|
+
bundle exec rake bench # six gated regression benchmarks (~5-8 min)
|
|
382
274
|
```
|
|
383
275
|
|
|
384
276
|
## Development
|
|
@@ -386,19 +278,11 @@ bundle exec rake bench # five gated regression benchmarks (~5-8 min, ≤ 1 MiB
|
|
|
386
278
|
After checking out the repo:
|
|
387
279
|
|
|
388
280
|
```bash
|
|
389
|
-
bin/setup
|
|
390
|
-
bundle exec rake
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
Building from source requires a WASI-capable Rust toolchain in addition to the standard host toolchain. The first compile walks the full vendor / mruby / wasm chain:
|
|
394
|
-
|
|
395
|
-
```bash
|
|
396
|
-
bundle exec rake compile # build the native extension
|
|
397
|
-
bundle exec rake wasm:build # rebuild data/kobako.wasm
|
|
398
|
-
bundle exec rake test # run the Ruby test suite
|
|
281
|
+
bin/setup # install dependencies
|
|
282
|
+
bundle exec rake # default: compile + test + rubocop + steep
|
|
399
283
|
```
|
|
400
284
|
|
|
401
|
-
`bin/console` opens an IRB session with the gem preloaded
|
|
285
|
+
Building from source requires a WASI-capable Rust toolchain in addition to the standard host toolchain; the first compile walks the full vendor / mruby / wasm chain. See [`CLAUDE.md`](CLAUDE.md) for the rake task map and pipeline layout. `bin/console` opens an IRB session with the gem preloaded; `bundle exec rake install` installs the local checkout as a gem.
|
|
402
286
|
|
|
403
287
|
## Contributing
|
|
404
288
|
|
data/data/kobako.wasm
CHANGED
|
Binary file
|
data/ext/kobako/Cargo.toml
CHANGED
data/ext/kobako/src/lib.rs
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
use magnus::{Error, Ruby};
|
|
2
2
|
|
|
3
|
-
mod wasm;
|
|
3
|
+
mod runtime;
|
|
4
|
+
mod snapshot;
|
|
4
5
|
|
|
5
6
|
#[magnus::init]
|
|
6
7
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
7
8
|
let module = ruby.define_module("Kobako")?;
|
|
8
|
-
|
|
9
|
+
runtime::init(ruby, module)?;
|
|
10
|
+
snapshot::init(ruby, module)?;
|
|
9
11
|
Ok(())
|
|
10
12
|
}
|