kobako 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1 -1
  3. data/README.md +123 -57
  4. data/data/kobako.wasm +0 -0
  5. data/ext/kobako/Cargo.toml +2 -2
  6. data/ext/kobako/src/wasm/cache.rs +3 -3
  7. data/ext/kobako/src/wasm/dispatch.rs +87 -36
  8. data/ext/kobako/src/wasm/host_state.rs +189 -52
  9. data/ext/kobako/src/wasm/instance.rs +367 -152
  10. data/ext/kobako/src/wasm.rs +19 -5
  11. data/lib/kobako/capture.rb +12 -10
  12. data/lib/kobako/codec/decoder.rb +3 -2
  13. data/lib/kobako/codec/encoder.rb +1 -1
  14. data/lib/kobako/codec/error.rb +3 -2
  15. data/lib/kobako/codec/factory.rb +11 -7
  16. data/lib/kobako/codec/utils.rb +3 -2
  17. data/lib/kobako/codec.rb +2 -1
  18. data/lib/kobako/errors.rb +22 -10
  19. data/lib/kobako/invocation.rb +112 -0
  20. data/lib/kobako/outcome/panic.rb +2 -2
  21. data/lib/kobako/outcome.rb +20 -13
  22. data/lib/kobako/rpc/dispatcher.rb +9 -9
  23. data/lib/kobako/rpc/envelope.rb +3 -3
  24. data/lib/kobako/rpc/fault.rb +3 -2
  25. data/lib/kobako/rpc/handle.rb +3 -2
  26. data/lib/kobako/rpc/handle_table.rb +7 -7
  27. data/lib/kobako/rpc/namespace.rb +3 -3
  28. data/lib/kobako/rpc/server.rb +14 -12
  29. data/lib/kobako/sandbox.rb +147 -125
  30. data/lib/kobako/sandbox_options.rb +73 -0
  31. data/lib/kobako/snippet/binary.rb +30 -0
  32. data/lib/kobako/snippet/source.rb +28 -0
  33. data/lib/kobako/snippet/table.rb +174 -0
  34. data/lib/kobako/snippet.rb +20 -0
  35. data/lib/kobako/version.rb +1 -1
  36. data/sig/kobako/errors.rbs +3 -0
  37. data/sig/kobako/invocation.rbs +23 -0
  38. data/sig/kobako/sandbox.rbs +17 -18
  39. data/sig/kobako/sandbox_options.rbs +32 -0
  40. data/sig/kobako/snippet/binary.rbs +12 -0
  41. data/sig/kobako/snippet/source.rbs +13 -0
  42. data/sig/kobako/snippet/table.rbs +36 -0
  43. data/sig/kobako/snippet.rbs +4 -0
  44. data/sig/kobako/wasm.rbs +3 -1
  45. metadata +13 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5f76e4f3f09c6ade1cf87bd6e970040d8585ed9f0d8db7f1b5e653154acb2af2
4
- data.tar.gz: 7d92b5b0c0961f4ed1b3bab01c5625a0a586ccf364c5e17a9727616bed02d065
3
+ metadata.gz: ddc9be38e5ce7f23c176bcd7be8b8683cf58d308dfedb17e91fc5841308a5c8c
4
+ data.tar.gz: 4a0de9e36b529010b50148ff9d904cafbe253b0c394a9a2118e6ef4d7c9e4029
5
5
  SHA512:
6
- metadata.gz: 64d90bdb3aaf15493486926937e3edb2576e79a932485d169c6588e464ef6d6ae1504ba4afb1647b171d2cc04363381c5568669590cd626dd0a5ccbcccee1025
7
- data.tar.gz: 8d0469f966df65ba9a149b25cfb5a5688381e3b1dc1e7b63572428d31f0a49901108613b7482d99192f3b52a32b61d53f3e725e79c44ab80f5564c6a40eee2d8
6
+ metadata.gz: ae0e599389ce723a1c257923a5b5b760ddf94a71698e5fa990f82587494ae8ca874d11c8c1fd5eedfac0d6e953f53cfb8603fb4da34989f8f92e39425bb99fa5
7
+ data.tar.gz: 6cbdec819843c52c1be3d04a6694af51ae41121e159a4de12d1ebef0074d1626247c8c06db0e9f15da2deb19b4b1730c124aa027bc676ffc2b1fedf6cd68d3c7
data/Cargo.lock CHANGED
@@ -864,7 +864,7 @@ dependencies = [
864
864
 
865
865
  [[package]]
866
866
  name = "kobako"
867
- version = "0.2.1"
867
+ version = "0.3.0"
868
868
  dependencies = [
869
869
  "magnus",
870
870
  "wasmtime",
data/README.md CHANGED
@@ -7,8 +7,8 @@ The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby an
7
7
  ```
8
8
  Host process Wasm guest
9
9
  ┌──────────────────────┐ ┌──────────────────────┐
10
- │ Kobako::Sandbox │ ──run─▶ │ mruby interpreter │
11
- │ │ │ │
10
+ │ Kobako::Sandbox │ ─eval─▶ │ mruby interpreter │
11
+ │ │ ─run──▶ │ │
12
12
  │ Services │ ◀──RPC─ │ KV::Lookup.call(k) │
13
13
  │ KV::Lookup │ ─resp─▶ │ │
14
14
  │ │ │ │
@@ -21,14 +21,17 @@ The host (`wasmtime`) runs a precompiled `kobako.wasm` guest containing mruby an
21
21
 
22
22
  ## Features
23
23
 
24
- - **In-process Wasm sandbox** — no subprocess, no container. Each `Sandbox#run` is a synchronous Ruby call.
25
- - **Per-run caps** — every `#run` enforces a wall-clock `timeout` (default 60 s) and a guest `memory_limit` (default 5 MiB). Exhaustion raises `Kobako::TimeoutError` / `Kobako::MemoryLimitError`.
26
- - **Capability injection via Services** — guest scripts can only call Ruby objects you explicitly `bind` under a two-level `Namespace::Member` path.
27
- - **Three-class error taxonomy** — every failure is exactly one of `TrapError` (Wasm engine / per-run cap), `SandboxError` (script / wire fault), or `ServiceError` (Service capability fault), so you can route errors without inspecting messages.
28
- - **Per-run state reset** — Handles issued during one `#run` are invalidated before the next; Service bindings remain.
29
- - **Separated stdout / stderr capture** — guest `puts` / `warn` / `print` / `printf` / `p` and writes to `$stdout` / `$stderr` are buffered per-channel (1 MiB default cap, configurable). Output past the cap is clipped; `#stdout_truncated?` / `#stderr_truncated?` report overflow.
30
- - **Capability Handles** — Services may return stateful host objects; the guest receives an opaque `Kobako::RPC::Handle` proxy it can use as the target of follow-up RPC calls, with no way to dereference it.
31
- - **Curated mruby stdlib** — core extensions plus `mruby-onig-regexp` for full Onigmo `Regexp` support. No mrbgem with I/O, network, or syscall access is bundled.
24
+ | Feature | Description |
25
+ |---|---|
26
+ | In-process Wasm sandbox | No subprocess, no container. Both invocation verbs (`Sandbox#eval` for ad-hoc source, `Sandbox#run` for entrypoint dispatch) are synchronous Ruby calls. |
27
+ | Per-invocation caps | Every invocation enforces a wall-clock `timeout` (default 60 s) and a per-invocation linear-memory `memory_limit` (default 1 MiB); exhaustion raises `Kobako::TimeoutError` / `Kobako::MemoryLimitError`. |
28
+ | Capability injection via Services | Guest scripts can only call Ruby objects you explicitly `bind` under a two-level `Namespace::Member` path. |
29
+ | Preloaded snippets | `Sandbox#preload` registers source or RITE bytecode for setup-once dispatch via `Sandbox#run(:Entrypoint, *args, **kwargs)`. |
30
+ | Capability Handles | Services may return stateful host objects; the guest receives an opaque `Kobako::RPC::Handle` proxy it can use as the target of follow-up RPC calls, with no way to dereference it. |
31
+ | Three-class error taxonomy | Every failure is exactly one of `TrapError`, `SandboxError`, or `ServiceError`, so you can route errors without inspecting messages. |
32
+ | Per-invocation state reset | Handles issued during one invocation are invalidated before the next; Service bindings and preloaded snippets remain. |
33
+ | Separated stdout / stderr capture | Guest writes to `$stdout` / `$stderr` are buffered per-channel (1 MiB default cap, configurable); overflow is clipped and reported by `#stdout_truncated?` / `#stderr_truncated?`. |
34
+ | Curated mruby stdlib | Core extensions plus `mruby-onig-regexp` for full Onigmo `Regexp` support; no mrbgem with I/O, network, or syscall access is bundled. |
32
35
 
33
36
  ## Requirements
34
37
 
@@ -53,7 +56,7 @@ require "kobako"
53
56
 
54
57
  sandbox = Kobako::Sandbox.new
55
58
 
56
- result = sandbox.run(<<~RUBY)
59
+ result = sandbox.eval(<<~RUBY)
57
60
  1 + 2
58
61
  RUBY
59
62
 
@@ -73,14 +76,14 @@ sandbox = Kobako::Sandbox.new
73
76
  sandbox.define(:KV).bind(:Lookup, ->(key) { redis.get(key) })
74
77
  sandbox.define(:Log).bind(:Sink, ->(msg) { logger.info(msg) })
75
78
 
76
- sandbox.run(<<~RUBY)
79
+ sandbox.eval(<<~RUBY)
77
80
  Log::Sink.call("starting")
78
81
  KV::Lookup.call("user_42")
79
82
  RUBY
80
83
  # => "..." (the redis value)
81
84
  ```
82
85
 
83
- Names must match the Ruby constant pattern `/\A[A-Z]\w*\z/`. Services declared before the first `#run` remain active across subsequent runs; `define` after the first `#run` raises `ArgumentError`.
86
+ Names must match the Ruby constant pattern `/\A[A-Z]\w*\z/`. Services declared before the first invocation remain active across subsequent invocations; `define` after the first invocation (`#eval` or `#run`) raises `ArgumentError`.
84
87
 
85
88
  ### Keyword arguments
86
89
 
@@ -89,18 +92,18 @@ Keyword keys travel as Symbols and reach the host method as keyword arguments:
89
92
  ```ruby
90
93
  sandbox.define(:Geo).bind(:Lookup, ->(name:, region:) { "#{region}/#{name}" })
91
94
 
92
- sandbox.run('Geo::Lookup.call(name: "alice", region: "us")')
95
+ sandbox.eval('Geo::Lookup.call(name: "alice", region: "us")')
93
96
  # => "us/alice"
94
97
  ```
95
98
 
96
- ## Per-run caps
99
+ ## Per-invocation caps
97
100
 
98
- Each Sandbox enforces a wall-clock timeout and a guest linear-memory cap on every `#run`. Both default to safe values; pass `nil` to `timeout` or `memory_limit` to disable that cap. The output caps (`stdout_limit` / `stderr_limit`) cannot be disabled — pass a large Integer instead.
101
+ Each Sandbox enforces a wall-clock timeout and a guest linear-memory cap on every invocation (`#eval` or `#run`). Both default to safe values; pass `nil` to `timeout` or `memory_limit` to disable that cap. The output caps (`stdout_limit` / `stderr_limit`) cannot be disabled — pass a large Integer instead.
99
102
 
100
103
  ```ruby
101
104
  sandbox = Kobako::Sandbox.new(
102
105
  timeout: 5.0, # seconds, default 60.0
103
- memory_limit: 10 * 1024 * 1024, # bytes, default 5 MiB
106
+ memory_limit: 10 * 1024 * 1024, # bytes, default 1 MiB
104
107
  stdout_limit: 64 * 1024, # bytes, default 1 MiB
105
108
  stderr_limit: 64 * 1024
106
109
  )
@@ -109,20 +112,24 @@ sandbox = Kobako::Sandbox.new(
109
112
  | Cap | Raises (subclass of `TrapError`) | Default |
110
113
  |----------------|------------------------------------|----------|
111
114
  | `timeout` | `Kobako::TimeoutError` | 60.0 s |
112
- | `memory_limit` | `Kobako::MemoryLimitError` | 5 MiB |
115
+ | `memory_limit` | `Kobako::MemoryLimitError` | 1 MiB |
113
116
  | `stdout_limit` | output silently clipped at cap | 1 MiB |
114
117
  | `stderr_limit` | output silently clipped at cap | 1 MiB |
115
118
 
116
- The timeout deadline is absolute wall-clock from `#run` entry and is checked at guest Wasm safepoints. Long-running host Service callbacks still consume wall-clock time but do not themselves trap — the next guest safepoint will trap immediately on return if the deadline has passed.
119
+ The timeout deadline is absolute wall-clock from invocation entry and is checked at guest Wasm safepoints. Long-running host Service callbacks still consume wall-clock time but do not themselves trap — the next guest safepoint will trap immediately on return if the deadline has passed.
120
+
121
+ `memory_limit` is scoped to the **per-invocation linear-memory delta** — the budget covers how far the current `#eval` / `#run` may grow linear memory (via `memory.grow`) past the size observed at invocation entry. The mruby image's initial allocation and prior invocations' high-water mark are folded into that entry baseline, so a Sandbox reused across many invocations does not silently accumulate against a global budget.
122
+
123
+ The 1 MiB default targets lightweight dynamic RPC workloads — short scripts that orchestrate Service calls, return small structured values, or replace a tool-calling layer in an AI Agent's Code Mode dispatch. Bump `memory_limit` when scripts compose multi-hundred-KiB strings, hold large composite return values, or run computations that allocate substantial intermediate state. Because the cap resets every invocation, multi-call patterns on one Sandbox do not need a budget that covers their cumulative footprint — only the largest single invocation's working set.
117
124
 
118
125
  ## Capturing stdout and stderr
119
126
 
120
- Guest output is captured into per-run buffers and exposed independently from the return value. The buffers cover the full Ruby IO surface — `puts`, `print`, `printf`, `p`, `<<`, and writes through `$stdout` / `$stderr` — all routed through the host-captured WASI pipe.
127
+ Guest output is captured into per-invocation buffers and exposed independently from the return value. The buffers cover the full Ruby IO surface — `puts`, `print`, `printf`, `p`, `<<`, and writes through `$stdout` / `$stderr` — all routed through the host-captured WASI pipe.
121
128
 
122
129
  ```ruby
123
130
  sandbox = Kobako::Sandbox.new
124
131
 
125
- result = sandbox.run(<<~RUBY)
132
+ result = sandbox.eval(<<~RUBY)
126
133
  puts "hello"
127
134
  warn "be careful"
128
135
  42
@@ -133,24 +140,24 @@ sandbox.stdout # => "hello\n"
133
140
  sandbox.stderr # => "be careful\n"
134
141
  ```
135
142
 
136
- Each `#run` clears the buffers at start. Output past the per-channel cap is clipped at the cap boundary — `#run` still returns normally, the bytes carry no truncation sentinel, and `#stdout_truncated?` / `#stderr_truncated?` flip to `true`.
143
+ Each invocation clears the buffers at start. Output past the per-channel cap is clipped at the cap boundary — the invocation still returns normally, the bytes carry no truncation sentinel, and `#stdout_truncated?` / `#stderr_truncated?` flip to `true`.
137
144
 
138
145
  ```ruby
139
146
  sandbox = Kobako::Sandbox.new(stdout_limit: 64 * 1024)
140
- sandbox.run('puts "a" * 100_000')
147
+ sandbox.eval('puts "a" * 100_000')
141
148
  sandbox.stdout.bytesize # => 65_536
142
149
  sandbox.stdout_truncated? # => true
143
150
  ```
144
151
 
145
152
  ## Error handling
146
153
 
147
- Every `#run` either returns a value or raises exactly one of three classes:
154
+ Every invocation (`#eval` or `#run`) either returns a value or raises exactly one of three classes:
148
155
 
149
156
  ```ruby
150
157
  begin
151
- sandbox.run(script)
158
+ sandbox.eval(script)
152
159
  rescue Kobako::TrapError => e
153
- # Wasm engine fault OR per-run cap exhaustion:
160
+ # Wasm engine fault OR per-invocation cap exhaustion:
154
161
  # - Kobako::TimeoutError (wall-clock timeout)
155
162
  # - Kobako::MemoryLimitError (memory_limit exceeded)
156
163
  # - Kobako::TrapError (engine crash / wire-violation fallback)
@@ -166,9 +173,13 @@ end
166
173
 
167
174
  `SandboxError` and `ServiceError` carry structured fields (`origin`, `klass`, `backtrace_lines`, `details`) when the guest produced a panic envelope. Named subclasses:
168
175
 
169
- - `Kobako::TimeoutError` / `Kobako::MemoryLimitError` — per-run cap exhaustion (subclasses of `TrapError`).
170
- - `Kobako::ServiceError::Disconnected` — RPC target Handle has been invalidated.
171
- - `Kobako::HandleTableExhausted` — per-run Handle counter reached its cap (2³¹ − 1); subclass of `SandboxError`.
176
+ | Class | Parent | Trigger |
177
+ |----------------------------------------|--------------------|------------------------------------------------------------------------------------------|
178
+ | `Kobako::TimeoutError` | `TrapError` | Per-invocation `timeout` exhausted |
179
+ | `Kobako::MemoryLimitError` | `TrapError` | Per-invocation `memory_limit` exhausted |
180
+ | `Kobako::ServiceError::Disconnected` | `ServiceError` | RPC target Handle has been invalidated |
181
+ | `Kobako::HandleTableExhausted` | `SandboxError` | Per-invocation Handle counter reached its 2³¹ − 1 cap |
182
+ | `Kobako::BytecodeError` | `SandboxError` | `#preload(binary:)` payload failed RITE structural validation at first invocation replay |
172
183
 
173
184
  ## Capability Handles
174
185
 
@@ -182,57 +193,112 @@ end
182
193
 
183
194
  sandbox.define(:Factory).bind(:Make, ->(name) { Greeter.new(name) })
184
195
 
185
- sandbox.run(<<~RUBY)
196
+ sandbox.eval(<<~RUBY)
186
197
  g = Factory::Make.call("Bob") # g is a Kobako::RPC::Handle proxy
187
198
  g.greet # second RPC, routed to the Greeter
188
199
  RUBY
189
200
  # => "hi, Bob"
190
201
  ```
191
202
 
192
- Handles are scoped to a single `#run` — a Handle obtained in run N is invalid in run N+1, even on the same Sandbox.
203
+ Handles are scoped to a single invocation — a Handle obtained in invocation N is invalid in invocation N+1, even on the same Sandbox.
193
204
 
194
205
  ## Setup-once, run-many
195
206
 
196
- A single Sandbox can serve many script executions. Service bindings persist; capability state (Handles, stdout, stderr) resets between runs.
207
+ A single Sandbox can serve many invocations. Service bindings and preloaded snippets persist; capability state (Handles, stdout, stderr) resets between invocations.
197
208
 
198
209
  ```ruby
199
210
  sandbox = Kobako::Sandbox.new
200
211
  sandbox.define(:Data).bind(:Fetch, ->(id) { records[id] })
201
212
 
202
- sandbox.run('Data::Fetch.call("a")') # => "..."
203
- sandbox.run('Data::Fetch.call("b")') # => "..." (same bindings, fresh state)
213
+ sandbox.eval('Data::Fetch.call("a")') # => "..."
214
+ sandbox.eval('Data::Fetch.call("b")') # => "..." (same bindings, fresh state)
204
215
  ```
205
216
 
206
217
  For workloads that must be isolated from each other (e.g., one Sandbox per tenant, per student submission), construct a fresh `Kobako::Sandbox` per scope. wasmtime's Engine and the compiled Module are cached at process scope, so additional Sandboxes amortize cold-start cost automatically.
207
218
 
219
+ ## Preloaded snippets and entrypoint dispatch
220
+
221
+ `Sandbox#preload` registers named mruby snippets that replay against the fresh `mrb_state` before every invocation; `Sandbox#run(:Target, *args, **kwargs)` dispatches into a top-level `Object` constant defined by those snippets and returns the value of `Target.call(*args, **kwargs)`. Together they cover setup-once / dispatch-many workloads where the same logic is exercised across many requests.
222
+
223
+ ```ruby
224
+ sandbox = Kobako::Sandbox.new
225
+ sandbox.preload(code: "Adder = ->(a, b) { a + b }", name: :Adder)
226
+ sandbox.preload(code: 'Greeter = ->(name:) { "hello, #{name}" }', name: :Greeter)
227
+
228
+ sandbox.run(:Adder, 2, 3) # => 5
229
+ sandbox.run(:Greeter, name: "world") # => "hello, world"
230
+ ```
231
+
232
+ `#preload` accepts two payload forms:
233
+
234
+ | Form | Signature | Snippet name source | Validation timing |
235
+ |----------|----------------------------------------|-------------------------------------|------------------------------------------------------------------------------------------|
236
+ | Source | `preload(code: "...", name: :Const)` | The `name:` keyword | Trial-compiled at preload time; compile errors raise immediately |
237
+ | Bytecode | `preload(binary: bytes)` | Read from the bytecode's `debug_info` | Structural validation runs at first invocation; failure raises `Kobako::BytecodeError` |
238
+
239
+ The source form trial-compiles each snippet against a fresh `mrb_state` at preload time, so compile errors surface immediately at the `#preload` call. The bytecode form treats `binary:` as opaque bytes and defers RITE version / body validation to the first invocation's replay, because that is when the payload loads into a fresh `mrb_state`. Bytecode compiled without `debug_info` (`mrbc` without `-g`) is still accepted — only its backtrace frames are omitted, while exception class, message, and `origin` attribution are preserved.
240
+
241
+ Snippets replay in insertion order, so later snippets can reference constants defined by earlier ones. The snippet table is sealed by the first invocation alongside Service registration; additional `#preload` calls after the first `#eval` or `#run` raise `ArgumentError`.
242
+
243
+ `#run` resolves `target` (Symbol or String, normalized to Symbol) only as a top-level `Object` constant — `::`-segmented names and lowercase forms fail at host pre-flight with `ArgumentError`. A `Kobako::SandboxError` surfaces when the constant is missing or does not respond to `#call`.
244
+
245
+ ### Choosing between source and bytecode
246
+
247
+ Use the **source form** when snippets are authored in your repo or generated at boot — compile errors land at the `#preload` call so a misbehaving snippet fails fast at setup time, and no separate `mrbc` toolchain is needed. The trial-compile happens once per snippet (~2.5 µs per snippet) and is paid at preload, not on the request hot path.
248
+
249
+ Use the **bytecode form** when snippets ship as build artifacts from a pipeline that runs `mrbc` separately — for example, when source bodies should not be embedded in the running process, when you want a build step that compiles and packages snippets ahead of release, or when you want `Exception#backtrace` frames attributed to the bytecode's `debug_info` filename rather than a host-supplied `name:` keyword. Structural validation (RITE version, body integrity) is deferred to the first invocation, so a malformed bytecode payload surfaces as `Kobako::BytecodeError` on the first `#eval` or `#run`, not at `#preload`.
250
+
251
+ Both forms behave identically at dispatch time and replay through the same per-invocation path, so the choice between them is about your build / distribution pipeline and where you want errors to land, not about runtime cost.
252
+
208
253
  ## Performance
209
254
 
210
- Headline numbers from the current baseline (macOS arm64, Ruby 3.4.7, YJIT off — full results in [`benchmark/results/`](benchmark/results) and [`benchmark/README.md`](benchmark/README.md)).
255
+ Order-of-magnitude figures for capacity planning on macOS arm64, Ruby 3.4.7, YJIT off. Absolute values vary by hardware but the ratios are stable across machines. Detailed numbers and methodology live in [`benchmark/README.md`](benchmark/README.md).
211
256
 
212
- | What | Cost |
213
- |---|---|
214
- | First `Sandbox.new` in a fresh process (Engine init + Module JIT) | ~2.0 s one-time |
215
- | Subsequent `Sandbox.new` (cache warm) | ~130 µs |
216
- | Reusing a Sandbox for one `#run("nil")` | ~135 µs |
217
- | Fresh Sandbox per request — the tenant-isolation pattern | ~275 µs (+140 µs vs reuse) |
218
- | Per-RPC cost amortized across 1 000 calls in one `#run` | ~35 µs |
219
- | 100 000-iteration integer XOR loop in mruby | ~200 ms |
220
- | 1 000 Onigmo `Regexp =~` matches | ~14 µs per match |
221
- | Process RSS after the first `Sandbox.new` | ~150 MB (one-time) |
222
- | Memory per additional Sandbox | ~575 KB |
223
- | 1 000 isolated tenants in one process | ~730 MB total |
224
- | Aggregate throughput across N Threads | GVL-bound — wasm execution serialized, modest scaling from Ruby-side overlap |
225
-
226
- Practical implications:
227
-
228
- - **Pre-warm at boot.** The ~2 s first-Sandbox cost is paid once per process; every subsequent Sandbox amortizes to microseconds, not seconds. Construct one Sandbox at boot before serving requests.
229
- - **Tenant isolation is affordable.** Per-request Sandbox construction adds ~140 µs of overhead; per-tenant RSS budget is ~575 KB plus one-time ~130 MB for the engine. 1 000 isolated tenants in a single Sidekiq / Puma worker is well within typical RSS limits.
230
- - **Batch RPCs inside one `#run`.** A single Service call costs ~135 µs because each `#run` carries ~130 µs of setup; 1 000 calls inside one `#run` reduce the per-call cost to ~35 µs.
231
-
232
- A +10% regression on any of the five SPEC-mandated benchmarks blocks release. See [`benchmark/README.md`](benchmark/README.md) for the full per-suite breakdown and known measurement caveats.
257
+ ### Lifecycle costs
258
+
259
+ | Phase | Cost |
260
+ |-------------------------------------------------------------|-------------------------------------------------|
261
+ | First `Sandbox.new` in a fresh process (Engine + Module JIT) | ~600 ms one-time |
262
+ | Subsequent `Sandbox.new` (Engine cache warm) | ~130 µs |
263
+ | Reusing a Sandbox for one `#eval("nil")` | ~135 µs |
264
+ | Fresh `Sandbox.new` per request | ~275 µs (≈ +140 µs vs reuse) |
265
+ | Warm `#run(:Entrypoint, ...)` dispatch | ~165 µs |
266
+ | Per-RPC cost amortized inside one invocation | ~6.6 µs (1 000 RPCs in one `#eval` ≈ 6.6 ms) |
267
+ | 100 000-iteration integer XOR loop in mruby | ~43 ms |
268
+ | 1 000 Onigmo `Regexp =~` matches | ~3 µs each |
269
+
270
+ The ~600 ms cold start dominates the first Sandbox in a process — wasmtime JIT-compiles the precompiled `kobako.wasm` Module and the result is cached at process scope. Construct one Sandbox at boot before serving requests so the JIT cost lands off the hot path.
271
+
272
+ ### Memory budget
273
+
274
+ | Allocation | Cost |
275
+ |---------------------------------------------|----------------------------------------------------------------------------|
276
+ | Process RSS after first `Sandbox.new` | ~150-180 MB (one-time engine + module + first instance) |
277
+ | Per additional Sandbox | ~580 KB (Wasm instance + linear memory + WASI capture pipes) |
278
+ | 1 000 isolated tenants in one process | ~750 MB total |
279
+
280
+ Use these as upper-bound budgets for capacity planning, not lower bounds — actual RSS shifts ~30% with host process load and macOS allocator state.
281
+
282
+ ### Choosing your pattern
283
+
284
+ When the script is ad-hoc (LLM-generated, untrusted user input) and only runs once, use `Sandbox#eval(source)`. Per-invocation cost is ~135 µs of setup plus the script's own runtime; mruby parses the source on every call.
285
+
286
+ When you have a fixed set of entrypoints exercised many times — a stable AI Agent tool-call protocol, a plug-in registry loaded at boot, a small library of host-side commands — preload the entrypoints via `Sandbox#preload(code:, name:)` once at setup and dispatch via `Sandbox#run(:Target, *args, **kwargs)`. The mruby source compile (~2.5 µs per snippet) lands once at preload, not on every request, and warm dispatch costs ~165 µs.
287
+
288
+ Mind the snippet replay cost. Every preloaded snippet replays into a fresh `mrb_state` before **every** invocation, whether the invocation is `#eval` or `#run`, at ~7-9 µs per snippet per invocation. Preloading 8 helpers adds ~60 µs to every subsequent invocation; preloading 64 helpers adds ~565 µs. Keep the snippet count proportionate to how often the helpers are actually used — preloading rarely-touched helpers is more expensive than inlining or re-eval'ing them.
289
+
290
+ For tenant isolation between mutually untrusted scopes, construct a fresh `Kobako::Sandbox` per scope. Per-request construction costs ~140 µs over reuse plus ~580 KB of RSS — comfortably affordable for 1 000+ isolated tenants in one Sidekiq / Puma worker. Reuse a Sandbox when all requests share one trust scope; isolate when scripts come from many.
291
+
292
+ ### Concurrency
293
+
294
+ `ext/` does not release the GVL during wasmtime execution, so wasm work is GVL-serialized: aggregate throughput across N Threads stays around 7-8k `#eval`/s regardless of N. Ruby-side `#eval` setup can still overlap, so a short `#eval` running while another Thread is in a long `#eval` is slowed by ~2× (not 10×) — host-side synchronization yields the GVL and the contending Thread interleaves. Mixed short / long workloads in one process do not deadlock.
295
+
296
+ ### Regression gate
297
+
298
+ A +10% regression on any of the five SPEC-mandated benchmarks (cold_start, RPC roundtrip, codec, mruby VM, HandleTable) blocks release. Full per-suite breakdown in [`benchmark/README.md`](benchmark/README.md).
233
299
 
234
300
  ```bash
235
- bundle exec rake bench # five gated regression benchmarks (≤ 1 MiB payloads, ~5-8 min)
301
+ bundle exec rake bench # five gated regression benchmarks (~5-8 min, ≤ 1 MiB payloads)
236
302
  ```
237
303
 
238
304
  ## Development
data/data/kobako.wasm CHANGED
Binary file
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kobako"
3
- version = "0.2.1"
3
+ version = "0.3.0"
4
4
  edition = "2021"
5
5
  authors = ["Aotokitsuruya <contact@aotoki.me>"]
6
6
  license = "Apache-2.0"
@@ -29,7 +29,7 @@ wasmtime = { version = "44.0.1", default-features = false, features = [
29
29
  "wat",
30
30
  ] }
31
31
  # wasmtime-wasi provides WASI preview1 support for routing guest stdout/stderr
32
- # into in-memory buffers (SPEC.md §B-04). The `p1` feature enables the
32
+ # into in-memory buffers (docs/behavior.md §B-04). The `p1` feature enables the
33
33
  # WasiCtxBuilder + preview1 adapter which wires fd 1/2 to pipes. We omit
34
34
  # `p2` (component-model) and `p0`/`p3` (async) because kobako runs
35
35
  # synchronous sandboxes only.
@@ -34,7 +34,7 @@ static SHARED_ENGINE: OnceLock<WtEngine> = OnceLock::new();
34
34
  static MODULE_CACHE: OnceLock<Mutex<HashMap<PathBuf, WtModule>>> = OnceLock::new();
35
35
 
36
36
  /// Ticker cadence for the process-singleton epoch ticker. Bounds the
37
- /// granularity of the SPEC.md B-01 wall-clock timeout: the
37
+ /// granularity of the docs/behavior.md B-01 wall-clock timeout: the
38
38
  /// `epoch_deadline_callback` fires once per tick (`Continue(1)`), so the
39
39
  /// trap can lag the deadline by at most one tick under nominal
40
40
  /// scheduling. 10 ms keeps the lag small enough that it does not skew
@@ -53,8 +53,8 @@ const EPOCH_TICK: Duration = Duration::from_millis(10);
53
53
  /// instructions.
54
54
  ///
55
55
  /// Also enables `epoch_interruption(true)` so every Store can install an
56
- /// `epoch_deadline_callback` for the per-run wall-clock cap (SPEC.md
57
- /// B-01, E-19). The first call spawns the process-singleton ticker
56
+ /// `epoch_deadline_callback` for the per-run wall-clock cap
57
+ /// (docs/behavior.md B-01, E-19). The first call spawns the process-singleton ticker
58
58
  /// thread that drives `engine.increment_epoch()` at [`EPOCH_TICK`]
59
59
  /// cadence; subsequent calls reuse the same engine and ticker.
60
60
  pub(crate) fn shared_engine() -> Result<&'static WtEngine, MagnusError> {
@@ -3,7 +3,7 @@
3
3
  //! When the guest invokes the wasm import declared in
4
4
  //! `wasm/kobako-wasm/src/abi.rs`, wasmtime calls back into the host
5
5
  //! through the closure built in [`super::instance::Instance::build`].
6
- //! That closure delegates here. The dispatcher (SPEC.md B-12 / B-13):
6
+ //! That closure delegates here. The dispatcher (docs/behavior.md B-12 / B-13):
7
7
  //!
8
8
  //! 1. Reads the Request bytes from guest linear memory.
9
9
  //! 2. Hands them to the Ruby-side `Kobako::RPC::Server` and recovers
@@ -19,6 +19,31 @@
19
19
  //! return to a trap. Failures during normal dispatch surface as
20
20
  //! Response.err envelopes from the Server itself — they never reach
21
21
  //! this 0-return path.
22
+ //!
23
+ //! ## Why this module writes to `stderr`
24
+ //!
25
+ //! This file is the one place in `ext/` that deliberately prints
26
+ //! through `eprintln!`. The host normally surfaces faults by
27
+ //! raising a `MagnusError` back into Ruby; the dispatcher contract
28
+ //! is the exception — it must return a packed `i64` to the guest
29
+ //! and cannot raise, so a 0 return is the only signal the wasm side
30
+ //! receives. The guest collapses every 0 into the same trap, so the
31
+ //! Ruby host has no way to attribute the failure to a specific
32
+ //! step (missing `memory` export vs. no Server bound vs. Server
33
+ //! raised vs. `__kobako_alloc` returned 0 vs. `memory.write`
34
+ //! rejected).
35
+ //!
36
+ //! [`handle`] writes a single `[kobako-dispatch] <reason>` line to
37
+ //! `stderr` on each failure path so operators have a breadcrumb to
38
+ //! correlate the trap with the actual cause. The line is emitted in
39
+ //! both debug and release builds on purpose: dispatcher failures
40
+ //! are wire-layer faults rather than expected error paths
41
+ //! (`Kobako::Sandbox` always installs a Server, the Server is
42
+ //! contracted never to raise, etc.), so the "release-build noise"
43
+ //! cost is bounded — under normal operation the line is never
44
+ //! written. Operators that need to silence the channel can redirect
45
+ //! the host process's stderr, but the kobako convention is "ext
46
+ //! never logs" plus this single, named exception.
22
47
 
23
48
  use magnus::value::{Opaque, ReprValue};
24
49
  use magnus::{Error as MagnusError, RString, Ruby, Value};
@@ -28,27 +53,48 @@ use super::host_state::HostState;
28
53
 
29
54
  /// Drive a single `__kobako_dispatch` invocation end-to-end. Entry point
30
55
  /// from the wasmtime closure built in [`super::instance::Instance::build`].
56
+ ///
57
+ /// Returns the packed `(ptr<<32)|len` u64 on success, 0 on any
58
+ /// wire-layer fault. Failure paths log a `[kobako-dispatch]` line to
59
+ /// `stderr` so operators have a breadcrumb when the guest sees a 0
60
+ /// return and traps; before this every failure was silent. The Server
61
+ /// itself is contracted never to raise (it folds Service exceptions
62
+ /// into Response.err envelopes), so reaching the failure path is
63
+ /// always a wiring bug or wire-layer fault rather than an expected
64
+ /// path.
31
65
  pub(crate) fn handle(caller: &mut Caller<'_, HostState>, req_ptr: i32, req_len: i32) -> i64 {
32
- let req_bytes = match read_caller_memory(caller, req_ptr, req_len) {
33
- Some(b) => b,
34
- None => return 0,
35
- };
66
+ match try_handle(caller, req_ptr, req_len) {
67
+ Ok(packed) => packed,
68
+ Err(reason) => {
69
+ eprintln!("[kobako-dispatch] {}", reason);
70
+ 0
71
+ }
72
+ }
73
+ }
36
74
 
37
- // No Server bound → return 0 to signal a wire-layer fault; the guest
38
- // maps a 0 return to a trap. `Kobako::Sandbox` always installs a
39
- // Server before invoking the guest, so reaching this branch indicates
40
- // a misuse rather than a normal control path.
41
- let server = match caller.data().server() {
42
- Some(d) => d,
43
- None => return 0,
44
- };
75
+ /// Result-returning core of [`handle`]. Pulled out so each early
76
+ /// failure path carries a diagnostic string instead of an opaque 0.
77
+ fn try_handle(
78
+ caller: &mut Caller<'_, HostState>,
79
+ req_ptr: i32,
80
+ req_len: i32,
81
+ ) -> Result<i64, &'static str> {
82
+ let req_bytes = read_caller_memory(caller, req_ptr, req_len)
83
+ .ok_or("guest 'memory' export missing or request slice out of bounds")?;
45
84
 
46
- let resp_bytes = match invoke_server(server, &req_bytes) {
47
- Ok(b) => b,
48
- Err(_) => return 0,
49
- };
85
+ // `Kobako::Sandbox` always installs a Server before invoking the
86
+ // guest, so reaching this branch indicates a misuse rather than a
87
+ // normal control path.
88
+ let server = caller
89
+ .data()
90
+ .server()
91
+ .ok_or("no Ruby Server bound — Sandbox#run must precede __kobako_dispatch")?;
92
+
93
+ let resp_bytes = invoke_server(server, &req_bytes).map_err(|_| {
94
+ "Ruby Server#dispatch raised — contract is to fold faults into Response.err"
95
+ })?;
50
96
 
51
- write_response(caller, &resp_bytes).unwrap_or(0)
97
+ write_response(caller, &resp_bytes)
52
98
  }
53
99
 
54
100
  /// Call the Ruby Server's `#dispatch(request_bytes)` method and return
@@ -56,43 +102,48 @@ pub(crate) fn handle(caller: &mut Caller<'_, HostState>, req_ptr: i32, req_len:
56
102
  /// failed (it is contracted never to raise — see
57
103
  /// `Kobako::RPC::Server#dispatch`), which we treat as a wire-layer fault.
58
104
  fn invoke_server(server: Opaque<Value>, req_bytes: &[u8]) -> Result<Vec<u8>, MagnusError> {
59
- // The wasmtime callback runs on the same Ruby thread that called
60
- // Sandbox#run — the invariant SPEC Implementation Standards
61
- // Architecture pins for the host gem — so `Ruby::get()` is always
62
- // available here. Panicking with `expect` localises the violation
63
- // rather than letting a nonsense error propagate.
105
+ // The wasmtime callback runs on the same Ruby thread that called the
106
+ // active Sandbox invocation (#eval or #run) — the invariant SPEC
107
+ // Implementation Standards Architecture pins for the host gem — so
108
+ // `Ruby::get()` is always available here. Panicking with `expect`
109
+ // localises the violation rather than letting a nonsense error
110
+ // propagate.
64
111
  let ruby = Ruby::get().expect("Ruby handle unavailable in __kobako_dispatch");
65
112
  let server_value: Value = ruby.get_inner(server);
66
113
  let req_str = ruby.str_from_slice(req_bytes);
67
114
  let resp: RString = server_value.funcall("dispatch", (req_str,))?;
68
- // SAFETY: the returned RString is held by the Ruby VM for the duration of
69
- // this scope; copying its bytes into a Vec is a defensive standard pattern.
70
- let bytes = unsafe { resp.as_slice() }.to_vec();
71
- Ok(bytes)
115
+ Ok(super::rstring_to_vec(resp))
72
116
  }
73
117
 
74
118
  /// Allocate a guest-side buffer through `__kobako_alloc` and copy the
75
119
  /// response bytes into it. Returns the packed `(ptr<<32)|len` u64.
76
- fn write_response(caller: &mut Caller<'_, HostState>, bytes: &[u8]) -> Option<i64> {
120
+ /// Each failure path carries a `&'static str` reason so the dispatcher
121
+ /// wrapper can surface a useful diagnostic rather than a silent 0.
122
+ fn write_response(caller: &mut Caller<'_, HostState>, bytes: &[u8]) -> Result<i64, &'static str> {
77
123
  let alloc = match caller.get_export("__kobako_alloc") {
78
- Some(Extern::Func(f)) => f.typed::<i32, i32>(&*caller).ok()?,
79
- _ => return None,
124
+ Some(Extern::Func(f)) => f
125
+ .typed::<i32, i32>(&*caller)
126
+ .map_err(|_| "guest '__kobako_alloc' export has wrong signature")?,
127
+ _ => return Err("guest '__kobako_alloc' export missing"),
80
128
  };
81
- let len_i32 = i32::try_from(bytes.len()).ok()?;
82
- let ptr = alloc.call(&mut *caller, len_i32).ok()?;
129
+ let len_i32 = i32::try_from(bytes.len()).map_err(|_| "response exceeds i32::MAX bytes")?;
130
+ let ptr = alloc
131
+ .call(&mut *caller, len_i32)
132
+ .map_err(|_| "__kobako_alloc trapped")?;
83
133
  if ptr == 0 {
84
- return None;
134
+ return Err("__kobako_alloc returned 0 (out of memory)");
85
135
  }
86
136
 
87
137
  let mem = match caller.get_export("memory") {
88
138
  Some(Extern::Memory(m)) => m,
89
- _ => return None,
139
+ _ => return Err("guest 'memory' export missing"),
90
140
  };
91
- mem.write(&mut *caller, ptr as usize, bytes).ok()?;
141
+ mem.write(&mut *caller, ptr as usize, bytes)
142
+ .map_err(|_| "memory.write rejected response buffer range")?;
92
143
 
93
144
  let ptr_u32 = ptr as u32;
94
145
  let len_u32 = bytes.len() as u32;
95
- Some(((ptr_u32 as i64) << 32) | (len_u32 as i64))
146
+ Ok(((ptr_u32 as i64) << 32) | (len_u32 as i64))
96
147
  }
97
148
 
98
149
  /// Copy `[ptr, ptr+len)` out of the guest's linear memory as seen from