onnx-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +334 -0
- data/Gemfile +5 -0
- data/LICENSE +21 -0
- data/README.md +301 -0
- data/Rakefile +17 -0
- data/examples/classification.rb +35 -0
- data/examples/embedding.rb +35 -0
- data/examples/real_world_demo.rb +170 -0
- data/examples/with_zvec.rb +54 -0
- data/ext/onnx_ruby/extconf.rb +75 -0
- data/ext/onnx_ruby/onnx_ruby_ext.cpp +436 -0
- data/lib/onnx_ruby/classifier.rb +107 -0
- data/lib/onnx_ruby/configuration.rb +16 -0
- data/lib/onnx_ruby/embedder.rb +147 -0
- data/lib/onnx_ruby/hub.rb +73 -0
- data/lib/onnx_ruby/lazy_session.rb +38 -0
- data/lib/onnx_ruby/model.rb +71 -0
- data/lib/onnx_ruby/reranker.rb +91 -0
- data/lib/onnx_ruby/session.rb +89 -0
- data/lib/onnx_ruby/session_pool.rb +75 -0
- data/lib/onnx_ruby/tensor.rb +92 -0
- data/lib/onnx_ruby/version.rb +5 -0
- data/lib/onnx_ruby.rb +45 -0
- data/onnx-ruby.gemspec +37 -0
- metadata +125 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 809c3c981b4ec890ed7169969b64f556dffb4a44a0c44e6126ec3f2ba28f2f07
|
|
4
|
+
data.tar.gz: 91233f1ee8c921c5cf4a2352f98e0f322aac91b27b0db01c55212c0ae8f9a750
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: b1ad99c83090be72d099836e84992d1a108b876272fd152fab1de4b43569930b1bdf985ca79e5fc8b27e3a4a8da65b33d823a560b9dd6cd4d7df384c942adb0a
|
|
7
|
+
data.tar.gz: f9a9c2b4278f7c20dffd890ece27cecf24004127944826b65bcef7a31a383c5f3799cecb01ca29e2ffd984f92698cb1ecf50e3681df2147b104c24839b9021df
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
# onnx-ruby
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
Ruby bindings for [ONNX Runtime](https://github.com/microsoft/onnxruntime), Microsoft's high-performance inference engine for ONNX models. This gem wraps the ONNX Runtime C++ API using **Rice** (same approach as zvec-ruby) to give Ruby developers fast local model inference.
|
|
6
|
+
|
|
7
|
+
This unlocks: local embeddings, text classification, named entity recognition, sentiment analysis, reranking, and any other ML model exported to ONNX format — all without Python or API calls.
|
|
8
|
+
|
|
9
|
+
## Author
|
|
10
|
+
|
|
11
|
+
- Name: Johannes Dwi Cahyo
|
|
12
|
+
- GitHub: johannesdwicahyo
|
|
13
|
+
- Repo: git@github.com:johannesdwicahyo/onnx-ruby.git
|
|
14
|
+
|
|
15
|
+
## Technical Approach
|
|
16
|
+
|
|
17
|
+
### Binding Strategy: Rice 4.x (C++ → Ruby)
|
|
18
|
+
|
|
19
|
+
ONNX Runtime has a C++ API (`onnxruntime_cxx_api.h`). We wrap it using Rice, exactly like zvec-ruby.
|
|
20
|
+
|
|
21
|
+
**Important lessons from zvec-ruby to apply here:**
|
|
22
|
+
- Use `require "mkmf-rice"` (not `require "rice/extconf"`) for Rice 4.x
|
|
23
|
+
- Use `define_module_under()` and `define_enum_under()` for Rice 4.x
|
|
24
|
+
- Wrap all raw `VALUE` returns in `Rice::Object()` when pushing to Arrays
|
|
25
|
+
- Use `std::make_shared` when C++ API expects shared_ptr
|
|
26
|
+
- Extract results to Ruby Hashes/Arrays in C++ before returning (avoid dangling pointers)
|
|
27
|
+
- Ship precompiled gems — ONNX Runtime is a large C++ library, nobody wants to build it
|
|
28
|
+
- On macOS use `-force_load` for static archives with static initializers
|
|
29
|
+
- Default to safe options (like mmap=true in zvec-ruby)
|
|
30
|
+
|
|
31
|
+
### ONNX Runtime Linking
|
|
32
|
+
|
|
33
|
+
ONNX Runtime provides **prebuilt shared libraries** (`.so`/`.dylib`) for all platforms. Unlike zvec (which required building from source), we can download the official release and link against it. This is much simpler.
|
|
34
|
+
|
|
35
|
+
Download from: https://github.com/microsoft/onnxruntime/releases
|
|
36
|
+
|
|
37
|
+
The `extconf.rb` should:
|
|
38
|
+
1. Check for `ONNX_RUNTIME_DIR` env var
|
|
39
|
+
2. Check for system-installed onnxruntime via pkg-config
|
|
40
|
+
3. Auto-download the correct prebuilt release if neither found
|
|
41
|
+
|
|
42
|
+
### Precompiled Gems
|
|
43
|
+
|
|
44
|
+
For precompiled gems, statically link or bundle the ONNX Runtime `.dylib`/`.so` inside the gem. The gem will be ~50-80MB but users get zero-install experience.
|
|
45
|
+
|
|
46
|
+
## Core API Design
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
require "onnx_ruby"
|
|
50
|
+
|
|
51
|
+
# --- Session (model loading) ---
|
|
52
|
+
|
|
53
|
+
# Load a model
|
|
54
|
+
session = OnnxRuby::Session.new("model.onnx")
|
|
55
|
+
|
|
56
|
+
# With options
|
|
57
|
+
session = OnnxRuby::Session.new("model.onnx",
|
|
58
|
+
providers: [:cpu], # :cpu, :cuda, :coreml, :tensorrt
|
|
59
|
+
inter_threads: 4,
|
|
60
|
+
intra_threads: 2,
|
|
61
|
+
log_level: :warning
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# Model info
|
|
65
|
+
session.inputs # => [{ name: "input_ids", type: :int64, shape: [-1, 512] }]
|
|
66
|
+
session.outputs # => [{ name: "embeddings", type: :float, shape: [-1, 384] }]
|
|
67
|
+
|
|
68
|
+
# --- Inference ---
|
|
69
|
+
|
|
70
|
+
# Run inference
|
|
71
|
+
result = session.run(
|
|
72
|
+
{ "input_ids" => [[101, 2023, 2003, 1037, 3231, 102]] },
|
|
73
|
+
)
|
|
74
|
+
result["embeddings"] # => [[0.0123, -0.0456, ...]]
|
|
75
|
+
|
|
76
|
+
# With output names
|
|
77
|
+
result = session.run(inputs, output_names: ["embeddings"])
|
|
78
|
+
|
|
79
|
+
# --- Tensor ---
|
|
80
|
+
|
|
81
|
+
# Create tensors explicitly
|
|
82
|
+
tensor = OnnxRuby::Tensor.new([1, 2, 3, 4], shape: [2, 2], dtype: :int64)
|
|
83
|
+
tensor.to_a # => [[1, 2], [3, 4]]
|
|
84
|
+
tensor.shape # => [2, 2]
|
|
85
|
+
tensor.dtype # => :int64
|
|
86
|
+
|
|
87
|
+
# From flat array
|
|
88
|
+
tensor = OnnxRuby::Tensor.float([0.1, 0.2, 0.3], shape: [1, 3])
|
|
89
|
+
|
|
90
|
+
# --- High-Level Helpers ---
|
|
91
|
+
|
|
92
|
+
# Embedding model (wraps session with pre/post processing)
|
|
93
|
+
embedder = OnnxRuby::Embedder.new("all-MiniLM-L6-v2.onnx",
|
|
94
|
+
tokenizer: "sentence-transformers/all-MiniLM-L6-v2" # requires tokenizer-ruby
|
|
95
|
+
)
|
|
96
|
+
embeddings = embedder.embed("Hello world") # => [0.0123, ...]
|
|
97
|
+
embeddings = embedder.embed_batch(["Hello", "World"]) # => [[...], [...]]
|
|
98
|
+
|
|
99
|
+
# Classifier
|
|
100
|
+
classifier = OnnxRuby::Classifier.new("intent_model.onnx",
|
|
101
|
+
tokenizer: "bert-base-uncased",
|
|
102
|
+
labels: ["greeting", "farewell", "question", "command"]
|
|
103
|
+
)
|
|
104
|
+
classifier.predict("Hello there!") # => { label: "greeting", score: 0.95 }
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Features to Implement
|
|
108
|
+
|
|
109
|
+
### Phase 1 — Core (MVP)
|
|
110
|
+
- [ ] `Session.new(path, options)` — load ONNX model
|
|
111
|
+
- [ ] `session.run(inputs)` — run inference, return outputs
|
|
112
|
+
- [ ] `session.inputs` / `session.outputs` — model metadata
|
|
113
|
+
- [ ] `Tensor` class — create and manipulate tensors
|
|
114
|
+
- [ ] Support dtypes: float32, float64, int32, int64, string, bool
|
|
115
|
+
- [ ] Support shapes: 1D, 2D, 3D, 4D tensors
|
|
116
|
+
- [ ] CPU execution provider
|
|
117
|
+
|
|
118
|
+
### Phase 2 — Providers & Options
|
|
119
|
+
- [ ] CoreML provider (macOS acceleration)
|
|
120
|
+
- [ ] CUDA provider (NVIDIA GPU)
|
|
121
|
+
- [ ] Session options: threading, memory, optimization level
|
|
122
|
+
- [ ] Model optimization: `OnnxRuby.optimize("model.onnx", "optimized.onnx")`
|
|
123
|
+
- [ ] Dynamic shapes (batching)
|
|
124
|
+
|
|
125
|
+
### Phase 3 — High-Level API
|
|
126
|
+
- [ ] `Embedder` — embedding model wrapper (tokenize → infer → normalize)
|
|
127
|
+
- [ ] `Classifier` — text classification wrapper
|
|
128
|
+
- [ ] `Reranker` — cross-encoder reranking wrapper
|
|
129
|
+
- [ ] Integration with tokenizer-ruby for text preprocessing
|
|
130
|
+
- [ ] Model hub: `OnnxRuby::Hub.download("sentence-transformers/all-MiniLM-L6-v2")`
|
|
131
|
+
|
|
132
|
+
### Phase 4 — Rails Integration
|
|
133
|
+
- [ ] `OnnxRuby.configure { |c| c.models_path = "app/models/onnx" }`
|
|
134
|
+
- [ ] Lazy model loading (load on first inference)
|
|
135
|
+
- [ ] Connection pool for thread-safe concurrent inference
|
|
136
|
+
- [ ] ActiveModel integration for embedding generation
|
|
137
|
+
|
|
138
|
+
## Project Structure
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
onnx-ruby/
|
|
142
|
+
├── CLAUDE.md
|
|
143
|
+
├── Gemfile
|
|
144
|
+
├── Rakefile
|
|
145
|
+
├── LICENSE # MIT
|
|
146
|
+
├── README.md
|
|
147
|
+
├── onnx-ruby.gemspec
|
|
148
|
+
├── lib/
|
|
149
|
+
│ ├── onnx_ruby.rb
|
|
150
|
+
│ └── onnx_ruby/
|
|
151
|
+
│ ├── version.rb
|
|
152
|
+
│ ├── session.rb
|
|
153
|
+
│ ├── tensor.rb
|
|
154
|
+
│ ├── embedder.rb
|
|
155
|
+
│ ├── classifier.rb
|
|
156
|
+
│ └── reranker.rb
|
|
157
|
+
├── ext/
|
|
158
|
+
│ └── onnx_ruby/
|
|
159
|
+
│ ├── extconf.rb
|
|
160
|
+
│ └── onnx_ruby_ext.cpp
|
|
161
|
+
├── test/
|
|
162
|
+
│ ├── test_helper.rb
|
|
163
|
+
│ ├── test_session.rb
|
|
164
|
+
│ ├── test_tensor.rb
|
|
165
|
+
│ ├── test_inference.rb
|
|
166
|
+
│ └── models/ # small test ONNX models
|
|
167
|
+
│ └── .gitkeep
|
|
168
|
+
├── script/
|
|
169
|
+
│ ├── download_onnxruntime.sh
|
|
170
|
+
│ └── package_native_gem.rb
|
|
171
|
+
└── examples/
|
|
172
|
+
├── embedding.rb
|
|
173
|
+
├── classification.rb
|
|
174
|
+
└── with_zvec.rb # full RAG example with zvec-ruby
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Dependencies
|
|
178
|
+
|
|
179
|
+
### Runtime
|
|
180
|
+
- `rice` (>= 4.0) — C++ to Ruby bindings
|
|
181
|
+
- ONNX Runtime shared library (bundled in precompiled gems)
|
|
182
|
+
|
|
183
|
+
### Optional
|
|
184
|
+
- `tokenizer-ruby` — for Embedder/Classifier text preprocessing
|
|
185
|
+
|
|
186
|
+
### Development
|
|
187
|
+
- `rake-compiler` for building native extensions
|
|
188
|
+
- `rake-compiler-dock` for cross-compilation
|
|
189
|
+
- `minitest` for testing
|
|
190
|
+
- `rake` for tasks
|
|
191
|
+
|
|
192
|
+
## Key C++ Binding Details
|
|
193
|
+
|
|
194
|
+
### ONNX Runtime C++ API Structure
|
|
195
|
+
|
|
196
|
+
```cpp
|
|
197
|
+
#include <onnxruntime_cxx_api.h>
|
|
198
|
+
|
|
199
|
+
// Key classes to wrap:
|
|
200
|
+
Ort::Env // Runtime environment (singleton)
|
|
201
|
+
Ort::Session // Model session
|
|
202
|
+
Ort::SessionOptions
|
|
203
|
+
Ort::Value // Tensor (input/output)
|
|
204
|
+
Ort::MemoryInfo // Memory allocation info
|
|
205
|
+
Ort::TypeInfo // Model input/output type info
|
|
206
|
+
Ort::TensorTypeAndShapeInfo
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### extconf.rb approach
|
|
210
|
+
|
|
211
|
+
```ruby
|
|
212
|
+
require "mkmf-rice"
|
|
213
|
+
|
|
214
|
+
# Try to find ONNX Runtime
|
|
215
|
+
ort_dir = ENV["ONNX_RUNTIME_DIR"]
|
|
216
|
+
|
|
217
|
+
unless ort_dir
|
|
218
|
+
# Auto-download prebuilt ONNX Runtime for the current platform
|
|
219
|
+
# from https://github.com/microsoft/onnxruntime/releases
|
|
220
|
+
ort_dir = download_onnxruntime() # helper function
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
dir_config("onnxruntime", "#{ort_dir}/include", "#{ort_dir}/lib")
|
|
224
|
+
$INCFLAGS << " -I#{ort_dir}/include"
|
|
225
|
+
$LDFLAGS << " -L#{ort_dir}/lib"
|
|
226
|
+
$libs << " -lonnxruntime"
|
|
227
|
+
|
|
228
|
+
have_header("onnxruntime_cxx_api.h") or
|
|
229
|
+
abort "Cannot find ONNX Runtime headers"
|
|
230
|
+
|
|
231
|
+
create_makefile("onnx_ruby/onnx_ruby_ext")
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### C++ Extension Skeleton
|
|
235
|
+
|
|
236
|
+
```cpp
|
|
237
|
+
#include <rice/rice.hpp>
|
|
238
|
+
#include <onnxruntime_cxx_api.h>
|
|
239
|
+
|
|
240
|
+
using namespace Rice;
|
|
241
|
+
|
|
242
|
+
// Global ORT environment (initialized once)
|
|
243
|
+
static Ort::Env& get_env() {
|
|
244
|
+
static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "onnx_ruby");
|
|
245
|
+
return env;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Wrap Ort::Session
|
|
249
|
+
// Wrap Ort::Value (Tensor)
|
|
250
|
+
// Handle type conversion: Ruby Array ↔ ORT Tensor
|
|
251
|
+
// Map ORT errors to Ruby exceptions
|
|
252
|
+
|
|
253
|
+
void Init_onnx_ruby_ext() {
|
|
254
|
+
Module rb_mOnnxRuby = define_module("OnnxRuby");
|
|
255
|
+
Module rb_mExt = define_module_under(rb_mOnnxRuby, "Ext");
|
|
256
|
+
|
|
257
|
+
// Define Session, Tensor, etc.
|
|
258
|
+
}
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Critical: Tensor ↔ Ruby Array Conversion
|
|
262
|
+
|
|
263
|
+
The most complex part. Need to handle:
|
|
264
|
+
- Ruby Array of floats → ORT float32 tensor (most common for embeddings)
|
|
265
|
+
- Ruby Array of integers → ORT int64 tensor (for token IDs)
|
|
266
|
+
- Nested Ruby Arrays → multi-dimensional tensors
|
|
267
|
+
- ORT output tensors → Ruby Arrays (with proper Float wrapping via `Rice::Object(rb_float_new())`)
|
|
268
|
+
|
|
269
|
+
```cpp
|
|
270
|
+
// Ruby Array → ORT Tensor
|
|
271
|
+
Ort::Value array_to_tensor(Rice::Array arr, const std::vector<int64_t>& shape) {
|
|
272
|
+
// Flatten nested arrays
|
|
273
|
+
// Detect dtype from Ruby values
|
|
274
|
+
// Create ORT tensor with proper memory allocation
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
// ORT Tensor → Ruby Array
|
|
278
|
+
Rice::Object tensor_to_array(const Ort::Value& tensor) {
|
|
279
|
+
// Read shape
|
|
280
|
+
// Read dtype
|
|
281
|
+
// Copy data to Ruby Array (with Rice::Object wrapping!)
|
|
282
|
+
}
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## Testing Strategy
|
|
286
|
+
|
|
287
|
+
- Test with small ONNX models (generate test models with Python: `torch.onnx.export`)
|
|
288
|
+
- Test model loading and metadata inspection
|
|
289
|
+
- Test inference with known inputs/outputs
|
|
290
|
+
- Test all supported dtypes
|
|
291
|
+
- Test batch inference
|
|
292
|
+
- Test error handling (invalid model, wrong input shape, etc.)
|
|
293
|
+
- Benchmark against Python's onnxruntime for correctness
|
|
294
|
+
|
|
295
|
+
### Create test models (Python script to include):
|
|
296
|
+
```python
|
|
297
|
+
# script/create_test_models.py
|
|
298
|
+
import torch
|
|
299
|
+
import torch.nn as nn
|
|
300
|
+
|
|
301
|
+
# Simple linear model for testing
|
|
302
|
+
class SimpleModel(nn.Module):
|
|
303
|
+
def __init__(self):
|
|
304
|
+
super().__init__()
|
|
305
|
+
self.linear = nn.Linear(4, 3)
|
|
306
|
+
def forward(self, x):
|
|
307
|
+
return self.linear(x)
|
|
308
|
+
|
|
309
|
+
model = SimpleModel()
|
|
310
|
+
dummy = torch.randn(1, 4)
|
|
311
|
+
torch.onnx.export(model, dummy, "test/models/simple.onnx",
|
|
312
|
+
input_names=["input"], output_names=["output"])
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## Publishing
|
|
316
|
+
|
|
317
|
+
- RubyGems.org: `gem push onnx-ruby-*.gem`
|
|
318
|
+
- gem.coop: `GEM_HOST_API_KEY=<redacted — never commit API keys; read from env or credentials file> gem push onnx-ruby-*.gem --host https://beta.gem.coop/@johannesdwicahyo`
|
|
319
|
+
|
|
320
|
+
## Notes from zvec-ruby Experience
|
|
321
|
+
|
|
322
|
+
- **Rice 4.x API**: `define_module_under()`, `define_enum_under()`, not the 3.x syntax
|
|
323
|
+
- **Rice::Object wrapping**: ALWAYS wrap `rb_float_new()`, `Qtrue`, `Qnil` in `Rice::Object()` when pushing to Arrays
|
|
324
|
+
- **shared_ptr**: Use `make_shared` when C++ expects `shared_ptr`, accept by `const T&` in bindings
|
|
325
|
+
- **Extract results in C++**: Don't try to push C++ objects directly into Ruby arrays. Extract to Hashes/Arrays first.
|
|
326
|
+
- **Precompiled gems**: Essential. Use `script/package_native_gem.rb` for macOS, `rake-compiler-dock` for Linux.
|
|
327
|
+
- **ONNX Runtime ships prebuilt binaries**: Much easier than zvec. Download from GitHub releases, link against `.dylib`/`.so`.
|
|
328
|
+
- **Static initializers**: May need `-force_load` if ONNX Runtime uses static registration patterns.
|
|
329
|
+
- **mmap/memory**: ONNX Runtime manages its own memory via allocators. Let it handle memory, don't fight it.
|
|
330
|
+
|
|
331
|
+
## Existing Ruby ONNX Solutions
|
|
332
|
+
|
|
333
|
+
- `onnxruntime` gem by ankane — exists but is FFI-based and limited. We can provide better performance and API with Rice + additional high-level features (Embedder, Classifier, Reranker).
|
|
334
|
+
- Differentiate by: better API, precompiled gems, high-level wrappers, tokenizer-ruby integration
|
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Johannes Dwi Cahyo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# onnx-ruby
|
|
2
|
+
|
|
3
|
+
High-performance [ONNX Runtime](https://onnxruntime.ai/) bindings for Ruby. Run ONNX models locally for embeddings, classification, reranking, and any other ML inference — without Python or API calls.
|
|
4
|
+
|
|
5
|
+
Built with [Rice](https://github.com/ruby-rice/rice) (C++ to Ruby bindings) wrapping the ONNX Runtime C++ API directly.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Fast inference** — native C++ bindings, not FFI
|
|
10
|
+
- **Auto-download** — ONNX Runtime is downloaded automatically during gem install
|
|
11
|
+
- **Multiple providers** — CPU, CoreML (macOS), CUDA, TensorRT
|
|
12
|
+
- **High-level wrappers** — `Embedder`, `Classifier`, `Reranker` for common ML tasks
|
|
13
|
+
- **Thread-safe** — `SessionPool` for concurrent inference in multi-threaded apps
|
|
14
|
+
- **Lazy loading** — `LazySession` loads models on first use
|
|
15
|
+
- **Rails-ready** — `OnnxRuby::Model` mixin, global configuration, connection pooling
|
|
16
|
+
- **Model hub** — download models from HuggingFace with local caching
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```ruby
|
|
21
|
+
# Gemfile
|
|
22
|
+
gem "onnx-ruby"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
```sh
|
|
26
|
+
bundle install
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
ONNX Runtime (v1.24.3) is automatically downloaded during native extension compilation.
|
|
30
|
+
|
|
31
|
+
To use a custom ONNX Runtime installation:
|
|
32
|
+
|
|
33
|
+
```sh
|
|
34
|
+
ONNX_RUNTIME_DIR=/path/to/onnxruntime bundle install
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
### Basic Inference
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
require "onnx_ruby"
|
|
43
|
+
|
|
44
|
+
# Load a model
|
|
45
|
+
session = OnnxRuby::Session.new("model.onnx")
|
|
46
|
+
|
|
47
|
+
# Inspect model
|
|
48
|
+
session.inputs # => [{ name: "input", type: :float32, shape: [-1, 4] }]
|
|
49
|
+
session.outputs # => [{ name: "output", type: :float32, shape: [-1, 3] }]
|
|
50
|
+
|
|
51
|
+
# Run inference
|
|
52
|
+
result = session.run({ "input" => [[1.0, 2.0, 3.0, 4.0]] })
|
|
53
|
+
result["output"] # => [[0.123, -0.456, 0.789]]
|
|
54
|
+
|
|
55
|
+
# Batch inference
|
|
56
|
+
result = session.run({ "input" => [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]] })
|
|
57
|
+
result["output"] # => [[...], [...]]
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Embeddings
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
require "onnx_ruby"
|
|
64
|
+
require "tokenizers"
|
|
65
|
+
|
|
66
|
+
# With a HuggingFace tokenizer
|
|
67
|
+
tokenizer = Tokenizers::Tokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
|
|
68
|
+
embedder = OnnxRuby::Embedder.new("all-MiniLM-L6-v2.onnx",
|
|
69
|
+
tokenizer: tokenizer,
|
|
70
|
+
normalize: true
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Single embedding
|
|
74
|
+
embedding = embedder.embed("Hello world") # => [0.0123, -0.0456, ...] (384 dims)
|
|
75
|
+
|
|
76
|
+
# Batch embedding
|
|
77
|
+
embeddings = embedder.embed_batch(["Hello", "World"]) # => [[...], [...]]
|
|
78
|
+
|
|
79
|
+
# Without tokenizer (pre-tokenized input)
|
|
80
|
+
embedder = OnnxRuby::Embedder.new("model.onnx")
|
|
81
|
+
embedding = embedder.embed({
|
|
82
|
+
"input_ids" => [101, 2023, 2003, 102],
|
|
83
|
+
"attention_mask" => [1, 1, 1, 1]
|
|
84
|
+
})
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Classification
|
|
88
|
+
|
|
89
|
+
```ruby
|
|
90
|
+
classifier = OnnxRuby::Classifier.new("classifier.onnx",
|
|
91
|
+
labels: ["greeting", "farewell", "question", "command"]
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# With feature vectors
|
|
95
|
+
result = classifier.predict([0.1, 0.2, 0.3, 0.4])
|
|
96
|
+
# => { label: "greeting", score: 0.95, scores: [0.95, 0.02, 0.02, 0.01] }
|
|
97
|
+
|
|
98
|
+
# Batch
|
|
99
|
+
results = classifier.predict_batch([features1, features2])
|
|
100
|
+
|
|
101
|
+
# With tokenizer for text input
|
|
102
|
+
classifier = OnnxRuby::Classifier.new("bert-classifier.onnx",
|
|
103
|
+
tokenizer: "bert-base-uncased",
|
|
104
|
+
labels: ["positive", "negative"]
|
|
105
|
+
)
|
|
106
|
+
classifier.predict("This is great!")
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Reranking
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
reranker = OnnxRuby::Reranker.new("reranker.onnx", tokenizer: tokenizer)
|
|
113
|
+
|
|
114
|
+
# Rerank documents by relevance to a query
|
|
115
|
+
results = reranker.rerank("What is Ruby?", [
|
|
116
|
+
"Ruby is a programming language",
|
|
117
|
+
"The weather is nice today",
|
|
118
|
+
"Rails is built with Ruby"
|
|
119
|
+
])
|
|
120
|
+
# => [
|
|
121
|
+
# { document: "Ruby is a programming language", score: 0.98, index: 0 },
|
|
122
|
+
# { document: "Rails is built with Ruby", score: 0.85, index: 2 },
|
|
123
|
+
# { document: "The weather is nice today", score: 0.01, index: 1 }
|
|
124
|
+
# ]
|
|
125
|
+
|
|
126
|
+
# Raw scoring with pre-tokenized inputs
|
|
127
|
+
scores = reranker.score(
|
|
128
|
+
input_ids: [[101, 2023, 102], [101, 7592, 102]],
|
|
129
|
+
attention_mask: [[1, 1, 1], [1, 1, 1]]
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Session Options
|
|
134
|
+
|
|
135
|
+
```ruby
|
|
136
|
+
session = OnnxRuby::Session.new("model.onnx",
|
|
137
|
+
providers: [:coreml, :cpu], # execution providers (fallback order)
|
|
138
|
+
optimization_level: :all, # :none, :basic, :extended, :all
|
|
139
|
+
intra_threads: 4, # threads within an operator
|
|
140
|
+
inter_threads: 2, # threads between operators
|
|
141
|
+
execution_mode: :parallel, # :sequential or :parallel
|
|
142
|
+
memory_pattern: true, # pre-allocate memory
|
|
143
|
+
cpu_mem_arena: true, # use memory arena
|
|
144
|
+
log_level: :warning # :verbose, :info, :warning, :error, :fatal
|
|
145
|
+
)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Execution Providers
|
|
149
|
+
|
|
150
|
+
```ruby
|
|
151
|
+
# List available providers
|
|
152
|
+
OnnxRuby.available_providers
|
|
153
|
+
# => ["CoreMLExecutionProvider", "CPUExecutionProvider"]
|
|
154
|
+
|
|
155
|
+
# CoreML (macOS — uses Apple Neural Engine)
|
|
156
|
+
session = OnnxRuby::Session.new("model.onnx", providers: [:coreml])
|
|
157
|
+
|
|
158
|
+
# CUDA (NVIDIA GPU — requires CUDA build of ONNX Runtime)
|
|
159
|
+
session = OnnxRuby::Session.new("model.onnx", providers: [:cuda, :cpu])
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Model Optimization
|
|
163
|
+
|
|
164
|
+
```ruby
|
|
165
|
+
# Optimize and save a model
|
|
166
|
+
OnnxRuby.optimize("model.onnx", "model_optimized.onnx", level: :all)
|
|
167
|
+
|
|
168
|
+
# Use the optimized model
|
|
169
|
+
session = OnnxRuby::Session.new("model_optimized.onnx")
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Tensors
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
# Create typed tensors
|
|
176
|
+
tensor = OnnxRuby::Tensor.new([1, 2, 3, 4], shape: [2, 2], dtype: :int64)
|
|
177
|
+
tensor.to_a # => [[1, 2], [3, 4]]
|
|
178
|
+
tensor.shape # => [2, 2]
|
|
179
|
+
tensor.dtype # => :int64
|
|
180
|
+
|
|
181
|
+
# Convenience constructors
|
|
182
|
+
OnnxRuby::Tensor.float([0.1, 0.2, 0.3], shape: [1, 3])
|
|
183
|
+
OnnxRuby::Tensor.int64([1, 2, 3], shape: [3])
|
|
184
|
+
OnnxRuby::Tensor.double([1.0, 2.0], shape: [2])
|
|
185
|
+
OnnxRuby::Tensor.int32([1, 2], shape: [2])
|
|
186
|
+
|
|
187
|
+
# Use tensors as session input
|
|
188
|
+
tensor = OnnxRuby::Tensor.float([1.0, 2.0, 3.0, 4.0], shape: [1, 4])
|
|
189
|
+
session.run({ "input" => tensor })
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Supported dtypes: `float32`, `float64`, `int32`, `int64`, `bool`, `string`
|
|
193
|
+
|
|
194
|
+
## Thread Safety
|
|
195
|
+
|
|
196
|
+
### Session Pool
|
|
197
|
+
|
|
198
|
+
```ruby
|
|
199
|
+
# Create a pool of sessions for concurrent inference
|
|
200
|
+
pool = OnnxRuby::SessionPool.new("model.onnx", size: 5, timeout: 10)
|
|
201
|
+
|
|
202
|
+
# Auto checkout/checkin
|
|
203
|
+
result = pool.run({ "input" => data })
|
|
204
|
+
|
|
205
|
+
# Or manual block form
|
|
206
|
+
pool.with_session do |session|
|
|
207
|
+
session.run({ "input" => data })
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# Pool stats
|
|
211
|
+
pool.size # => number of created sessions
|
|
212
|
+
pool.available # => number of idle sessions
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Lazy Loading
|
|
216
|
+
|
|
217
|
+
```ruby
|
|
218
|
+
# Model loads on first use, thread-safe
|
|
219
|
+
lazy = OnnxRuby::LazySession.new("model.onnx")
|
|
220
|
+
lazy.loaded? # => false
|
|
221
|
+
lazy.run(inputs)
|
|
222
|
+
lazy.loaded? # => true
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
## Rails Integration
|
|
226
|
+
|
|
227
|
+
### Configuration
|
|
228
|
+
|
|
229
|
+
```ruby
|
|
230
|
+
# config/initializers/onnx_ruby.rb
|
|
231
|
+
OnnxRuby.configure do |c|
|
|
232
|
+
c.models_path = "app/models/onnx"
|
|
233
|
+
c.default_providers = [:coreml, :cpu]
|
|
234
|
+
c.default_log_level = :warning
|
|
235
|
+
c.pool_size = 5
|
|
236
|
+
c.pool_timeout = 5
|
|
237
|
+
end
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### ActiveModel Mixin
|
|
241
|
+
|
|
242
|
+
```ruby
|
|
243
|
+
class Document < ApplicationRecord
|
|
244
|
+
include OnnxRuby::Model
|
|
245
|
+
|
|
246
|
+
onnx_model "embeddings.onnx"
|
|
247
|
+
onnx_input ->(doc) {
|
|
248
|
+
# tokenize doc.content and return input hash
|
|
249
|
+
{ "input_ids" => ids, "attention_mask" => mask }
|
|
250
|
+
}
|
|
251
|
+
onnx_output "embeddings"
|
|
252
|
+
|
|
253
|
+
def generate_embedding
|
|
254
|
+
self.embedding = onnx_predict.first
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
doc = Document.find(1)
|
|
259
|
+
doc.generate_embedding # runs ONNX inference
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
The model is loaded lazily on first inference and shared across all instances.
|
|
263
|
+
|
|
264
|
+
## Model Hub
|
|
265
|
+
|
|
266
|
+
```ruby
|
|
267
|
+
# Download from HuggingFace
|
|
268
|
+
path = OnnxRuby::Hub.download("sentence-transformers/all-MiniLM-L6-v2",
|
|
269
|
+
filename: "model.onnx"
|
|
270
|
+
)
|
|
271
|
+
session = OnnxRuby::Session.new(path)
|
|
272
|
+
|
|
273
|
+
# Cache management
|
|
274
|
+
OnnxRuby::Hub.cached_models # => ["/home/user/.cache/onnx_ruby/models/..."]
|
|
275
|
+
OnnxRuby::Hub.clear_cache
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Requirements
|
|
279
|
+
|
|
280
|
+
- Ruby >= 3.1
|
|
281
|
+
- C++ compiler with C++17 support
|
|
282
|
+
- ONNX Runtime (auto-downloaded during install)
|
|
283
|
+
|
|
284
|
+
### Optional
|
|
285
|
+
|
|
286
|
+
- [tokenizers](https://github.com/ankane/tokenizers-ruby) gem — for text tokenization in Embedder/Classifier/Reranker
|
|
287
|
+
|
|
288
|
+
## Development
|
|
289
|
+
|
|
290
|
+
```sh
|
|
291
|
+
git clone https://github.com/johannesdwicahyo/onnx-ruby.git
|
|
292
|
+
cd onnx-ruby
|
|
293
|
+
bundle install
|
|
294
|
+
bundle exec rake compile
|
|
295
|
+
python3 script/create_test_models.py # requires torch, onnx, onnxscript
|
|
296
|
+
bundle exec rake test
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## License
|
|
300
|
+
|
|
301
|
+
MIT License. See [LICENSE](LICENSE).
|
data/Rakefile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rake/extensiontask"
|
|
4
|
+
require "rake/testtask"
|
|
5
|
+
|
|
6
|
+
Rake::ExtensionTask.new("onnx_ruby_ext") do |ext|
|
|
7
|
+
ext.lib_dir = "lib/onnx_ruby"
|
|
8
|
+
ext.ext_dir = "ext/onnx_ruby"
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
Rake::TestTask.new(:test) do |t|
|
|
12
|
+
t.libs << "test"
|
|
13
|
+
t.libs << "lib"
|
|
14
|
+
t.test_files = FileList["test/**/test_*.rb"]
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
task default: %i[compile test]
|