onnx-ruby 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/MILESTONES.md +12 -0
- data/ext/onnx_ruby/onnx_ruby_ext.cpp +59 -2
- data/lib/onnx_ruby/classifier.rb +11 -17
- data/lib/onnx_ruby/configuration.rb +21 -2
- data/lib/onnx_ruby/embedder.rb +7 -16
- data/lib/onnx_ruby/lazy_session.rb +0 -2
- data/lib/onnx_ruby/reranker.rb +2 -16
- data/lib/onnx_ruby/session.rb +13 -1
- data/lib/onnx_ruby/session_pool.rb +4 -1
- data/lib/onnx_ruby/tensor.rb +8 -0
- data/lib/onnx_ruby/tokenizer_support.rb +22 -0
- data/lib/onnx_ruby/version.rb +1 -1
- data/lib/onnx_ruby.rb +1 -0
- metadata +3 -2
- data/CLAUDE.md +0 -334
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 16caafd6c79615589d18c10f16be77816875f823f0024b78d32357b025645d30
|
|
4
|
+
data.tar.gz: 915a8bbf683808063b752e81da2a5629157baf73959f99bee7a61d1c1871fad3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b3fcc3eb5eb0605ff0866af424b0436f2bd294675a5eb32c0348182ad65bea9b64997d19c3bb026c0b7b8290c8fe59038693f3737ca7984ffb4712cd0e88c2d3
|
|
7
|
+
data.tar.gz: d22068ed40991670d3dee5780339cea9232bc7627a07f796d9b0a7bc870b648250622e66b8eb3337a186eb9ff7b092e7e6bc6dd1da7c1ac4cfc99840b2b4a9ae
|
data/MILESTONES.md
ADDED
|
@@ -94,12 +94,63 @@ static Rice::Object tensor_to_ruby(const Ort::Value& tensor) {
|
|
|
94
94
|
break;
|
|
95
95
|
}
|
|
96
96
|
case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: {
|
|
97
|
-
const
|
|
97
|
+
const uint8_t* data = reinterpret_cast<const uint8_t*>(tensor.GetTensorData<bool>());
|
|
98
98
|
for (size_t i = 0; i < total; i++) {
|
|
99
99
|
flat.push(Rice::Object(data[i] ? Qtrue : Qfalse));
|
|
100
100
|
}
|
|
101
101
|
break;
|
|
102
102
|
}
|
|
103
|
+
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: {
|
|
104
|
+
const uint8_t* data = tensor.GetTensorData<uint8_t>();
|
|
105
|
+
for (size_t i = 0; i < total; i++) {
|
|
106
|
+
flat.push(Rice::Object(INT2NUM(data[i])));
|
|
107
|
+
}
|
|
108
|
+
break;
|
|
109
|
+
}
|
|
110
|
+
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: {
|
|
111
|
+
const int8_t* data = tensor.GetTensorData<int8_t>();
|
|
112
|
+
for (size_t i = 0; i < total; i++) {
|
|
113
|
+
flat.push(Rice::Object(INT2NUM(data[i])));
|
|
114
|
+
}
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: {
|
|
118
|
+
const uint16_t* data = tensor.GetTensorData<uint16_t>();
|
|
119
|
+
for (size_t i = 0; i < total; i++) {
|
|
120
|
+
flat.push(Rice::Object(INT2NUM(data[i])));
|
|
121
|
+
}
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: {
|
|
125
|
+
const int16_t* data = tensor.GetTensorData<int16_t>();
|
|
126
|
+
for (size_t i = 0; i < total; i++) {
|
|
127
|
+
flat.push(Rice::Object(INT2NUM(data[i])));
|
|
128
|
+
}
|
|
129
|
+
break;
|
|
130
|
+
}
|
|
131
|
+
case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: {
|
|
132
|
+
// float16 is stored as uint16_t; convert to float for Ruby
|
|
133
|
+
const uint16_t* data = tensor.GetTensorData<uint16_t>();
|
|
134
|
+
for (size_t i = 0; i < total; i++) {
|
|
135
|
+
// IEEE 754 half-precision to single-precision conversion
|
|
136
|
+
uint16_t h = data[i];
|
|
137
|
+
uint32_t sign = (h & 0x8000u) << 16;
|
|
138
|
+
uint32_t exponent = (h >> 10) & 0x1F;
|
|
139
|
+
uint32_t mantissa = h & 0x03FF;
|
|
140
|
+
uint32_t f;
|
|
141
|
+
if (exponent == 0) {
|
|
142
|
+
f = sign; // zero or subnormal (treat as zero for simplicity)
|
|
143
|
+
} else if (exponent == 31) {
|
|
144
|
+
f = sign | 0x7F800000u | (mantissa << 13); // inf or nan
|
|
145
|
+
} else {
|
|
146
|
+
f = sign | ((exponent + 112) << 23) | (mantissa << 13);
|
|
147
|
+
}
|
|
148
|
+
float val;
|
|
149
|
+
memcpy(&val, &f, sizeof(float));
|
|
150
|
+
flat.push(Rice::Object(rb_float_new(static_cast<double>(val))));
|
|
151
|
+
}
|
|
152
|
+
break;
|
|
153
|
+
}
|
|
103
154
|
case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: {
|
|
104
155
|
size_t count = total;
|
|
105
156
|
std::vector<std::string> strings(count);
|
|
@@ -317,7 +368,13 @@ public:
|
|
|
317
368
|
}
|
|
318
369
|
|
|
319
370
|
size_t total_elements = 1;
|
|
320
|
-
for (auto dim : shape)
|
|
371
|
+
for (auto dim : shape) {
|
|
372
|
+
if (dim < 0) throw std::runtime_error("Negative shape dimension: " + std::to_string(dim));
|
|
373
|
+
if (dim > 0 && total_elements > SIZE_MAX / static_cast<size_t>(dim)) {
|
|
374
|
+
throw std::runtime_error("Shape dimension overflow");
|
|
375
|
+
}
|
|
376
|
+
total_elements *= static_cast<size_t>(dim);
|
|
377
|
+
}
|
|
321
378
|
|
|
322
379
|
if (dtype == "float") {
|
|
323
380
|
float_buffers.emplace_back(total_elements);
|
data/lib/onnx_ruby/classifier.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Classifier
|
|
5
|
+
include TokenizerSupport
|
|
6
|
+
|
|
5
7
|
attr_reader :session, :labels
|
|
6
8
|
|
|
7
9
|
def initialize(model_path, tokenizer: nil, labels: nil, **session_opts)
|
|
@@ -31,23 +33,9 @@ module OnnxRuby
|
|
|
31
33
|
|
|
32
34
|
private
|
|
33
35
|
|
|
34
|
-
def resolve_tokenizer(tokenizer)
|
|
35
|
-
return nil if tokenizer.nil?
|
|
36
|
-
|
|
37
|
-
if tokenizer.respond_to?(:encode)
|
|
38
|
-
tokenizer
|
|
39
|
-
else
|
|
40
|
-
begin
|
|
41
|
-
require "tokenizers"
|
|
42
|
-
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
43
|
-
rescue LoadError
|
|
44
|
-
raise Error, "tokenizer-ruby gem is required for text tokenization. " \
|
|
45
|
-
"Install with: gem install tokenizers"
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
36
|
def prepare_inputs(inputs)
|
|
37
|
+
return [] if inputs.empty?
|
|
38
|
+
|
|
51
39
|
if inputs.first.is_a?(String)
|
|
52
40
|
raise Error, "tokenizer is required for text inputs" unless @tokenizer
|
|
53
41
|
|
|
@@ -98,9 +86,15 @@ module OnnxRuby
|
|
|
98
86
|
end
|
|
99
87
|
|
|
100
88
|
def softmax(logits)
|
|
89
|
+
# Clamp extreme values to prevent overflow
|
|
101
90
|
max_val = logits.max
|
|
102
|
-
exps = logits.map
|
|
91
|
+
exps = logits.map do |v|
|
|
92
|
+
clamped = v - max_val
|
|
93
|
+
clamped = -500.0 if clamped < -500.0
|
|
94
|
+
Math.exp(clamped)
|
|
95
|
+
end
|
|
103
96
|
sum = exps.sum
|
|
97
|
+
sum = Float::MIN if sum.zero?
|
|
104
98
|
exps.map { |v| v / sum }
|
|
105
99
|
end
|
|
106
100
|
end
|
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Configuration
|
|
5
|
-
attr_accessor :models_path, :default_providers, :default_log_level
|
|
6
|
-
:pool_size, :pool_timeout
|
|
5
|
+
attr_accessor :models_path, :default_providers, :default_log_level
|
|
7
6
|
|
|
8
7
|
def initialize
|
|
9
8
|
@models_path = "app/models/onnx"
|
|
@@ -12,5 +11,25 @@ module OnnxRuby
|
|
|
12
11
|
@pool_size = 5
|
|
13
12
|
@pool_timeout = 5
|
|
14
13
|
end
|
|
14
|
+
|
|
15
|
+
def pool_size
|
|
16
|
+
@pool_size
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def pool_size=(value)
|
|
20
|
+
raise ArgumentError, "pool_size must be a positive Integer" unless value.is_a?(Integer) && value > 0
|
|
21
|
+
|
|
22
|
+
@pool_size = value
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def pool_timeout
|
|
26
|
+
@pool_timeout
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def pool_timeout=(value)
|
|
30
|
+
raise ArgumentError, "pool_timeout must be a positive Numeric" unless value.is_a?(Numeric) && value > 0
|
|
31
|
+
|
|
32
|
+
@pool_timeout = value
|
|
33
|
+
end
|
|
15
34
|
end
|
|
16
35
|
end
|
data/lib/onnx_ruby/embedder.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Embedder
|
|
5
|
+
include TokenizerSupport
|
|
6
|
+
|
|
5
7
|
attr_reader :session
|
|
6
8
|
|
|
7
9
|
def initialize(model_path, tokenizer: nil, normalize: true, **session_opts)
|
|
@@ -26,6 +28,7 @@ module OnnxRuby
|
|
|
26
28
|
result = @session.run(feed)
|
|
27
29
|
|
|
28
30
|
raw = find_output(result, %w[embeddings sentence_embedding output last_hidden_state])
|
|
31
|
+
return [] if raw.nil? || raw.empty?
|
|
29
32
|
|
|
30
33
|
# If output is 3D (batch, seq_len, dim) — do mean pooling
|
|
31
34
|
embeddings = if raw.first.is_a?(Array) && raw.first.first.is_a?(Array)
|
|
@@ -39,22 +42,6 @@ module OnnxRuby
|
|
|
39
42
|
|
|
40
43
|
private
|
|
41
44
|
|
|
42
|
-
def resolve_tokenizer(tokenizer)
|
|
43
|
-
return nil if tokenizer.nil?
|
|
44
|
-
|
|
45
|
-
if tokenizer.respond_to?(:encode) || tokenizer.respond_to?(:encode_batch)
|
|
46
|
-
tokenizer
|
|
47
|
-
else
|
|
48
|
-
begin
|
|
49
|
-
require "tokenizers"
|
|
50
|
-
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
51
|
-
rescue LoadError
|
|
52
|
-
raise Error, "tokenizer-ruby gem is required for text tokenization. " \
|
|
53
|
-
"Install with: gem install tokenizers"
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
45
|
def prepare_inputs(inputs)
|
|
59
46
|
if inputs.first.is_a?(String)
|
|
60
47
|
raise Error, "tokenizer is required for text inputs" unless @tokenizer
|
|
@@ -100,6 +87,8 @@ module OnnxRuby
|
|
|
100
87
|
|
|
101
88
|
def build_feed(ids, masks)
|
|
102
89
|
input_names = @session.inputs.map { |i| i[:name] }
|
|
90
|
+
raise OnnxRuby::Error, "Model has no input names" if input_names.empty?
|
|
91
|
+
|
|
103
92
|
feed = {}
|
|
104
93
|
feed[input_names.find { |n| n.include?("input_id") } || input_names[0]] = ids
|
|
105
94
|
mask_name = input_names.find { |n| n.include?("mask") || n.include?("attention") }
|
|
@@ -119,6 +108,8 @@ module OnnxRuby
|
|
|
119
108
|
# Mean pooling over token embeddings, masked by attention_mask
|
|
120
109
|
def mean_pool(hidden_states, masks)
|
|
121
110
|
hidden_states.each_with_index.map do |tokens, batch_idx|
|
|
111
|
+
return [] if tokens.nil? || tokens.empty? || tokens.first.nil?
|
|
112
|
+
|
|
122
113
|
mask = masks && masks[batch_idx]
|
|
123
114
|
dim = tokens.first.length
|
|
124
115
|
sum = Array.new(dim, 0.0)
|
data/lib/onnx_ruby/reranker.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Reranker
|
|
5
|
+
include TokenizerSupport
|
|
6
|
+
|
|
5
7
|
attr_reader :session
|
|
6
8
|
|
|
7
9
|
def initialize(model_path, tokenizer: nil, **session_opts)
|
|
@@ -37,22 +39,6 @@ module OnnxRuby
|
|
|
37
39
|
|
|
38
40
|
private
|
|
39
41
|
|
|
40
|
-
def resolve_tokenizer(tokenizer)
|
|
41
|
-
return nil if tokenizer.nil?
|
|
42
|
-
|
|
43
|
-
if tokenizer.respond_to?(:encode)
|
|
44
|
-
tokenizer
|
|
45
|
-
else
|
|
46
|
-
begin
|
|
47
|
-
require "tokenizers"
|
|
48
|
-
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
49
|
-
rescue LoadError
|
|
50
|
-
raise Error, "tokenizer-ruby gem is required for text tokenization. " \
|
|
51
|
-
"Install with: gem install tokenizers"
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
42
|
def score_pairs(pairs)
|
|
57
43
|
if @tokenizer.respond_to?(:encode_batch)
|
|
58
44
|
encodings = @tokenizer.encode_batch(pairs)
|
data/lib/onnx_ruby/session.rb
CHANGED
|
@@ -42,8 +42,13 @@ module OnnxRuby
|
|
|
42
42
|
if data.is_a?(Tensor)
|
|
43
43
|
{ name: name, data: data.flat_data, shape: data.shape, dtype: data.dtype.to_s }
|
|
44
44
|
else
|
|
45
|
-
flat = data.flatten
|
|
46
45
|
shape = infer_shape(data)
|
|
46
|
+
flat = data.flatten
|
|
47
|
+
expected_size = shape.reduce(1, :*)
|
|
48
|
+
if flat.length != expected_size
|
|
49
|
+
raise TensorError,
|
|
50
|
+
"input '#{name}' data size #{flat.length} does not match shape #{shape} (expected #{expected_size})"
|
|
51
|
+
end
|
|
47
52
|
dtype = infer_dtype(flat)
|
|
48
53
|
{ name: name, data: flat, shape: shape, dtype: dtype }
|
|
49
54
|
end
|
|
@@ -70,6 +75,13 @@ module OnnxRuby
|
|
|
70
75
|
current = data
|
|
71
76
|
while current.is_a?(Array)
|
|
72
77
|
shape << current.length
|
|
78
|
+
if current.length > 1 && current.all? { |el| el.is_a?(Array) }
|
|
79
|
+
lengths = current.map(&:length).uniq
|
|
80
|
+
if lengths.size > 1
|
|
81
|
+
raise TensorError,
|
|
82
|
+
"jagged array detected: sub-arrays have lengths #{lengths.sort.join(', ')} at dimension #{shape.size - 1}"
|
|
83
|
+
end
|
|
84
|
+
end
|
|
73
85
|
current = current.first
|
|
74
86
|
end
|
|
75
87
|
shape
|
|
@@ -64,12 +64,15 @@ module OnnxRuby
|
|
|
64
64
|
def checkin(session)
|
|
65
65
|
@mutex.synchronize do
|
|
66
66
|
@pool.push(session)
|
|
67
|
-
@condition.
|
|
67
|
+
@condition.broadcast
|
|
68
68
|
end
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
def create_session
|
|
72
72
|
Session.new(@model_path, **@session_opts)
|
|
73
|
+
rescue => e
|
|
74
|
+
@created -= 1
|
|
75
|
+
raise
|
|
73
76
|
end
|
|
74
77
|
end
|
|
75
78
|
end
|
data/lib/onnx_ruby/tensor.rb
CHANGED
|
@@ -59,6 +59,14 @@ module OnnxRuby
|
|
|
59
59
|
current = data
|
|
60
60
|
while current.is_a?(Array)
|
|
61
61
|
shape << current.length
|
|
62
|
+
# Check for jagged arrays: all sub-arrays at this level must have the same length
|
|
63
|
+
if current.length > 1 && current.all? { |el| el.is_a?(Array) }
|
|
64
|
+
lengths = current.map(&:length).uniq
|
|
65
|
+
if lengths.size > 1
|
|
66
|
+
raise TensorError,
|
|
67
|
+
"jagged array detected: sub-arrays have lengths #{lengths.sort.join(', ')} at dimension #{shape.size - 1}"
|
|
68
|
+
end
|
|
69
|
+
end
|
|
62
70
|
current = current.first
|
|
63
71
|
end
|
|
64
72
|
shape
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OnnxRuby
|
|
4
|
+
module TokenizerSupport
|
|
5
|
+
private
|
|
6
|
+
|
|
7
|
+
def resolve_tokenizer(tokenizer)
|
|
8
|
+
case tokenizer
|
|
9
|
+
when String
|
|
10
|
+
require "tokenizers"
|
|
11
|
+
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
12
|
+
when nil
|
|
13
|
+
nil
|
|
14
|
+
else
|
|
15
|
+
tokenizer
|
|
16
|
+
end
|
|
17
|
+
rescue LoadError
|
|
18
|
+
raise OnnxRuby::Error,
|
|
19
|
+
"tokenizers gem required for text inputs. Install: gem install tokenizers"
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/onnx_ruby/version.rb
CHANGED
data/lib/onnx_ruby.rb
CHANGED
|
@@ -12,6 +12,7 @@ end
|
|
|
12
12
|
require_relative "onnx_ruby/onnx_ruby_ext"
|
|
13
13
|
require_relative "onnx_ruby/tensor"
|
|
14
14
|
require_relative "onnx_ruby/session"
|
|
15
|
+
require_relative "onnx_ruby/tokenizer_support"
|
|
15
16
|
require_relative "onnx_ruby/embedder"
|
|
16
17
|
require_relative "onnx_ruby/classifier"
|
|
17
18
|
require_relative "onnx_ruby/reranker"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: onnx-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -74,9 +74,9 @@ extensions:
|
|
|
74
74
|
- ext/onnx_ruby/extconf.rb
|
|
75
75
|
extra_rdoc_files: []
|
|
76
76
|
files:
|
|
77
|
-
- CLAUDE.md
|
|
78
77
|
- Gemfile
|
|
79
78
|
- LICENSE
|
|
79
|
+
- MILESTONES.md
|
|
80
80
|
- README.md
|
|
81
81
|
- Rakefile
|
|
82
82
|
- examples/classification.rb
|
|
@@ -96,6 +96,7 @@ files:
|
|
|
96
96
|
- lib/onnx_ruby/session.rb
|
|
97
97
|
- lib/onnx_ruby/session_pool.rb
|
|
98
98
|
- lib/onnx_ruby/tensor.rb
|
|
99
|
+
- lib/onnx_ruby/tokenizer_support.rb
|
|
99
100
|
- lib/onnx_ruby/version.rb
|
|
100
101
|
- onnx-ruby.gemspec
|
|
101
102
|
homepage: https://github.com/johannesdwicahyo/onnx-ruby
|
data/CLAUDE.md
DELETED
|
@@ -1,334 +0,0 @@
|
|
|
1
|
-
# onnx-ruby
|
|
2
|
-
|
|
3
|
-
## Project Overview
|
|
4
|
-
|
|
5
|
-
Ruby bindings for [ONNX Runtime](https://github.com/microsoft/onnxruntime), Microsoft's high-performance inference engine for ONNX models. This gem wraps the ONNX Runtime C++ API using **Rice** (same approach as zvec-ruby) to give Ruby developers fast local model inference.
|
|
6
|
-
|
|
7
|
-
This unlocks: local embeddings, text classification, named entity recognition, sentiment analysis, reranking, and any other ML model exported to ONNX format — all without Python or API calls.
|
|
8
|
-
|
|
9
|
-
## Author
|
|
10
|
-
|
|
11
|
-
- Name: Johannes Dwi Cahyo
|
|
12
|
-
- GitHub: johannesdwicahyo
|
|
13
|
-
- Repo: git@github.com:johannesdwicahyo/onnx-ruby.git
|
|
14
|
-
|
|
15
|
-
## Technical Approach
|
|
16
|
-
|
|
17
|
-
### Binding Strategy: Rice 4.x (C++ → Ruby)
|
|
18
|
-
|
|
19
|
-
ONNX Runtime has a C++ API (`onnxruntime_cxx_api.h`). We wrap it using Rice, exactly like zvec-ruby.
|
|
20
|
-
|
|
21
|
-
**Important lessons from zvec-ruby to apply here:**
|
|
22
|
-
- Use `require "mkmf-rice"` (not `require "rice/extconf"`) for Rice 4.x
|
|
23
|
-
- Use `define_module_under()` and `define_enum_under()` for Rice 4.x
|
|
24
|
-
- Wrap all raw `VALUE` returns in `Rice::Object()` when pushing to Arrays
|
|
25
|
-
- Use `std::make_shared` when C++ API expects shared_ptr
|
|
26
|
-
- Extract results to Ruby Hashes/Arrays in C++ before returning (avoid dangling pointers)
|
|
27
|
-
- Ship precompiled gems — ONNX Runtime is a large C++ library, nobody wants to build it
|
|
28
|
-
- On macOS use `-force_load` for static archives with static initializers
|
|
29
|
-
- Default to safe options (like mmap=true in zvec-ruby)
|
|
30
|
-
|
|
31
|
-
### ONNX Runtime Linking
|
|
32
|
-
|
|
33
|
-
ONNX Runtime provides **prebuilt shared libraries** (`.so`/`.dylib`) for all platforms. Unlike zvec (which required building from source), we can download the official release and link against it. This is much simpler.
|
|
34
|
-
|
|
35
|
-
Download from: https://github.com/microsoft/onnxruntime/releases
|
|
36
|
-
|
|
37
|
-
The `extconf.rb` should:
|
|
38
|
-
1. Check for `ONNX_RUNTIME_DIR` env var
|
|
39
|
-
2. Check for system-installed onnxruntime via pkg-config
|
|
40
|
-
3. Auto-download the correct prebuilt release if neither found
|
|
41
|
-
|
|
42
|
-
### Precompiled Gems
|
|
43
|
-
|
|
44
|
-
For precompiled gems, statically link or bundle the ONNX Runtime `.dylib`/`.so` inside the gem. The gem will be ~50-80MB but users get zero-install experience.
|
|
45
|
-
|
|
46
|
-
## Core API Design
|
|
47
|
-
|
|
48
|
-
```ruby
|
|
49
|
-
require "onnx_ruby"
|
|
50
|
-
|
|
51
|
-
# --- Session (model loading) ---
|
|
52
|
-
|
|
53
|
-
# Load a model
|
|
54
|
-
session = OnnxRuby::Session.new("model.onnx")
|
|
55
|
-
|
|
56
|
-
# With options
|
|
57
|
-
session = OnnxRuby::Session.new("model.onnx",
|
|
58
|
-
providers: [:cpu], # :cpu, :cuda, :coreml, :tensorrt
|
|
59
|
-
inter_threads: 4,
|
|
60
|
-
intra_threads: 2,
|
|
61
|
-
log_level: :warning
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
# Model info
|
|
65
|
-
session.inputs # => [{ name: "input_ids", type: :int64, shape: [-1, 512] }]
|
|
66
|
-
session.outputs # => [{ name: "embeddings", type: :float, shape: [-1, 384] }]
|
|
67
|
-
|
|
68
|
-
# --- Inference ---
|
|
69
|
-
|
|
70
|
-
# Run inference
|
|
71
|
-
result = session.run(
|
|
72
|
-
{ "input_ids" => [[101, 2023, 2003, 1037, 3231, 102]] },
|
|
73
|
-
)
|
|
74
|
-
result["embeddings"] # => [[0.0123, -0.0456, ...]]
|
|
75
|
-
|
|
76
|
-
# With output names
|
|
77
|
-
result = session.run(inputs, output_names: ["embeddings"])
|
|
78
|
-
|
|
79
|
-
# --- Tensor ---
|
|
80
|
-
|
|
81
|
-
# Create tensors explicitly
|
|
82
|
-
tensor = OnnxRuby::Tensor.new([1, 2, 3, 4], shape: [2, 2], dtype: :int64)
|
|
83
|
-
tensor.to_a # => [[1, 2], [3, 4]]
|
|
84
|
-
tensor.shape # => [2, 2]
|
|
85
|
-
tensor.dtype # => :int64
|
|
86
|
-
|
|
87
|
-
# From flat array
|
|
88
|
-
tensor = OnnxRuby::Tensor.float([0.1, 0.2, 0.3], shape: [1, 3])
|
|
89
|
-
|
|
90
|
-
# --- High-Level Helpers ---
|
|
91
|
-
|
|
92
|
-
# Embedding model (wraps session with pre/post processing)
|
|
93
|
-
embedder = OnnxRuby::Embedder.new("all-MiniLM-L6-v2.onnx",
|
|
94
|
-
tokenizer: "sentence-transformers/all-MiniLM-L6-v2" # requires tokenizer-ruby
|
|
95
|
-
)
|
|
96
|
-
embeddings = embedder.embed("Hello world") # => [0.0123, ...]
|
|
97
|
-
embeddings = embedder.embed_batch(["Hello", "World"]) # => [[...], [...]]
|
|
98
|
-
|
|
99
|
-
# Classifier
|
|
100
|
-
classifier = OnnxRuby::Classifier.new("intent_model.onnx",
|
|
101
|
-
tokenizer: "bert-base-uncased",
|
|
102
|
-
labels: ["greeting", "farewell", "question", "command"]
|
|
103
|
-
)
|
|
104
|
-
classifier.predict("Hello there!") # => { label: "greeting", score: 0.95 }
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
## Features to Implement
|
|
108
|
-
|
|
109
|
-
### Phase 1 — Core (MVP)
|
|
110
|
-
- [ ] `Session.new(path, options)` — load ONNX model
|
|
111
|
-
- [ ] `session.run(inputs)` — run inference, return outputs
|
|
112
|
-
- [ ] `session.inputs` / `session.outputs` — model metadata
|
|
113
|
-
- [ ] `Tensor` class — create and manipulate tensors
|
|
114
|
-
- [ ] Support dtypes: float32, float64, int32, int64, string, bool
|
|
115
|
-
- [ ] Support shapes: 1D, 2D, 3D, 4D tensors
|
|
116
|
-
- [ ] CPU execution provider
|
|
117
|
-
|
|
118
|
-
### Phase 2 — Providers & Options
|
|
119
|
-
- [ ] CoreML provider (macOS acceleration)
|
|
120
|
-
- [ ] CUDA provider (NVIDIA GPU)
|
|
121
|
-
- [ ] Session options: threading, memory, optimization level
|
|
122
|
-
- [ ] Model optimization: `OnnxRuby.optimize("model.onnx", "optimized.onnx")`
|
|
123
|
-
- [ ] Dynamic shapes (batching)
|
|
124
|
-
|
|
125
|
-
### Phase 3 — High-Level API
|
|
126
|
-
- [ ] `Embedder` — embedding model wrapper (tokenize → infer → normalize)
|
|
127
|
-
- [ ] `Classifier` — text classification wrapper
|
|
128
|
-
- [ ] `Reranker` — cross-encoder reranking wrapper
|
|
129
|
-
- [ ] Integration with tokenizer-ruby for text preprocessing
|
|
130
|
-
- [ ] Model hub: `OnnxRuby::Hub.download("sentence-transformers/all-MiniLM-L6-v2")`
|
|
131
|
-
|
|
132
|
-
### Phase 4 — Rails Integration
|
|
133
|
-
- [ ] `OnnxRuby.configure { |c| c.models_path = "app/models/onnx" }`
|
|
134
|
-
- [ ] Lazy model loading (load on first inference)
|
|
135
|
-
- [ ] Connection pool for thread-safe concurrent inference
|
|
136
|
-
- [ ] ActiveModel integration for embedding generation
|
|
137
|
-
|
|
138
|
-
## Project Structure
|
|
139
|
-
|
|
140
|
-
```
|
|
141
|
-
onnx-ruby/
|
|
142
|
-
├── CLAUDE.md
|
|
143
|
-
├── Gemfile
|
|
144
|
-
├── Rakefile
|
|
145
|
-
├── LICENSE # MIT
|
|
146
|
-
├── README.md
|
|
147
|
-
├── onnx-ruby.gemspec
|
|
148
|
-
├── lib/
|
|
149
|
-
│ ├── onnx_ruby.rb
|
|
150
|
-
│ └── onnx_ruby/
|
|
151
|
-
│ ├── version.rb
|
|
152
|
-
│ ├── session.rb
|
|
153
|
-
│ ├── tensor.rb
|
|
154
|
-
│ ├── embedder.rb
|
|
155
|
-
│ ├── classifier.rb
|
|
156
|
-
│ └── reranker.rb
|
|
157
|
-
├── ext/
|
|
158
|
-
│ └── onnx_ruby/
|
|
159
|
-
│ ├── extconf.rb
|
|
160
|
-
│ └── onnx_ruby_ext.cpp
|
|
161
|
-
├── test/
|
|
162
|
-
│ ├── test_helper.rb
|
|
163
|
-
│ ├── test_session.rb
|
|
164
|
-
│ ├── test_tensor.rb
|
|
165
|
-
│ ├── test_inference.rb
|
|
166
|
-
│ └── models/ # small test ONNX models
|
|
167
|
-
│ └── .gitkeep
|
|
168
|
-
├── script/
|
|
169
|
-
│ ├── download_onnxruntime.sh
|
|
170
|
-
│ └── package_native_gem.rb
|
|
171
|
-
└── examples/
|
|
172
|
-
├── embedding.rb
|
|
173
|
-
├── classification.rb
|
|
174
|
-
└── with_zvec.rb # full RAG example with zvec-ruby
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
## Dependencies
|
|
178
|
-
|
|
179
|
-
### Runtime
|
|
180
|
-
- `rice` (>= 4.0) — C++ to Ruby bindings
|
|
181
|
-
- ONNX Runtime shared library (bundled in precompiled gems)
|
|
182
|
-
|
|
183
|
-
### Optional
|
|
184
|
-
- `tokenizer-ruby` — for Embedder/Classifier text preprocessing
|
|
185
|
-
|
|
186
|
-
### Development
|
|
187
|
-
- `rake-compiler` for building native extensions
|
|
188
|
-
- `rake-compiler-dock` for cross-compilation
|
|
189
|
-
- `minitest` for testing
|
|
190
|
-
- `rake` for tasks
|
|
191
|
-
|
|
192
|
-
## Key C++ Binding Details
|
|
193
|
-
|
|
194
|
-
### ONNX Runtime C++ API Structure
|
|
195
|
-
|
|
196
|
-
```cpp
|
|
197
|
-
#include <onnxruntime_cxx_api.h>
|
|
198
|
-
|
|
199
|
-
// Key classes to wrap:
|
|
200
|
-
Ort::Env // Runtime environment (singleton)
|
|
201
|
-
Ort::Session // Model session
|
|
202
|
-
Ort::SessionOptions
|
|
203
|
-
Ort::Value // Tensor (input/output)
|
|
204
|
-
Ort::MemoryInfo // Memory allocation info
|
|
205
|
-
Ort::TypeInfo // Model input/output type info
|
|
206
|
-
Ort::TensorTypeAndShapeInfo
|
|
207
|
-
```
|
|
208
|
-
|
|
209
|
-
### extconf.rb approach
|
|
210
|
-
|
|
211
|
-
```ruby
|
|
212
|
-
require "mkmf-rice"
|
|
213
|
-
|
|
214
|
-
# Try to find ONNX Runtime
|
|
215
|
-
ort_dir = ENV["ONNX_RUNTIME_DIR"]
|
|
216
|
-
|
|
217
|
-
unless ort_dir
|
|
218
|
-
# Auto-download prebuilt ONNX Runtime for the current platform
|
|
219
|
-
# from https://github.com/microsoft/onnxruntime/releases
|
|
220
|
-
ort_dir = download_onnxruntime() # helper function
|
|
221
|
-
end
|
|
222
|
-
|
|
223
|
-
dir_config("onnxruntime", "#{ort_dir}/include", "#{ort_dir}/lib")
|
|
224
|
-
$INCFLAGS << " -I#{ort_dir}/include"
|
|
225
|
-
$LDFLAGS << " -L#{ort_dir}/lib"
|
|
226
|
-
$libs << " -lonnxruntime"
|
|
227
|
-
|
|
228
|
-
have_header("onnxruntime_cxx_api.h") or
|
|
229
|
-
abort "Cannot find ONNX Runtime headers"
|
|
230
|
-
|
|
231
|
-
create_makefile("onnx_ruby/onnx_ruby_ext")
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
### C++ Extension Skeleton
|
|
235
|
-
|
|
236
|
-
```cpp
|
|
237
|
-
#include <rice/rice.hpp>
|
|
238
|
-
#include <onnxruntime_cxx_api.h>
|
|
239
|
-
|
|
240
|
-
using namespace Rice;
|
|
241
|
-
|
|
242
|
-
// Global ORT environment (initialized once)
|
|
243
|
-
static Ort::Env& get_env() {
|
|
244
|
-
static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "onnx_ruby");
|
|
245
|
-
return env;
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
// Wrap Ort::Session
|
|
249
|
-
// Wrap Ort::Value (Tensor)
|
|
250
|
-
// Handle type conversion: Ruby Array ↔ ORT Tensor
|
|
251
|
-
// Map ORT errors to Ruby exceptions
|
|
252
|
-
|
|
253
|
-
void Init_onnx_ruby_ext() {
|
|
254
|
-
Module rb_mOnnxRuby = define_module("OnnxRuby");
|
|
255
|
-
Module rb_mExt = define_module_under(rb_mOnnxRuby, "Ext");
|
|
256
|
-
|
|
257
|
-
// Define Session, Tensor, etc.
|
|
258
|
-
}
|
|
259
|
-
```
|
|
260
|
-
|
|
261
|
-
### Critical: Tensor ↔ Ruby Array Conversion
|
|
262
|
-
|
|
263
|
-
The most complex part. Need to handle:
|
|
264
|
-
- Ruby Array of floats → ORT float32 tensor (most common for embeddings)
|
|
265
|
-
- Ruby Array of integers → ORT int64 tensor (for token IDs)
|
|
266
|
-
- Nested Ruby Arrays → multi-dimensional tensors
|
|
267
|
-
- ORT output tensors → Ruby Arrays (with proper Float wrapping via `Rice::Object(rb_float_new())`)
|
|
268
|
-
|
|
269
|
-
```cpp
|
|
270
|
-
// Ruby Array → ORT Tensor
|
|
271
|
-
Ort::Value array_to_tensor(Rice::Array arr, const std::vector<int64_t>& shape) {
|
|
272
|
-
// Flatten nested arrays
|
|
273
|
-
// Detect dtype from Ruby values
|
|
274
|
-
// Create ORT tensor with proper memory allocation
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
// ORT Tensor → Ruby Array
|
|
278
|
-
Rice::Object tensor_to_array(const Ort::Value& tensor) {
|
|
279
|
-
// Read shape
|
|
280
|
-
// Read dtype
|
|
281
|
-
// Copy data to Ruby Array (with Rice::Object wrapping!)
|
|
282
|
-
}
|
|
283
|
-
```
|
|
284
|
-
|
|
285
|
-
## Testing Strategy
|
|
286
|
-
|
|
287
|
-
- Test with small ONNX models (generate test models with Python: `torch.onnx.export`)
|
|
288
|
-
- Test model loading and metadata inspection
|
|
289
|
-
- Test inference with known inputs/outputs
|
|
290
|
-
- Test all supported dtypes
|
|
291
|
-
- Test batch inference
|
|
292
|
-
- Test error handling (invalid model, wrong input shape, etc.)
|
|
293
|
-
- Benchmark against Python's onnxruntime for correctness
|
|
294
|
-
|
|
295
|
-
### Create test models (Python script to include):
|
|
296
|
-
```python
|
|
297
|
-
# script/create_test_models.py
|
|
298
|
-
import torch
|
|
299
|
-
import torch.nn as nn
|
|
300
|
-
|
|
301
|
-
# Simple linear model for testing
|
|
302
|
-
class SimpleModel(nn.Module):
|
|
303
|
-
def __init__(self):
|
|
304
|
-
super().__init__()
|
|
305
|
-
self.linear = nn.Linear(4, 3)
|
|
306
|
-
def forward(self, x):
|
|
307
|
-
return self.linear(x)
|
|
308
|
-
|
|
309
|
-
model = SimpleModel()
|
|
310
|
-
dummy = torch.randn(1, 4)
|
|
311
|
-
torch.onnx.export(model, dummy, "test/models/simple.onnx",
|
|
312
|
-
input_names=["input"], output_names=["output"])
|
|
313
|
-
```
|
|
314
|
-
|
|
315
|
-
## Publishing
|
|
316
|
-
|
|
317
|
-
- RubyGems.org: `gem push onnx-ruby-*.gem`
|
|
318
|
-
- gem.coop: `GEM_HOST_API_KEY=hjncPswY8PbGDfLPw4RMj928 gem push onnx-ruby-*.gem --host https://beta.gem.coop/@johannesdwicahyo`
|
|
319
|
-
|
|
320
|
-
## Notes from zvec-ruby Experience
|
|
321
|
-
|
|
322
|
-
- **Rice 4.x API**: `define_module_under()`, `define_enum_under()`, not the 3.x syntax
|
|
323
|
-
- **Rice::Object wrapping**: ALWAYS wrap `rb_float_new()`, `Qtrue`, `Qnil` in `Rice::Object()` when pushing to Arrays
|
|
324
|
-
- **shared_ptr**: Use `make_shared` when C++ expects `shared_ptr`, accept by `const T&` in bindings
|
|
325
|
-
- **Extract results in C++**: Don't try to push C++ objects directly into Ruby arrays. Extract to Hashes/Arrays first.
|
|
326
|
-
- **Precompiled gems**: Essential. Use `script/package_native_gem.rb` for macOS, `rake-compiler-dock` for Linux.
|
|
327
|
-
- **ONNX Runtime ships prebuilt binaries**: Much easier than zvec. Download from GitHub releases, link against `.dylib`/`.so`.
|
|
328
|
-
- **Static initializers**: May need `-force_load` if ONNX Runtime uses static registration patterns.
|
|
329
|
-
- **mmap/memory**: ONNX Runtime manages its own memory via allocators. Let it handle memory, don't fight it.
|
|
330
|
-
|
|
331
|
-
## Existing Ruby ONNX Solutions
|
|
332
|
-
|
|
333
|
-
- `onnxruntime` gem by ankane — exists but is FFI-based and limited. We can provide better performance and API with Rice + additional high-level features (Embedder, Classifier, Reranker).
|
|
334
|
-
- Differentiate by: better API, precompiled gems, high-level wrappers, tokenizer-ruby integration
|