onnx-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +334 -0
- data/Gemfile +5 -0
- data/LICENSE +21 -0
- data/README.md +301 -0
- data/Rakefile +17 -0
- data/examples/classification.rb +35 -0
- data/examples/embedding.rb +35 -0
- data/examples/real_world_demo.rb +170 -0
- data/examples/with_zvec.rb +54 -0
- data/ext/onnx_ruby/extconf.rb +75 -0
- data/ext/onnx_ruby/onnx_ruby_ext.cpp +436 -0
- data/lib/onnx_ruby/classifier.rb +107 -0
- data/lib/onnx_ruby/configuration.rb +16 -0
- data/lib/onnx_ruby/embedder.rb +147 -0
- data/lib/onnx_ruby/hub.rb +73 -0
- data/lib/onnx_ruby/lazy_session.rb +38 -0
- data/lib/onnx_ruby/model.rb +71 -0
- data/lib/onnx_ruby/reranker.rb +91 -0
- data/lib/onnx_ruby/session.rb +89 -0
- data/lib/onnx_ruby/session_pool.rb +75 -0
- data/lib/onnx_ruby/tensor.rb +92 -0
- data/lib/onnx_ruby/version.rb +5 -0
- data/lib/onnx_ruby.rb +45 -0
- data/onnx-ruby.gemspec +37 -0
- metadata +125 -0
|
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Run an ONNX classification model
#
# With tokenizer (text input):
#   gem install onnx-ruby tokenizers
#   ruby classification.rb intent_model.onnx bert-base-uncased
#
# With raw features:
#   ruby classification.rb classifier.onnx

require "onnx_ruby"

model_path = ARGV[0] || "classifier.onnx"
tokenizer_name = ARGV[1]
abort "Usage: ruby classification.rb <model.onnx> [tokenizer_name]" unless File.exist?(model_path)

labels = %w[greeting farewell question command]

if tokenizer_name
  # Text input: the classifier tokenizes the string before inference.
  classifier = OnnxRuby::Classifier.new(model_path,
                                        tokenizer: tokenizer_name,
                                        labels: labels)
  prediction = classifier.predict("Hello there!")
  puts "Prediction: #{prediction[:label]} (score: #{prediction[:score].round(4)})"
else
  classifier = OnnxRuby::Classifier.new(model_path, labels: labels)

  # Use raw feature vector
  features = Array.new(8) { rand(-1.0..1.0) }
  prediction = classifier.predict(features)
  puts "Prediction: #{prediction[:label]} (score: #{prediction[:score].round(4)})"
  puts "All scores: #{prediction[:scores].map { |s| s.round(4) }}"
end
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Run an ONNX embedding model
#
# With tokenizer (text input):
#   gem install onnx-ruby tokenizers
#   ruby embedding.rb all-MiniLM-L6-v2.onnx sentence-transformers/all-MiniLM-L6-v2
#
# With raw token IDs:
#   ruby embedding.rb all-MiniLM-L6-v2.onnx

require "onnx_ruby"

model_path = ARGV[0] || "all-MiniLM-L6-v2.onnx"
tokenizer_name = ARGV[1]
abort "Usage: ruby embedding.rb <model.onnx> [tokenizer_name]" unless File.exist?(model_path)

if tokenizer_name
  # Text input: the embedder tokenizes strings itself.
  embedder = OnnxRuby::Embedder.new(model_path, tokenizer: tokenizer_name)

  embedding = embedder.embed("Hello world")
  puts "Embedding (#{embedding.length} dims): #{embedding.take(5).map { |v| v.round(4) }}..."

  batch = embedder.embed_batch(["Hello", "World", "Ruby is great"])
  puts "Batch: #{batch.length} embeddings of #{batch.first.length} dims"
else
  embedder = OnnxRuby::Embedder.new(model_path)

  # Use pre-tokenized input
  result = embedder.embed({
    "input_ids" => [101, 2023, 2003, 1037, 3231, 102],
    "attention_mask" => [1, 1, 1, 1, 1, 1]
  })
  puts "Embedding (#{result.length} dims): #{result.take(5).map { |v| v.round(4) }}..."
end
#!/usr/bin/env ruby
# frozen_string_literal: true

# Real-world proof of concept: Semantic search with all-MiniLM-L6-v2
#
# This demo:
#   1. Loads a real HuggingFace sentence-transformers model (86MB)
#   2. Tokenizes English text with the tokenizers gem
#   3. Generates 384-dimensional embeddings
#   4. Performs cosine similarity search
#
# Prerequisites:
#   gem install tokenizers

require_relative "../lib/onnx_ruby"
require "tokenizers"
require "benchmark"

MODEL_PATH = File.join(__dir__, "models", "all-MiniLM-L6-v2.onnx")
TOKENIZER_ID = "sentence-transformers/all-MiniLM-L6-v2"

unless File.exist?(MODEL_PATH)
  abort "Model not found. Download it first:\n" \
        " curl -fSL https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx " \
        "-o examples/models/all-MiniLM-L6-v2.onnx"
end

# --- Setup ---

puts "Loading tokenizer..."
tokenizer = Tokenizers::Tokenizer.from_pretrained(TOKENIZER_ID)

puts "Loading ONNX model..."
embedder = OnnxRuby::Embedder.new(MODEL_PATH, tokenizer: tokenizer, normalize: true)

puts "Model inputs: #{embedder.session.inputs.map { |i| "#{i[:name]} #{i[:type]} #{i[:shape]}" }.join(", ")}"
puts "Model outputs: #{embedder.session.outputs.map { |o| "#{o[:name]} #{o[:type]} #{o[:shape]}" }.join(", ")}"
puts

# --- Document corpus ---

documents = [
  "Ruby is a dynamic, open source programming language with a focus on simplicity and productivity.",
  "Python is widely used in data science and machine learning applications.",
  "ONNX Runtime is a cross-platform inference engine for machine learning models.",
  "Rails is a web application framework written in Ruby.",
  "Vector databases store and search high-dimensional embeddings efficiently.",
  "The weather in Tokyo is warm and humid during summer.",
  "PostgreSQL is a powerful open source relational database system.",
  "Transformers have revolutionized natural language processing since 2017.",
  "Docker containers package applications with their dependencies for consistent deployment.",
  "The quick brown fox jumps over the lazy dog.",
  "Semantic search understands the meaning of queries, not just keywords.",
  "GPUs accelerate matrix operations used in deep learning training.",
]

# --- Generate embeddings ---

puts "Generating embeddings for #{documents.length} documents..."
doc_embeddings = nil
time = Benchmark.realtime do
  doc_embeddings = embedder.embed_batch(documents)
end
puts " Done in #{(time * 1000).round(1)}ms (#{(time / documents.length * 1000).round(1)}ms per document)"
puts " Embedding dimensions: #{doc_embeddings.first.length}"
puts

# --- Cosine similarity search ---

# Cosine similarity between two equal-length numeric vectors.
#
# @param a [Array<Numeric>] first vector
# @param b [Array<Numeric>] second vector
# @return [Float] similarity in [-1.0, 1.0]; 0.0 when either vector has
#   zero magnitude (avoids a 0/0 NaN from the division)
def cosine_similarity(a, b)
  dot = a.zip(b).sum { |x, y| x * y }
  norm_a = Math.sqrt(a.sum { |x| x * x })
  norm_b = Math.sqrt(b.sum { |x| x * x })
  denom = norm_a * norm_b
  # Guard: an all-zero vector would otherwise produce NaN.
  return 0.0 if denom.zero?
  dot / denom
end
queries = [
  "How do I build a web app with Ruby?",
  "What is the best way to run ML models locally?",
  "Tell me about database systems",
  "Natural language understanding",
]

queries.each do |query|
  puts "Query: \"#{query}\""

  query_embedding = embedder.embed(query)

  # Rank every document by similarity to the query, best first.
  ranked = documents.each_with_index.map do |doc, i|
    { document: doc, score: cosine_similarity(query_embedding, doc_embeddings[i]), index: i }
  end.sort_by { |r| -r[:score] }

  ranked.first(3).each_with_index do |r, rank|
    puts " #{rank + 1}. [#{r[:score].round(4)}] #{r[:document]}"
  end
  puts
end

# --- Classifier demo ---

puts "=== Classifier Demo ==="
classifier = OnnxRuby::Classifier.new(
  File.join(__dir__, "..", "test", "models", "classifier.onnx"),
  labels: %w[greeting farewell question command]
)

# Use embeddings as features (truncated to 8 dims to match test model)
test_sentences = ["Hello there!", "What is Ruby?", "Goodbye!", "Run the tests"]
test_sentences.each do |sentence|
  emb = embedder.embed(sentence)
  pred = classifier.predict(emb.first(8)) # our test classifier expects 8 features
  puts " \"#{sentence}\" -> #{pred[:label]} (#{(pred[:score] * 100).round(1)}%)"
end
puts

# --- Session Pool demo ---

puts "=== Concurrent Inference with SessionPool ==="
pool = OnnxRuby::SessionPool.new(MODEL_PATH, size: 3)

sentences = [
  "Machine learning is transforming software",
  "Ruby gems make code reusable",
  "ONNX is an open format for ML models",
  "Concurrent processing improves throughput",
  "Embeddings capture semantic meaning",
  "Thread safety matters in production",
]

results = []
mutex = Mutex.new
time = Benchmark.realtime do
  threads = sentences.map do |sentence|
    Thread.new do
      encoding = tokenizer.encode(sentence)
      ids = encoding.ids
      mask = encoding.attention_mask

      # Each thread checks a session out of the pool for its own run.
      r = pool.run({
        "input_ids" => [ids],
        "attention_mask" => [mask],
        "token_type_ids" => [Array.new(ids.length, 0)]
      })

      embedding = r.values.first[0]
      mutex.synchronize { results << { sentence: sentence, dims: embedding.length } }
    end
  end
  threads.each(&:join)
end

puts " #{results.length} sentences embedded concurrently in #{(time * 1000).round(1)}ms"
puts " Pool size: #{pool.size} sessions created (max: 3)"
results.each { |r| puts " #{r[:sentence]} -> #{r[:dims]}d" }
puts

# --- Lazy loading demo ---

puts "=== Lazy Loading Demo ==="
lazy = OnnxRuby::LazySession.new(MODEL_PATH)
puts " Before first call: loaded=#{lazy.loaded?}"
lazy.run({
  "input_ids" => [[101, 2023, 2003, 1037, 3231, 102]],
  "attention_mask" => [[1, 1, 1, 1, 1, 1]],
  "token_type_ids" => [[0, 0, 0, 0, 0, 0]]
})
puts " After first call: loaded=#{lazy.loaded?}"
puts

puts "=== All real-world demos completed successfully! ==="
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Full RAG pipeline with onnx-ruby + zvec-ruby
#
# Prerequisites:
#   gem install onnx-ruby zvec-ruby tokenizers
#   Download or export an ONNX embedding model

require "onnx_ruby"

begin
  require "zvec"
rescue LoadError
  abort "This example requires zvec-ruby: gem install zvec-ruby"
end

MODEL_PATH = ARGV[0] || "all-MiniLM-L6-v2.onnx"
TOKENIZER = ARGV[1] || "sentence-transformers/all-MiniLM-L6-v2"

unless File.exist?(MODEL_PATH)
  abort "Usage: ruby with_zvec.rb <embedding_model.onnx> [tokenizer_name]"
end

# 1. Create embedder
embedder = OnnxRuby::Embedder.new(MODEL_PATH, tokenizer: TOKENIZER)

# 2. Create vector store sized to the model's output dimension
dim = embedder.session.outputs.first[:shape].last
store = Zvec::Store.new(dimensions: dim)

# 3. Index some documents
documents = [
  "Ruby is a dynamic programming language",
  "ONNX Runtime provides high-performance inference",
  "Vector databases enable semantic search",
  "Machine learning models can run locally",
  "Rails is a web application framework"
]

embeddings = embedder.embed_batch(documents)
documents.each_with_index do |doc, i|
  store.add(embeddings[i], metadata: { text: doc, id: i })
end

# 4. Query
query = "How to run ML models?"
query_embedding = embedder.embed(query)
results = store.search(query_embedding, k: 3)

puts "Query: #{query}\n\n"
results.each_with_index do |result, i|
  puts "#{i + 1}. (score: #{result[:score].round(4)}) #{result[:metadata][:text]}"
end
require "mkmf-rice"
require "fileutils"
require "tmpdir"

# ONNX Runtime release fetched when no local installation is provided.
ORT_VERSION = "1.24.3"
# Map the current host OS/CPU to ONNX Runtime's release platform string.
#
# @return [String] one of "osx-arm64", "osx-x86_64", "linux-aarch64",
#   "linux-x64". Aborts the build on any other operating system.
def detect_platform
  host_os = RbConfig::CONFIG["host_os"]
  host_cpu = RbConfig::CONFIG["host_cpu"]
  arm = host_cpu.match?(/arm64|aarch64/)

  case host_os
  when /darwin/
    arm ? "osx-arm64" : "osx-x86_64"
  when /linux/
    arm ? "linux-aarch64" : "linux-x64"
  else
    abort "Unsupported OS: #{host_os}"
  end
end
|
|
20
|
+
|
|
21
|
+
# Download and extract the prebuilt ONNX Runtime release into +dest_dir+.
#
# The archive is cached in the system temp directory so repeated builds do
# not re-download it. Aborts the build if the download or extraction fails.
#
# @param dest_dir [String] directory the runtime is extracted into
# @return [String] +dest_dir+
def download_onnxruntime(dest_dir)
  platform = detect_platform
  filename = "onnxruntime-#{platform}-#{ORT_VERSION}.tgz"
  # FIX: the interpolation of the archive filename was missing from the URL,
  # so every download would 404. The release URL must end with the filename.
  url = "https://github.com/microsoft/onnxruntime/releases/download/v#{ORT_VERSION}/#{filename}"
  tmp_file = File.join(Dir.tmpdir, filename)

  unless File.exist?(tmp_file)
    puts "Downloading ONNX Runtime v#{ORT_VERSION} for #{platform}..."
    system("curl", "-fSL", url, "-o", tmp_file) or
      abort "Failed to download ONNX Runtime from #{url}"
  end

  FileUtils.mkdir_p(dest_dir)
  puts "Extracting to #{dest_dir}..."
  # --strip-components=1 drops the versioned top-level directory inside the
  # archive so headers/libs land directly under dest_dir.
  system("tar", "xzf", tmp_file, "-C", dest_dir, "--strip-components=1") or
    abort "Failed to extract ONNX Runtime"

  dest_dir
end
|
|
40
|
+
|
|
41
|
+
# Find ONNX Runtime: an explicit ONNX_RUNTIME_DIR wins; otherwise use (or
# download into) a copy bundled next to this extconf.rb.
ort_dir = ENV["ONNX_RUNTIME_DIR"]

unless ort_dir
  bundled_dir = File.join(__dir__, "onnxruntime")
  ort_dir =
    if File.exist?(File.join(bundled_dir, "include", "onnxruntime_cxx_api.h"))
      bundled_dir
    else
      download_onnxruntime(bundled_dir)
    end
end

ort_include = File.join(ort_dir, "include")
ort_lib = File.join(ort_dir, "lib")

abort "Cannot find ONNX Runtime headers in #{ort_include}" unless File.exist?(File.join(ort_include, "onnxruntime_cxx_api.h"))

$INCFLAGS << " -I#{ort_include}"
$LDFLAGS << " -L#{ort_lib}"
$libs << " -lonnxruntime"

# Embed an rpath so the onnxruntime shared library is found at runtime
# (same linker flag on both macOS and Linux).
case RbConfig::CONFIG["host_os"]
when /darwin/, /linux/
  $LDFLAGS << " -Wl,-rpath,#{ort_lib}"
end

# The ONNX Runtime C++ API requires C++17.
$CXXFLAGS = ($CXXFLAGS || "") + " -std=c++17"

have_header("onnxruntime_cxx_api.h") or abort "Cannot find onnxruntime_cxx_api.h"

create_makefile("onnx_ruby/onnx_ruby_ext")