onnx-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example: Run an ONNX classification model
5
+ #
6
+ # With tokenizer (text input):
7
+ # gem install onnx-ruby tokenizers
8
+ # ruby classification.rb intent_model.onnx bert-base-uncased
9
+ #
10
+ # With raw features:
11
+ # ruby classification.rb classifier.onnx
12
+
13
+ require "onnx_ruby"
14
+
15
+ model_path = ARGV[0] || "classifier.onnx"
16
+ tokenizer_name = ARGV[1]
17
+ abort "Usage: ruby classification.rb <model.onnx> [tokenizer_name]" unless File.exist?(model_path)
18
+
19
+ labels = %w[greeting farewell question command]
20
+
21
+ if tokenizer_name
22
+ classifier = OnnxRuby::Classifier.new(model_path,
23
+ tokenizer: tokenizer_name,
24
+ labels: labels)
25
+ result = classifier.predict("Hello there!")
26
+ puts "Prediction: #{result[:label]} (score: #{result[:score].round(4)})"
27
+ else
28
+ classifier = OnnxRuby::Classifier.new(model_path, labels: labels)
29
+
30
+ # Use raw feature vector
31
+ features = Array.new(8) { rand(-1.0..1.0) }
32
+ result = classifier.predict(features)
33
+ puts "Prediction: #{result[:label]} (score: #{result[:score].round(4)})"
34
+ puts "All scores: #{result[:scores].map { |s| s.round(4) }}"
35
+ end
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example: Run an ONNX embedding model
5
+ #
6
+ # With tokenizer (text input):
7
+ # gem install onnx-ruby tokenizers
8
+ # ruby embedding.rb all-MiniLM-L6-v2.onnx sentence-transformers/all-MiniLM-L6-v2
9
+ #
10
+ # With raw token IDs:
11
+ # ruby embedding.rb all-MiniLM-L6-v2.onnx
12
+
13
+ require "onnx_ruby"
14
+
15
+ model_path = ARGV[0] || "all-MiniLM-L6-v2.onnx"
16
+ tokenizer_name = ARGV[1]
17
+ abort "Usage: ruby embedding.rb <model.onnx> [tokenizer_name]" unless File.exist?(model_path)
18
+
19
+ if tokenizer_name
20
+ embedder = OnnxRuby::Embedder.new(model_path, tokenizer: tokenizer_name)
21
+ embedding = embedder.embed("Hello world")
22
+ puts "Embedding (#{embedding.length} dims): #{embedding.take(5).map { |v| v.round(4) }}..."
23
+
24
+ batch = embedder.embed_batch(["Hello", "World", "Ruby is great"])
25
+ puts "Batch: #{batch.length} embeddings of #{batch.first.length} dims"
26
+ else
27
+ embedder = OnnxRuby::Embedder.new(model_path)
28
+
29
+ # Use pre-tokenized input
30
+ result = embedder.embed({
31
+ "input_ids" => [101, 2023, 2003, 1037, 3231, 102],
32
+ "attention_mask" => [1, 1, 1, 1, 1, 1]
33
+ })
34
+ puts "Embedding (#{result.length} dims): #{result.take(5).map { |v| v.round(4) }}..."
35
+ end
@@ -0,0 +1,170 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Real-world proof of concept: Semantic search with all-MiniLM-L6-v2
5
+ #
6
+ # This demo:
7
+ # 1. Loads a real HuggingFace sentence-transformers model (86MB)
8
+ # 2. Tokenizes English text with the tokenizers gem
9
+ # 3. Generates 384-dimensional embeddings
10
+ # 4. Performs cosine similarity search
11
+ #
12
+ # Prerequisites:
13
+ # gem install tokenizers
14
+
15
+ require_relative "../lib/onnx_ruby"
16
+ require "tokenizers"
17
+ require "benchmark"
18
+
19
+ MODEL_PATH = File.join(__dir__, "models", "all-MiniLM-L6-v2.onnx")
20
+ TOKENIZER_ID = "sentence-transformers/all-MiniLM-L6-v2"
21
+
22
+ unless File.exist?(MODEL_PATH)
23
+ abort "Model not found. Download it first:\n" \
24
+ " curl -fSL https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx " \
25
+ "-o examples/models/all-MiniLM-L6-v2.onnx"
26
+ end
27
+
28
+ # --- Setup ---
29
+
30
+ puts "Loading tokenizer..."
31
+ tokenizer = Tokenizers::Tokenizer.from_pretrained(TOKENIZER_ID)
32
+
33
+ puts "Loading ONNX model..."
34
+ embedder = OnnxRuby::Embedder.new(MODEL_PATH, tokenizer: tokenizer, normalize: true)
35
+
36
+ puts "Model inputs: #{embedder.session.inputs.map { |i| "#{i[:name]} #{i[:type]} #{i[:shape]}" }.join(", ")}"
37
+ puts "Model outputs: #{embedder.session.outputs.map { |o| "#{o[:name]} #{o[:type]} #{o[:shape]}" }.join(", ")}"
38
+ puts
39
+
40
+ # --- Document corpus ---
41
+
42
+ documents = [
43
+ "Ruby is a dynamic, open source programming language with a focus on simplicity and productivity.",
44
+ "Python is widely used in data science and machine learning applications.",
45
+ "ONNX Runtime is a cross-platform inference engine for machine learning models.",
46
+ "Rails is a web application framework written in Ruby.",
47
+ "Vector databases store and search high-dimensional embeddings efficiently.",
48
+ "The weather in Tokyo is warm and humid during summer.",
49
+ "PostgreSQL is a powerful open source relational database system.",
50
+ "Transformers have revolutionized natural language processing since 2017.",
51
+ "Docker containers package applications with their dependencies for consistent deployment.",
52
+ "The quick brown fox jumps over the lazy dog.",
53
+ "Semantic search understands the meaning of queries, not just keywords.",
54
+ "GPUs accelerate matrix operations used in deep learning training.",
55
+ ]
56
+
57
+ # --- Generate embeddings ---
58
+
59
+ puts "Generating embeddings for #{documents.length} documents..."
60
+ doc_embeddings = nil
61
+ time = Benchmark.realtime do
62
+ doc_embeddings = embedder.embed_batch(documents)
63
+ end
64
+ puts " Done in #{(time * 1000).round(1)}ms (#{(time / documents.length * 1000).round(1)}ms per document)"
65
+ puts " Embedding dimensions: #{doc_embeddings.first.length}"
66
+ puts
67
+
68
+ # --- Cosine similarity search ---
69
+
70
+ def cosine_similarity(a, b)
71
+ dot = a.zip(b).sum { |x, y| x * y }
72
+ norm_a = Math.sqrt(a.sum { |x| x * x })
73
+ norm_b = Math.sqrt(b.sum { |x| x * x })
74
+ dot / (norm_a * norm_b)
75
+ end
76
+
77
+ queries = [
78
+ "How do I build a web app with Ruby?",
79
+ "What is the best way to run ML models locally?",
80
+ "Tell me about database systems",
81
+ "Natural language understanding",
82
+ ]
83
+
84
+ queries.each do |query|
85
+ puts "Query: \"#{query}\""
86
+
87
+ query_embedding = embedder.embed(query)
88
+
89
+ results = documents.each_with_index.map do |doc, i|
90
+ { document: doc, score: cosine_similarity(query_embedding, doc_embeddings[i]), index: i }
91
+ end.sort_by { |r| -r[:score] }
92
+
93
+ results.first(3).each_with_index do |r, rank|
94
+ puts " #{rank + 1}. [#{r[:score].round(4)}] #{r[:document]}"
95
+ end
96
+ puts
97
+ end
98
+
99
+ # --- Classifier demo ---
100
+
101
+ puts "=== Classifier Demo ==="
102
+ classifier = OnnxRuby::Classifier.new(
103
+ File.join(__dir__, "..", "test", "models", "classifier.onnx"),
104
+ labels: %w[greeting farewell question command]
105
+ )
106
+
107
+ # Use embeddings as features (truncated to 8 dims to match test model)
108
+ test_sentences = ["Hello there!", "What is Ruby?", "Goodbye!", "Run the tests"]
109
+ test_sentences.each do |sentence|
110
+ emb = embedder.embed(sentence)
111
+ pred = classifier.predict(emb.first(8)) # our test classifier expects 8 features
112
+ puts " \"#{sentence}\" -> #{pred[:label]} (#{(pred[:score] * 100).round(1)}%)"
113
+ end
114
+ puts
115
+
116
+ # --- Session Pool demo ---
117
+
118
+ puts "=== Concurrent Inference with SessionPool ==="
119
+ pool = OnnxRuby::SessionPool.new(MODEL_PATH, size: 3)
120
+
121
+ sentences = [
122
+ "Machine learning is transforming software",
123
+ "Ruby gems make code reusable",
124
+ "ONNX is an open format for ML models",
125
+ "Concurrent processing improves throughput",
126
+ "Embeddings capture semantic meaning",
127
+ "Thread safety matters in production",
128
+ ]
129
+
130
+ results = []
131
+ mutex = Mutex.new
132
+ time = Benchmark.realtime do
133
+ threads = sentences.map do |sentence|
134
+ Thread.new do
135
+ encoding = tokenizer.encode(sentence)
136
+ ids = encoding.ids
137
+ mask = encoding.attention_mask
138
+
139
+ r = pool.run({
140
+ "input_ids" => [ids],
141
+ "attention_mask" => [mask],
142
+ "token_type_ids" => [Array.new(ids.length, 0)]
143
+ })
144
+
145
+ embedding = r.values.first[0]
146
+ mutex.synchronize { results << { sentence: sentence, dims: embedding.length } }
147
+ end
148
+ end
149
+ threads.each(&:join)
150
+ end
151
+
152
+ puts " #{results.length} sentences embedded concurrently in #{(time * 1000).round(1)}ms"
153
+ puts " Pool size: #{pool.size} sessions created (max: 3)"
154
+ results.each { |r| puts " #{r[:sentence]} -> #{r[:dims]}d" }
155
+ puts
156
+
157
+ # --- Lazy loading demo ---
158
+
159
+ puts "=== Lazy Loading Demo ==="
160
+ lazy = OnnxRuby::LazySession.new(MODEL_PATH)
161
+ puts " Before first call: loaded=#{lazy.loaded?}"
162
+ lazy.run({
163
+ "input_ids" => [[101, 2023, 2003, 1037, 3231, 102]],
164
+ "attention_mask" => [[1, 1, 1, 1, 1, 1]],
165
+ "token_type_ids" => [[0, 0, 0, 0, 0, 0]]
166
+ })
167
+ puts " After first call: loaded=#{lazy.loaded?}"
168
+ puts
169
+
170
+ puts "=== All real-world demos completed successfully! ==="
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example: Full RAG pipeline with onnx-ruby + zvec-ruby
5
+ #
6
+ # Prerequisites:
7
+ # gem install onnx-ruby zvec-ruby tokenizers
8
+ # Download or export an ONNX embedding model
9
+
10
+ require "onnx_ruby"
11
+
12
+ begin
13
+ require "zvec"
14
+ rescue LoadError
15
+ abort "This example requires zvec-ruby: gem install zvec-ruby"
16
+ end
17
+
18
+ MODEL_PATH = ARGV[0] || "all-MiniLM-L6-v2.onnx"
19
+ TOKENIZER = ARGV[1] || "sentence-transformers/all-MiniLM-L6-v2"
20
+
21
+ unless File.exist?(MODEL_PATH)
22
+ abort "Usage: ruby with_zvec.rb <embedding_model.onnx> [tokenizer_name]"
23
+ end
24
+
25
+ # 1. Create embedder
26
+ embedder = OnnxRuby::Embedder.new(MODEL_PATH, tokenizer: TOKENIZER)
27
+
28
+ # 2. Create vector store
29
+ dim = embedder.session.outputs.first[:shape].last
30
+ store = Zvec::Store.new(dimensions: dim)
31
+
32
+ # 3. Index some documents
33
+ documents = [
34
+ "Ruby is a dynamic programming language",
35
+ "ONNX Runtime provides high-performance inference",
36
+ "Vector databases enable semantic search",
37
+ "Machine learning models can run locally",
38
+ "Rails is a web application framework"
39
+ ]
40
+
41
+ embeddings = embedder.embed_batch(documents)
42
+ documents.each_with_index do |doc, i|
43
+ store.add(embeddings[i], metadata: { text: doc, id: i })
44
+ end
45
+
46
+ # 4. Query
47
+ query = "How to run ML models?"
48
+ query_embedding = embedder.embed(query)
49
+ results = store.search(query_embedding, k: 3)
50
+
51
+ puts "Query: #{query}\n\n"
52
+ results.each_with_index do |result, i|
53
+ puts "#{i + 1}. (score: #{result[:score].round(4)}) #{result[:metadata][:text]}"
54
+ end
@@ -0,0 +1,75 @@
1
+ require "mkmf-rice"
2
+ require "fileutils"
3
+ require "tmpdir"
4
+
5
+ ORT_VERSION = "1.24.3"
6
+
7
+ def detect_platform
8
+ os = RbConfig::CONFIG["host_os"]
9
+ cpu = RbConfig::CONFIG["host_cpu"]
10
+
11
+ case os
12
+ when /darwin/
13
+ cpu =~ /arm64|aarch64/ ? "osx-arm64" : "osx-x86_64"
14
+ when /linux/
15
+ cpu =~ /aarch64|arm64/ ? "linux-aarch64" : "linux-x64"
16
+ else
17
+ abort "Unsupported OS: #{os}"
18
+ end
19
+ end
20
+
21
+ def download_onnxruntime(dest_dir)
22
+ platform = detect_platform
23
+ filename = "onnxruntime-#{platform}-#{ORT_VERSION}.tgz"
24
+ url = "https://github.com/microsoft/onnxruntime/releases/download/v#{ORT_VERSION}/#{filename}"
25
+ tmp_file = File.join(Dir.tmpdir, filename)
26
+
27
+ unless File.exist?(tmp_file)
28
+ puts "Downloading ONNX Runtime v#{ORT_VERSION} for #{platform}..."
29
+ system("curl", "-fSL", url, "-o", tmp_file) or
30
+ abort "Failed to download ONNX Runtime from #{url}"
31
+ end
32
+
33
+ FileUtils.mkdir_p(dest_dir)
34
+ puts "Extracting to #{dest_dir}..."
35
+ system("tar", "xzf", tmp_file, "-C", dest_dir, "--strip-components=1") or
36
+ abort "Failed to extract ONNX Runtime"
37
+
38
+ dest_dir
39
+ end
40
+
41
+ # Find ONNX Runtime
42
+ ort_dir = ENV["ONNX_RUNTIME_DIR"]
43
+
44
+ unless ort_dir
45
+ # Check for bundled copy
46
+ bundled_dir = File.join(__dir__, "onnxruntime")
47
+ if File.exist?(File.join(bundled_dir, "include", "onnxruntime_cxx_api.h"))
48
+ ort_dir = bundled_dir
49
+ else
50
+ ort_dir = download_onnxruntime(bundled_dir)
51
+ end
52
+ end
53
+
54
+ ort_include = File.join(ort_dir, "include")
55
+ ort_lib = File.join(ort_dir, "lib")
56
+
57
+ abort "Cannot find ONNX Runtime headers in #{ort_include}" unless File.exist?(File.join(ort_include, "onnxruntime_cxx_api.h"))
58
+
59
+ $INCFLAGS << " -I#{ort_include}"
60
+ $LDFLAGS << " -L#{ort_lib}"
61
+ $libs << " -lonnxruntime"
62
+
63
+ # Set rpath so the shared library can be found at runtime
64
+ case RbConfig::CONFIG["host_os"]
65
+ when /darwin/
66
+ $LDFLAGS << " -Wl,-rpath,#{ort_lib}"
67
+ when /linux/
68
+ $LDFLAGS << " -Wl,-rpath,#{ort_lib}"
69
+ end
70
+
71
+ $CXXFLAGS = ($CXXFLAGS || "") + " -std=c++17"
72
+
73
+ have_header("onnxruntime_cxx_api.h") or abort "Cannot find onnxruntime_cxx_api.h"
74
+
75
+ create_makefile("onnx_ruby/onnx_ruby_ext")