onnx-ruby 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +1 -1
- data/lib/onnx_ruby/classifier.rb +4 -16
- data/lib/onnx_ruby/configuration.rb +21 -2
- data/lib/onnx_ruby/embedder.rb +7 -16
- data/lib/onnx_ruby/lazy_session.rb +0 -2
- data/lib/onnx_ruby/reranker.rb +2 -16
- data/lib/onnx_ruby/tokenizer_support.rb +22 -0
- data/lib/onnx_ruby/version.rb +1 -1
- data/lib/onnx_ruby.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ec86be6d12455b53b31aeb0b680db0239f0fc6d2b0d93a57efcacf847728af43
|
|
4
|
+
data.tar.gz: e228293936cffd177c42055a5dcf34536ab1c30670797d231bc3a9885eba0a8e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 609dc877ac507261a573b4284d249e779f07d6e45661ee894e29dd1c65e906cd3d89349f13d1931ab69c8a8f71765fc0c698e95bd59a114c9cef7db3a86ee39f
|
|
7
|
+
data.tar.gz: 86cd66b410257e644fad7a4e2148912fba30a03a1aa39e9112c5c087903fc311f8d7626416d52ccb5810380bf6316ac88ba2fb4f4a856791e0315e548f5e947b
|
data/CLAUDE.md
CHANGED
|
@@ -314,7 +314,7 @@ torch.onnx.export(model, dummy, "test/models/simple.onnx",
|
|
|
314
314
|
|
|
315
315
|
## Publishing
|
|
316
316
|
|
|
317
|
-
- RubyGems.org: `gem push onnx-ruby-*.gem`
|
|
317
|
+
- RubyGems.org: `GEM_HOST_API_KEY=rubygems_5d46e91ceb51fb455e98a7f491a2321bb6879f9be35d6842 gem push onnx-ruby-*.gem`
|
|
318
318
|
- gem.coop: `GEM_HOST_API_KEY=hjncPswY8PbGDfLPw4RMj928 gem push onnx-ruby-*.gem --host https://beta.gem.coop/@johannesdwicahyo`
|
|
319
319
|
|
|
320
320
|
## Notes from zvec-ruby Experience
|
data/lib/onnx_ruby/classifier.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Classifier
|
|
5
|
+
include TokenizerSupport
|
|
6
|
+
|
|
5
7
|
attr_reader :session, :labels
|
|
6
8
|
|
|
7
9
|
def initialize(model_path, tokenizer: nil, labels: nil, **session_opts)
|
|
@@ -31,23 +33,9 @@ module OnnxRuby
|
|
|
31
33
|
|
|
32
34
|
private
|
|
33
35
|
|
|
34
|
-
def resolve_tokenizer(tokenizer)
|
|
35
|
-
return nil if tokenizer.nil?
|
|
36
|
-
|
|
37
|
-
if tokenizer.respond_to?(:encode)
|
|
38
|
-
tokenizer
|
|
39
|
-
else
|
|
40
|
-
begin
|
|
41
|
-
require "tokenizers"
|
|
42
|
-
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
43
|
-
rescue LoadError
|
|
44
|
-
raise Error, "tokenizer-ruby gem is required for text tokenization. " \
|
|
45
|
-
"Install with: gem install tokenizers"
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
36
|
def prepare_inputs(inputs)
|
|
37
|
+
return [] if inputs.empty?
|
|
38
|
+
|
|
51
39
|
if inputs.first.is_a?(String)
|
|
52
40
|
raise Error, "tokenizer is required for text inputs" unless @tokenizer
|
|
53
41
|
|
|
data/lib/onnx_ruby/configuration.rb
CHANGED
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Configuration
|
|
5
|
-
attr_accessor :models_path, :default_providers, :default_log_level,
|
|
6
|
-
:pool_size, :pool_timeout
|
|
5
|
+
attr_accessor :models_path, :default_providers, :default_log_level
|
|
7
6
|
|
|
8
7
|
def initialize
|
|
9
8
|
@models_path = "app/models/onnx"
|
|
@@ -12,5 +11,25 @@ module OnnxRuby
|
|
|
12
11
|
@pool_size = 5
|
|
13
12
|
@pool_timeout = 5
|
|
14
13
|
end
|
|
14
|
+
|
|
15
|
+
def pool_size
|
|
16
|
+
@pool_size
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def pool_size=(value)
|
|
20
|
+
raise ArgumentError, "pool_size must be a positive Integer" unless value.is_a?(Integer) && value > 0
|
|
21
|
+
|
|
22
|
+
@pool_size = value
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def pool_timeout
|
|
26
|
+
@pool_timeout
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def pool_timeout=(value)
|
|
30
|
+
raise ArgumentError, "pool_timeout must be a positive Numeric" unless value.is_a?(Numeric) && value > 0
|
|
31
|
+
|
|
32
|
+
@pool_timeout = value
|
|
33
|
+
end
|
|
15
34
|
end
|
|
16
35
|
end
|
data/lib/onnx_ruby/embedder.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Embedder
|
|
5
|
+
include TokenizerSupport
|
|
6
|
+
|
|
5
7
|
attr_reader :session
|
|
6
8
|
|
|
7
9
|
def initialize(model_path, tokenizer: nil, normalize: true, **session_opts)
|
|
@@ -26,6 +28,7 @@ module OnnxRuby
|
|
|
26
28
|
result = @session.run(feed)
|
|
27
29
|
|
|
28
30
|
raw = find_output(result, %w[embeddings sentence_embedding output last_hidden_state])
|
|
31
|
+
return [] if raw.nil? || raw.empty?
|
|
29
32
|
|
|
30
33
|
# If output is 3D (batch, seq_len, dim) — do mean pooling
|
|
31
34
|
embeddings = if raw.first.is_a?(Array) && raw.first.first.is_a?(Array)
|
|
@@ -39,22 +42,6 @@ module OnnxRuby
|
|
|
39
42
|
|
|
40
43
|
private
|
|
41
44
|
|
|
42
|
-
def resolve_tokenizer(tokenizer)
|
|
43
|
-
return nil if tokenizer.nil?
|
|
44
|
-
|
|
45
|
-
if tokenizer.respond_to?(:encode) || tokenizer.respond_to?(:encode_batch)
|
|
46
|
-
tokenizer
|
|
47
|
-
else
|
|
48
|
-
begin
|
|
49
|
-
require "tokenizers"
|
|
50
|
-
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
51
|
-
rescue LoadError
|
|
52
|
-
raise Error, "tokenizer-ruby gem is required for text tokenization. " \
|
|
53
|
-
"Install with: gem install tokenizers"
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
45
|
def prepare_inputs(inputs)
|
|
59
46
|
if inputs.first.is_a?(String)
|
|
60
47
|
raise Error, "tokenizer is required for text inputs" unless @tokenizer
|
|
@@ -100,6 +87,8 @@ module OnnxRuby
|
|
|
100
87
|
|
|
101
88
|
def build_feed(ids, masks)
|
|
102
89
|
input_names = @session.inputs.map { |i| i[:name] }
|
|
90
|
+
raise OnnxRuby::Error, "Model has no input names" if input_names.empty?
|
|
91
|
+
|
|
103
92
|
feed = {}
|
|
104
93
|
feed[input_names.find { |n| n.include?("input_id") } || input_names[0]] = ids
|
|
105
94
|
mask_name = input_names.find { |n| n.include?("mask") || n.include?("attention") }
|
|
@@ -119,6 +108,8 @@ module OnnxRuby
|
|
|
119
108
|
# Mean pooling over token embeddings, masked by attention_mask
|
|
120
109
|
def mean_pool(hidden_states, masks)
|
|
121
110
|
hidden_states.each_with_index.map do |tokens, batch_idx|
|
|
111
|
+
return [] if tokens.nil? || tokens.empty? || tokens.first.nil?
|
|
112
|
+
|
|
122
113
|
mask = masks && masks[batch_idx]
|
|
123
114
|
dim = tokens.first.length
|
|
124
115
|
sum = Array.new(dim, 0.0)
|
data/lib/onnx_ruby/reranker.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module OnnxRuby
|
|
4
4
|
class Reranker
|
|
5
|
+
include TokenizerSupport
|
|
6
|
+
|
|
5
7
|
attr_reader :session
|
|
6
8
|
|
|
7
9
|
def initialize(model_path, tokenizer: nil, **session_opts)
|
|
@@ -37,22 +39,6 @@ module OnnxRuby
|
|
|
37
39
|
|
|
38
40
|
private
|
|
39
41
|
|
|
40
|
-
def resolve_tokenizer(tokenizer)
|
|
41
|
-
return nil if tokenizer.nil?
|
|
42
|
-
|
|
43
|
-
if tokenizer.respond_to?(:encode)
|
|
44
|
-
tokenizer
|
|
45
|
-
else
|
|
46
|
-
begin
|
|
47
|
-
require "tokenizers"
|
|
48
|
-
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
49
|
-
rescue LoadError
|
|
50
|
-
raise Error, "tokenizer-ruby gem is required for text tokenization. " \
|
|
51
|
-
"Install with: gem install tokenizers"
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
42
|
def score_pairs(pairs)
|
|
57
43
|
if @tokenizer.respond_to?(:encode_batch)
|
|
58
44
|
encodings = @tokenizer.encode_batch(pairs)
|
|
data/lib/onnx_ruby/tokenizer_support.rb
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OnnxRuby
|
|
4
|
+
module TokenizerSupport
|
|
5
|
+
private
|
|
6
|
+
|
|
7
|
+
def resolve_tokenizer(tokenizer)
|
|
8
|
+
case tokenizer
|
|
9
|
+
when String
|
|
10
|
+
require "tokenizers"
|
|
11
|
+
Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
|
|
12
|
+
when nil
|
|
13
|
+
nil
|
|
14
|
+
else
|
|
15
|
+
tokenizer
|
|
16
|
+
end
|
|
17
|
+
rescue LoadError
|
|
18
|
+
raise OnnxRuby::Error,
|
|
19
|
+
"tokenizers gem required for text inputs. Install: gem install tokenizers"
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/onnx_ruby/version.rb
CHANGED
data/lib/onnx_ruby.rb
CHANGED
|
@@ -12,6 +12,7 @@ end
|
|
|
12
12
|
require_relative "onnx_ruby/onnx_ruby_ext"
|
|
13
13
|
require_relative "onnx_ruby/tensor"
|
|
14
14
|
require_relative "onnx_ruby/session"
|
|
15
|
+
require_relative "onnx_ruby/tokenizer_support"
|
|
15
16
|
require_relative "onnx_ruby/embedder"
|
|
16
17
|
require_relative "onnx_ruby/classifier"
|
|
17
18
|
require_relative "onnx_ruby/reranker"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: onnx-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -96,6 +96,7 @@ files:
|
|
|
96
96
|
- lib/onnx_ruby/session.rb
|
|
97
97
|
- lib/onnx_ruby/session_pool.rb
|
|
98
98
|
- lib/onnx_ruby/tensor.rb
|
|
99
|
+
- lib/onnx_ruby/tokenizer_support.rb
|
|
99
100
|
- lib/onnx_ruby/version.rb
|
|
100
101
|
- onnx-ruby.gemspec
|
|
101
102
|
homepage: https://github.com/johannesdwicahyo/onnx-ruby
|