onnx-ruby 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 809c3c981b4ec890ed7169969b64f556dffb4a44a0c44e6126ec3f2ba28f2f07
4
- data.tar.gz: 91233f1ee8c921c5cf4a2352f98e0f322aac91b27b0db01c55212c0ae8f9a750
3
+ metadata.gz: ec86be6d12455b53b31aeb0b680db0239f0fc6d2b0d93a57efcacf847728af43
4
+ data.tar.gz: e228293936cffd177c42055a5dcf34536ab1c30670797d231bc3a9885eba0a8e
5
5
  SHA512:
6
- metadata.gz: b1ad99c83090be72d099836e84992d1a108b876272fd152fab1de4b43569930b1bdf985ca79e5fc8b27e3a4a8da65b33d823a560b9dd6cd4d7df384c942adb0a
7
- data.tar.gz: f9a9c2b4278f7c20dffd890ece27cecf24004127944826b65bcef7a31a383c5f3799cecb01ca29e2ffd984f92698cb1ecf50e3681df2147b104c24839b9021df
6
+ metadata.gz: 609dc877ac507261a573b4284d249e779f07d6e45661ee894e29dd1c65e906cd3d89349f13d1931ab69c8a8f71765fc0c698e95bd59a114c9cef7db3a86ee39f
7
+ data.tar.gz: 86cd66b410257e644fad7a4e2148912fba30a03a1aa39e9112c5c087903fc311f8d7626416d52ccb5810380bf6316ac88ba2fb4f4a856791e0315e548f5e947b
data/CLAUDE.md CHANGED
@@ -314,7 +314,7 @@ torch.onnx.export(model, dummy, "test/models/simple.onnx",
314
314
 
315
315
  ## Publishing
316
316
 
317
- - RubyGems.org: `gem push onnx-ruby-*.gem`
317
+ - RubyGems.org: `GEM_HOST_API_KEY=<your-rubygems-api-key> gem push onnx-ruby-*.gem`
318
318
  - gem.coop: `GEM_HOST_API_KEY=<your-gem-coop-api-key> gem push onnx-ruby-*.gem --host https://beta.gem.coop/@johannesdwicahyo`
319
319
 
320
320
  ## Notes from zvec-ruby Experience
@@ -2,6 +2,8 @@
2
2
 
3
3
  module OnnxRuby
4
4
  class Classifier
5
+ include TokenizerSupport
6
+
5
7
  attr_reader :session, :labels
6
8
 
7
9
  def initialize(model_path, tokenizer: nil, labels: nil, **session_opts)
@@ -31,23 +33,9 @@ module OnnxRuby
31
33
 
32
34
  private
33
35
 
34
- def resolve_tokenizer(tokenizer)
35
- return nil if tokenizer.nil?
36
-
37
- if tokenizer.respond_to?(:encode)
38
- tokenizer
39
- else
40
- begin
41
- require "tokenizers"
42
- Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
43
- rescue LoadError
44
- raise Error, "tokenizer-ruby gem is required for text tokenization. " \
45
- "Install with: gem install tokenizers"
46
- end
47
- end
48
- end
49
-
50
36
  def prepare_inputs(inputs)
37
+ return [] if inputs.empty?
38
+
51
39
  if inputs.first.is_a?(String)
52
40
  raise Error, "tokenizer is required for text inputs" unless @tokenizer
53
41
 
@@ -2,8 +2,7 @@
2
2
 
3
3
  module OnnxRuby
4
4
  class Configuration
5
- attr_accessor :models_path, :default_providers, :default_log_level,
6
- :pool_size, :pool_timeout
5
+ attr_accessor :models_path, :default_providers, :default_log_level
7
6
 
8
7
  def initialize
9
8
  @models_path = "app/models/onnx"
@@ -12,5 +11,25 @@ module OnnxRuby
12
11
  @pool_size = 5
13
12
  @pool_timeout = 5
14
13
  end
14
+
15
+ def pool_size
16
+ @pool_size
17
+ end
18
+
19
+ def pool_size=(value)
20
+ raise ArgumentError, "pool_size must be a positive Integer" unless value.is_a?(Integer) && value > 0
21
+
22
+ @pool_size = value
23
+ end
24
+
25
+ def pool_timeout
26
+ @pool_timeout
27
+ end
28
+
29
+ def pool_timeout=(value)
30
+ raise ArgumentError, "pool_timeout must be a positive Numeric" unless value.is_a?(Numeric) && value > 0
31
+
32
+ @pool_timeout = value
33
+ end
15
34
  end
16
35
  end
@@ -2,6 +2,8 @@
2
2
 
3
3
  module OnnxRuby
4
4
  class Embedder
5
+ include TokenizerSupport
6
+
5
7
  attr_reader :session
6
8
 
7
9
  def initialize(model_path, tokenizer: nil, normalize: true, **session_opts)
@@ -26,6 +28,7 @@ module OnnxRuby
26
28
  result = @session.run(feed)
27
29
 
28
30
  raw = find_output(result, %w[embeddings sentence_embedding output last_hidden_state])
31
+ return [] if raw.nil? || raw.empty?
29
32
 
30
33
  # If output is 3D (batch, seq_len, dim) — do mean pooling
31
34
  embeddings = if raw.first.is_a?(Array) && raw.first.first.is_a?(Array)
@@ -39,22 +42,6 @@ module OnnxRuby
39
42
 
40
43
  private
41
44
 
42
- def resolve_tokenizer(tokenizer)
43
- return nil if tokenizer.nil?
44
-
45
- if tokenizer.respond_to?(:encode) || tokenizer.respond_to?(:encode_batch)
46
- tokenizer
47
- else
48
- begin
49
- require "tokenizers"
50
- Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
51
- rescue LoadError
52
- raise Error, "tokenizer-ruby gem is required for text tokenization. " \
53
- "Install with: gem install tokenizers"
54
- end
55
- end
56
- end
57
-
58
45
  def prepare_inputs(inputs)
59
46
  if inputs.first.is_a?(String)
60
47
  raise Error, "tokenizer is required for text inputs" unless @tokenizer
@@ -100,6 +87,8 @@ module OnnxRuby
100
87
 
101
88
  def build_feed(ids, masks)
102
89
  input_names = @session.inputs.map { |i| i[:name] }
90
+ raise OnnxRuby::Error, "Model has no input names" if input_names.empty?
91
+
103
92
  feed = {}
104
93
  feed[input_names.find { |n| n.include?("input_id") } || input_names[0]] = ids
105
94
  mask_name = input_names.find { |n| n.include?("mask") || n.include?("attention") }
@@ -119,6 +108,8 @@ module OnnxRuby
119
108
  # Mean pooling over token embeddings, masked by attention_mask
120
109
  def mean_pool(hidden_states, masks)
121
110
  hidden_states.each_with_index.map do |tokens, batch_idx|
111
+ return [] if tokens.nil? || tokens.empty? || tokens.first.nil?
112
+
122
113
  mask = masks && masks[batch_idx]
123
114
  dim = tokens.first.length
124
115
  sum = Array.new(dim, 0.0)
@@ -28,8 +28,6 @@ module OnnxRuby
28
28
  private
29
29
 
30
30
  def load_session
31
- return @session if @session
32
-
33
31
  @mutex.synchronize do
34
32
  @session ||= Session.new(@model_path, **@opts)
35
33
  end
@@ -2,6 +2,8 @@
2
2
 
3
3
  module OnnxRuby
4
4
  class Reranker
5
+ include TokenizerSupport
6
+
5
7
  attr_reader :session
6
8
 
7
9
  def initialize(model_path, tokenizer: nil, **session_opts)
@@ -37,22 +39,6 @@ module OnnxRuby
37
39
 
38
40
  private
39
41
 
40
- def resolve_tokenizer(tokenizer)
41
- return nil if tokenizer.nil?
42
-
43
- if tokenizer.respond_to?(:encode)
44
- tokenizer
45
- else
46
- begin
47
- require "tokenizers"
48
- Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
49
- rescue LoadError
50
- raise Error, "tokenizer-ruby gem is required for text tokenization. " \
51
- "Install with: gem install tokenizers"
52
- end
53
- end
54
- end
55
-
56
42
  def score_pairs(pairs)
57
43
  if @tokenizer.respond_to?(:encode_batch)
58
44
  encodings = @tokenizer.encode_batch(pairs)
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OnnxRuby
4
+ module TokenizerSupport
5
+ private
6
+
7
+ def resolve_tokenizer(tokenizer)
8
+ case tokenizer
9
+ when String
10
+ require "tokenizers"
11
+ Tokenizers::Tokenizer.from_pretrained(tokenizer.to_s)
12
+ when nil
13
+ nil
14
+ else
15
+ tokenizer
16
+ end
17
+ rescue LoadError
18
+ raise OnnxRuby::Error,
19
+ "tokenizers gem required for text inputs. Install: gem install tokenizers"
20
+ end
21
+ end
22
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OnnxRuby
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/onnx_ruby.rb CHANGED
@@ -12,6 +12,7 @@ end
12
12
  require_relative "onnx_ruby/onnx_ruby_ext"
13
13
  require_relative "onnx_ruby/tensor"
14
14
  require_relative "onnx_ruby/session"
15
+ require_relative "onnx_ruby/tokenizer_support"
15
16
  require_relative "onnx_ruby/embedder"
16
17
  require_relative "onnx_ruby/classifier"
17
18
  require_relative "onnx_ruby/reranker"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onnx-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Johannes Dwi Cahyo
@@ -96,6 +96,7 @@ files:
96
96
  - lib/onnx_ruby/session.rb
97
97
  - lib/onnx_ruby/session_pool.rb
98
98
  - lib/onnx_ruby/tensor.rb
99
+ - lib/onnx_ruby/tokenizer_support.rb
99
100
  - lib/onnx_ruby/version.rb
100
101
  - onnx-ruby.gemspec
101
102
  homepage: https://github.com/johannesdwicahyo/onnx-ruby