gliner 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40c13e552a887bd1d77fc718be8708b228c9e213c9bb49c3591b113738346bd4
4
- data.tar.gz: e6dce5cda3fcef66bb9a627ffe23c5324b9395af6bd5eeb3c45a4ea400f52c63
3
+ metadata.gz: a8e4bdddc47289f29cce2469ff6bbeecc9ef260b39f3b02f6e3e89b23e3e4e7a
4
+ data.tar.gz: b59ad90385c8da5b478f007f5715994c11f1f1a0b85617acc501acb8b17723aa
5
5
  SHA512:
6
- metadata.gz: 723f039e065cd9b86111de3d9478cf711d01efdea5a67ce426cf1bcfb4e84396abe9f9969150b743a8f0e54be5caea0401d25a1dcf191c0f09a932189f19fbed
7
- data.tar.gz: 1566d0fc8446ee1463c0dfbeb37d7eb7021ca0f0d04bdf3d83148f1e7b33eb1d2eea4ddea44dcef91bf9b11f7cd2508cdcfd9120ade6fb72ac70aa5510d22896
6
+ metadata.gz: 072cc980f4653d74da83d3cfea1b09d0cbf8e023bfb6d3829ce6879163b7dc77a170ef63ad68138eca7d892521f30eda8d0be6c72e62cbe0353732aa4542bab3
7
+ data.tar.gz: fb84732285ff71edad266533cfd513d51036c709a4c5275fdc6548a373f11ead86eb66978fd0b28522c567af52e7d2ce8111053d721ec6ddcee6546db8bf58ed
data/README.md CHANGED
@@ -1,4 +1,6 @@
1
- # Gliner
1
+ # GLiNER
2
+ [![tests](https://github.com/elcuervo/gliner/actions/workflows/tests.yml/badge.svg)](https://github.com/elcuervo/gliner/actions/workflows/tests.yml)
3
+ ![Gem Version](https://img.shields.io/gem/v/gliner)
2
4
 
3
5
  ![](https://images.unsplash.com/photo-1625768376503-68d2495d78c5?q=80&w=2225&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D)
4
6
 
@@ -17,53 +19,48 @@ require "gliner"
17
19
 
18
20
  Gliner.configure do |config|
19
21
  config.threshold = 0.2
20
- config.model_dir = "/path/to/gliner2-multi-v1"
21
- config.model_file = "model.onnx"
22
+ # If unset, auto! downloads the default model to .cache/
23
+ # Or set a local path explicitly:
24
+ # config.model = "/path/to/gliner2-multi-v1"
25
+ config.variant = :fp16
26
+ config.auto!
22
27
  end
23
28
 
24
- Gliner.load("path/to/gliner2-multi-v1")
25
-
26
29
  text = "Apple CEO Tim Cook announced iPhone 15 in Cupertino yesterday."
27
30
  labels = ["company", "person", "product", "location"]
28
31
 
29
32
  model = Gliner[labels]
30
33
  pp model[text]
31
- ```
32
-
33
- Expected shape:
34
34
 
35
- ```ruby
36
- {"entities"=>{"company"=>["Apple"], "person"=>["Tim Cook"], "product"=>["iPhone 15"], "location"=>["Cupertino"]}}
35
+ # => {"company"=>["Apple"], "person"=>["Tim Cook"], "product"=>["iPhone 15"], "location"=>["Cupertino"]}
37
36
  ```
38
37
 
39
38
  You can also pass per-entity configs:
40
39
 
41
40
  ```ruby
42
41
  labels = {
43
- "email" => { "description" => "Email addresses", "dtype" => "list", "threshold" => 0.9 },
44
- "person" => { "description" => "Person names", "dtype" => "str" }
42
+ email: { description: "Email addresses", dtype: "list", threshold: 0.9 },
43
+ person: { description: "Person names", dtype: "str" }
45
44
  }
46
45
 
47
46
  model = Gliner[labels]
48
47
  pp model["Email John Doe at john@example.com.", threshold: 0.5]
48
+
49
+ # => {"email"=>["john@example.com"], "person"=>"John Doe"}
49
50
  ```
50
51
 
51
52
  ### Classification
52
53
 
53
54
  ```ruby
54
55
  model = Gliner.classify[
55
- { "sentiment" => %w[positive negative neutral] }
56
+ { sentiment: %w[positive negative neutral] }
56
57
  ]
57
58
 
58
59
  result = model["This laptop has amazing performance but terrible battery life!"]
59
60
 
60
61
  pp result
61
- ```
62
62
 
63
- Expected shape:
64
-
65
- ```ruby
66
- {"sentiment"=>"negative"}
63
+ # => {"sentiment"=>"negative"}
67
64
  ```
68
65
 
69
66
  ### Structured extraction
@@ -72,7 +69,7 @@ Expected shape:
72
69
  text = "iPhone 15 Pro Max with 256GB storage, A17 Pro chip, priced at $1199."
73
70
 
74
71
  structure = {
75
- "product" => [
72
+ product: [
76
73
  "name::str::Full product name and model",
77
74
  "storage::str::Storage capacity",
78
75
  "processor::str::Chip or processor information",
@@ -83,18 +80,16 @@ structure = {
83
80
  result = Gliner[structure][text]
84
81
 
85
82
  pp result
86
- ```
87
83
 
88
- Expected shape:
89
-
90
- ```ruby
91
- {"product"=>[{"name"=>"iPhone 15 Pro Max", "storage"=>"256GB", "processor"=>"A17 Pro chip", "price"=>"$1199"}]}
84
+ # => {"product"=>[{"name"=>"iPhone 15 Pro Max", "storage"=>"256GB", "processor"=>"A17 Pro", "price"=>"1199"}]}
92
85
  ```
93
86
 
94
87
  Choices can be included in field specs:
95
88
 
96
89
  ```ruby
97
- result = Gliner[{ "order" => ["status::[pending|processing|shipped]::str"] }]["Status: shipped"]
90
+ result = Gliner[{ order: ["status::[pending|processing|shipped]::str"] }]["Status: shipped"]
91
+
92
+ # => {"order"=>[{"status"=>"shipped"}]}
98
93
  ```
99
94
 
100
95
  ## Model files
@@ -102,18 +97,19 @@ result = Gliner[{ "order" => ["status::[pending|processing|shipped]::str"] }]["S
102
97
  This implementation expects a directory containing:
103
98
 
104
99
  - `tokenizer.json`
105
- - `model.onnx` or `model_int8.onnx`
100
+ - `model.onnx`, `model_fp16.onnx`, or `model_int8.onnx`
106
101
  - (optional) `config.json` with `max_width` and `max_seq_len`
107
102
 
108
103
  One publicly available ONNX export is `cuerbot/gliner2-multi-v1` on Hugging Face.
109
- By default, `model_int8.onnx` is used; set `config.model_file` or `GLINER_MODEL_FILE` to override.
104
+ By default, `model_fp16.onnx` is used; set `config.variant` (or `GLINER_MODEL_FILE`) to override.
105
+ Variants map to files as: `:fp16` → `model_fp16.onnx`, `:fp32` → `model.onnx`, `:int8` → `model_int8.onnx`.
110
106
 
111
- You can also configure the model directory in code:
107
+ You can also configure the model source directly:
112
108
 
113
109
  ```ruby
114
110
  Gliner.configure do |config|
115
- config.model_dir = "/path/to/model_dir"
116
- config.model_file = "model_int8.onnx"
111
+ config.model = "/path/to/model_dir"
112
+ config.variant = :int8
117
113
  end
118
114
  ```
119
115
 
@@ -150,7 +146,7 @@ If you omit `MODEL_DIR`, the console auto-downloads a public test model (configu
150
146
  ```bash
151
147
  rake console
152
148
  # or:
153
- GLINER_REPO_ID=cuerbot/gliner2-multi-v1 GLINER_MODEL_FILE=model_int8.onnx rake console
149
+ GLINER_REPO_ID=cuerbot/gliner2-multi-v1 GLINER_MODEL_FILE=model_fp16.onnx rake console
154
150
  ```
155
151
 
156
152
  Or:
data/bin/console CHANGED
@@ -12,13 +12,15 @@ require "httpx"
12
12
  require "irb"
13
13
 
14
14
  DEFAULT_REPO_ID = "cuerbot/gliner2-multi-v1"
15
- DEFAULT_MODEL_FILE = "model_int8.onnx"
15
+ DEFAULT_MODEL_FILE = "model_fp16.onnx"
16
+ DEFAULT_MODEL_SUBDIR = "onnx"
16
17
 
17
- def ensure_model_dir!(repo_id:, model_file:)
18
+ def ensure_model_dir!(repo_id:, model_file:, model_subdir:)
18
19
  dir = File.expand_path("../tmp/models/#{repo_id.tr('/', '__')}", __dir__)
19
20
  FileUtils.mkdir_p(dir)
20
21
 
21
22
  base = "https://huggingface.co/#{repo_id}/resolve/main"
23
+ base = "#{base}/#{model_subdir}" unless model_subdir.nil? || model_subdir.empty?
22
24
  files = ["tokenizer.json", "config.json", model_file]
23
25
 
24
26
  files.each do |file|
@@ -40,13 +42,14 @@ end
40
42
  model_dir = ARGV[0] || ENV["GLINER_MODEL_DIR"]
41
43
  repo_id = ENV["GLINER_REPO_ID"] || DEFAULT_REPO_ID
42
44
  model_file = ENV["GLINER_MODEL_FILE"] || DEFAULT_MODEL_FILE
45
+ model_subdir = ENV["GLINER_MODEL_SUBDIR"] || DEFAULT_MODEL_SUBDIR
43
46
 
44
47
  if model_dir && !model_dir.empty?
45
48
  $gliner_model = Gliner.load(model_dir, file: model_file)
46
49
  else
47
50
  begin
48
51
  require "fileutils"
49
- model_dir = ensure_model_dir!(repo_id: repo_id, model_file: model_file)
52
+ model_dir = ensure_model_dir!(repo_id: repo_id, model_file: model_file, model_subdir: model_subdir)
50
53
  $gliner_model = Gliner.load(model_dir, file: model_file)
51
54
  rescue => e
52
55
  warn "No model loaded (auto-download failed: #{e.class}: #{e.message})"
data/gliner.gemspec CHANGED
@@ -16,11 +16,13 @@ Gem::Specification.new do |spec|
16
16
  spec.files = Dir.glob('lib/**/*') + Dir.glob('bin/*') + %w[README.md LICENSE gliner.gemspec]
17
17
  spec.require_paths = ['lib']
18
18
 
19
+ spec.add_dependency 'httpx', '~> 1.0'
19
20
  spec.add_dependency 'onnxruntime', '~> 0.10'
20
21
  spec.add_dependency 'tokenizers', '~> 0.6'
21
22
 
22
- spec.add_development_dependency 'httpx', '~> 1.0'
23
23
  spec.add_development_dependency 'rake', '~> 13.0'
24
24
  spec.add_development_dependency 'rspec', '~> 3.13'
25
25
  spec.add_development_dependency 'rubocop', '~> 1.50'
26
+
27
+ spec.metadata['rubygems_mfa_required'] = 'true'
26
28
  end
@@ -5,6 +5,7 @@ require 'gliner/position_iteration'
5
5
  module Gliner
6
6
  class Classifier
7
7
  include PositionIteration
8
+
8
9
  def initialize(inference, max_width:)
9
10
  @inference = inference
10
11
  @max_width = max_width
@@ -63,6 +64,5 @@ module Gliner
63
64
  def format_label(label, score, include_confidence)
64
65
  include_confidence ? { 'label' => label, 'confidence' => score } : label
65
66
  end
66
-
67
67
  end
68
68
  end
@@ -4,12 +4,30 @@ module Gliner
4
4
  class Configuration
5
5
  DEFAULT_THRESHOLD = 0.5
6
6
 
7
- attr_accessor :threshold, :model_dir, :model_file
7
+ attr_accessor :threshold, :model
8
+ attr_reader :variant
8
9
 
9
10
  def initialize
10
11
  @threshold = DEFAULT_THRESHOLD
11
- @model_dir = nil
12
- @model_file = nil
12
+ @model = nil
13
+ @variant = :fp16
14
+ @auto = false
15
+ end
16
+
17
+ def variant=(value)
18
+ @variant = value&.to_sym
19
+ end
20
+
21
+ def auto!(value = true)
22
+ @auto = !!value
23
+ end
24
+
25
+ def auto=(value)
26
+ @auto = !!value
27
+ end
28
+
29
+ def auto?
30
+ @auto
13
31
  end
14
32
  end
15
33
  end
data/lib/gliner/model.rb CHANGED
@@ -22,7 +22,7 @@ module Gliner
22
22
  DEFAULT_MAX_WIDTH = 8
23
23
  DEFAULT_MAX_SEQ_LEN = 512
24
24
 
25
- def self.from_dir(dir, file: 'model_int8.onnx')
25
+ def self.from_dir(dir, file: 'model_fp16.onnx')
26
26
  config_path = File.join(dir, 'config.json')
27
27
  config = File.exist?(config_path) ? JSON.parse(File.read(config_path)) : {}
28
28
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gliner
4
- VERSION = '0.1.1'
4
+ VERSION = '0.2.0'
5
5
  end
data/lib/gliner.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'fileutils'
4
+ require 'httpx'
3
5
  require 'gliner/version'
4
6
  require 'gliner/configuration'
5
7
  require 'gliner/model'
@@ -9,6 +11,11 @@ require 'gliner/runners/structured_runner'
9
11
  require 'gliner/runners/classification_runner'
10
12
 
11
13
  module Gliner
14
+ HF_REPO = 'cuerbot/gliner2-multi-v1'
15
+ HF_DIR = 'onnx'
16
+
17
+ DEFAULT_MODEL_BASE = "https://huggingface.co/#{HF_REPO}/resolve/main/#{HF_DIR}".freeze
18
+
12
19
  Error = Class.new(StandardError)
13
20
 
14
21
  PreparedInput = Data.define(
@@ -41,11 +48,13 @@ module Gliner
41
48
  end
42
49
 
43
50
  class << self
44
- attr_writer :model
45
- attr_writer :config
51
+ attr_writer :model, :config
46
52
 
47
53
  def configure
48
54
  yield(config)
55
+
56
+ reset_model!
57
+ apply_model_source!
49
58
  end
50
59
 
51
60
  def config
@@ -53,53 +62,48 @@ module Gliner
53
62
  end
54
63
 
55
64
  def load(dir, file: nil)
56
- file ||= ENV['GLINER_MODEL_FILE']
57
- file ||= config.model_file
65
+ file ||= ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
58
66
 
59
- self.model = Model.from_dir(dir, file: file || 'model_int8.onnx')
67
+ self.model = Model.from_dir(dir, file: file)
60
68
  end
61
69
 
62
70
  def model
63
71
  @model ||= model_from_config || model_from_env
64
72
  end
65
73
 
66
- def model!
67
- fetch_model!
68
- end
69
-
70
74
  def [](config)
71
- runner_for(config).new(fetch_model!, config)
75
+ runner_for(config).new(model!, config)
72
76
  end
73
77
 
74
78
  def classify
75
79
  Runners::ClassificationRunner
76
80
  end
77
81
 
82
+ def model!
83
+ model = self.model
84
+
85
+ return model if model
86
+
87
+ raise Error, 'No model loaded. Call Gliner.load("/path/to/model"), set config.model, or set GLINER_MODEL_DIR.'
88
+ end
89
+
78
90
  private
79
91
 
80
92
  def model_from_config
81
- dir = config.model_dir
82
- return nil if dir.nil? || dir.empty?
93
+ source = config.model
94
+ return nil if source.nil?
83
95
 
84
- file = config.model_file
85
- return Model.from_dir(dir) if file.nil? || file.empty?
86
-
87
- Model.from_dir(dir, file: file)
96
+ file = model_file_for_variant(config.variant)
97
+ Model.from_dir(source, file: file)
88
98
  end
89
99
 
90
100
  def model_from_env
91
101
  dir = ENV.fetch('GLINER_MODEL_DIR', nil)
92
- return nil if dir.nil? || dir.empty?
102
+ return if dir.nil?
93
103
 
94
- file = ENV['GLINER_MODEL_FILE'] || 'model_int8.onnx'
95
- Model.from_dir(dir, file: file)
96
- end
104
+ file = ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
97
105
 
98
- def fetch_model!
99
- model = self.model
100
- return model if model
101
-
102
- raise Error, 'No model loaded. Call Gliner.load("/path/to/model"), set config.model_dir, or set GLINER_MODEL_DIR.'
106
+ Model.from_dir(dir, file: file)
103
107
  end
104
108
 
105
109
  def runner_for(config)
@@ -112,9 +116,53 @@ module Gliner
112
116
  return false unless config.is_a?(Hash)
113
117
 
114
118
  keys = config.transform_keys(&:to_s)
119
+
115
120
  return true if keys.key?('name') && keys.key?('fields')
116
121
 
117
122
  config.values.all? { |value| value.is_a?(Array) }
118
123
  end
124
+
125
+ def reset_model!
126
+ @model = nil
127
+ end
128
+
129
+ def apply_model_source!
130
+ return unless config.auto?
131
+
132
+ source = config.model
133
+ return unless source.nil? || source.empty?
134
+
135
+ config.model = download_default_model
136
+ end
137
+
138
+ def download_default_model
139
+ model_file = model_file_for_variant(config.variant)
140
+ root = File.expand_path('..', __dir__)
141
+ dir = File.join(root, '.cache', 'models', HF_REPO.tr('/', '__'))
142
+
143
+ FileUtils.mkdir_p(dir)
144
+
145
+ files = ['tokenizer.json', 'config.json', model_file]
146
+ client = HTTPX.plugin(:follow_redirects)
147
+
148
+ files.each do |file|
149
+ response = client.get("#{DEFAULT_MODEL_BASE}/#{file}")
150
+ raise Error, "Download failed: #{file}" if response.error?
151
+
152
+ File.binwrite(File.join(dir, file), response.body.to_s)
153
+ end
154
+
155
+ dir
156
+ end
157
+
158
+ def model_file_for_variant(variant = :fp16)
159
+ case variant.to_sym
160
+ when :fp16 then 'model_fp16.onnx'
161
+ when :fp32 then 'model.onnx'
162
+ when :int8 then 'model_int8.onnx'
163
+ else
164
+ raise Error, "Unknown model variant: #{variant.inspect}"
165
+ end
166
+ end
119
167
  end
120
168
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gliner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - elcuervo
@@ -10,47 +10,47 @@ cert_chain: []
10
10
  date: 1980-01-01 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
- name: onnxruntime
13
+ name: httpx
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
16
  - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: '0.10'
18
+ version: '1.0'
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - "~>"
24
24
  - !ruby/object:Gem::Version
25
- version: '0.10'
25
+ version: '1.0'
26
26
  - !ruby/object:Gem::Dependency
27
- name: tokenizers
27
+ name: onnxruntime
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: '0.6'
32
+ version: '0.10'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '0.6'
39
+ version: '0.10'
40
40
  - !ruby/object:Gem::Dependency
41
- name: httpx
41
+ name: tokenizers
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.0'
47
- type: :development
46
+ version: '0.6'
47
+ type: :runtime
48
48
  prerelease: false
49
49
  version_requirements: !ruby/object:Gem::Requirement
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '1.0'
53
+ version: '0.6'
54
54
  - !ruby/object:Gem::Dependency
55
55
  name: rake
56
56
  requirement: !ruby/object:Gem::Requirement
@@ -130,7 +130,8 @@ files:
130
130
  homepage: https://github.com/elcuervo/gliner
131
131
  licenses:
132
132
  - MIT
133
- metadata: {}
133
+ metadata:
134
+ rubygems_mfa_required: 'true'
134
135
  rdoc_options: []
135
136
  require_paths:
136
137
  - lib
@@ -145,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
146
  - !ruby/object:Gem::Version
146
147
  version: '0'
147
148
  requirements: []
148
- rubygems_version: 3.7.2
149
+ rubygems_version: 3.6.9
149
150
  specification_version: 4
150
151
  summary: Schema-based information extraction (GLiNER2) via ONNX Runtime
151
152
  test_files: []