gliner 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -31
- data/bin/console +6 -36
- data/gliner.gemspec +4 -1
- data/lib/gliner/classifier.rb +1 -1
- data/lib/gliner/configuration.rb +21 -3
- data/lib/gliner/model.rb +1 -1
- data/lib/gliner/runners/classification_runner.rb +7 -0
- data/lib/gliner/runners/entity_runner.rb +9 -0
- data/lib/gliner/runners/inspectable.rb +13 -0
- data/lib/gliner/runners/structured_runner.rb +5 -0
- data/lib/gliner/version.rb +1 -1
- data/lib/gliner.rb +97 -27
- metadata +25 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 749c6a152abe40b2735e56724d01bcb9df57df3ed883f7b7a7a735c7d4dbe613
|
|
4
|
+
data.tar.gz: dd8add045f98be4876bea92faadd68b2da4a64613502c710c3a6769d224016bd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 996bd63e35f9302f1a01a78d92d39ca47013deb42e58ff3c332ac260c2351f172d5a1897bdb3f673a4a1cf1f63c2ef1b1a7c04c2025ff94be6eb71548c79bf97
|
|
7
|
+
data.tar.gz: 246d3939daa2ca82df524ae6a89dbd734792d5c6952857d517634f50cc59176f26fd1bef15677b655b1c2f9cb1b35c5cb700bdbbc764d6949b6b941b7c83e77f
|
data/README.md
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
#
|
|
1
|
+
# GLiNER
|
|
2
|
+
[](https://github.com/elcuervo/gliner/actions/workflows/tests.yml)
|
|
3
|
+

|
|
2
4
|
|
|
3
5
|

|
|
4
6
|
|
|
@@ -17,53 +19,47 @@ require "gliner"
|
|
|
17
19
|
|
|
18
20
|
Gliner.configure do |config|
|
|
19
21
|
config.threshold = 0.2
|
|
20
|
-
|
|
21
|
-
|
|
22
|
+
# By default, the gem downloads the default model to .cache/
|
|
23
|
+
# Or set a local path explicitly:
|
|
24
|
+
# config.model = "/path/to/gliner2-multi-v1"
|
|
25
|
+
config.variant = :fp16
|
|
22
26
|
end
|
|
23
27
|
|
|
24
|
-
Gliner.load("path/to/gliner2-multi-v1")
|
|
25
|
-
|
|
26
28
|
text = "Apple CEO Tim Cook announced iPhone 15 in Cupertino yesterday."
|
|
27
29
|
labels = ["company", "person", "product", "location"]
|
|
28
30
|
|
|
29
31
|
model = Gliner[labels]
|
|
30
32
|
pp model[text]
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
Expected shape:
|
|
34
33
|
|
|
35
|
-
|
|
36
|
-
{"entities"=>{"company"=>["Apple"], "person"=>["Tim Cook"], "product"=>["iPhone 15"], "location"=>["Cupertino"]}}
|
|
34
|
+
# => {"company"=>["Apple"], "person"=>["Tim Cook"], "product"=>["iPhone 15"], "location"=>["Cupertino"]}
|
|
37
35
|
```
|
|
38
36
|
|
|
39
37
|
You can also pass per-entity configs:
|
|
40
38
|
|
|
41
39
|
```ruby
|
|
42
40
|
labels = {
|
|
43
|
-
|
|
44
|
-
|
|
41
|
+
email: { description: "Email addresses", dtype: "list", threshold: 0.9 },
|
|
42
|
+
person: { description: "Person names", dtype: "str" }
|
|
45
43
|
}
|
|
46
44
|
|
|
47
45
|
model = Gliner[labels]
|
|
48
46
|
pp model["Email John Doe at john@example.com.", threshold: 0.5]
|
|
47
|
+
|
|
48
|
+
# => {"email"=>["john@example.com"], "person"=>"John Doe"}
|
|
49
49
|
```
|
|
50
50
|
|
|
51
51
|
### Classification
|
|
52
52
|
|
|
53
53
|
```ruby
|
|
54
54
|
model = Gliner.classify[
|
|
55
|
-
{
|
|
55
|
+
{ sentiment: %w[positive negative neutral] }
|
|
56
56
|
]
|
|
57
57
|
|
|
58
58
|
result = model["This laptop has amazing performance but terrible battery life!"]
|
|
59
59
|
|
|
60
60
|
pp result
|
|
61
|
-
```
|
|
62
61
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
```ruby
|
|
66
|
-
{"sentiment"=>"negative"}
|
|
62
|
+
# => {"sentiment"=>"negative"}
|
|
67
63
|
```
|
|
68
64
|
|
|
69
65
|
### Structured extraction
|
|
@@ -72,7 +68,7 @@ Expected shape:
|
|
|
72
68
|
text = "iPhone 15 Pro Max with 256GB storage, A17 Pro chip, priced at $1199."
|
|
73
69
|
|
|
74
70
|
structure = {
|
|
75
|
-
|
|
71
|
+
product: [
|
|
76
72
|
"name::str::Full product name and model",
|
|
77
73
|
"storage::str::Storage capacity",
|
|
78
74
|
"processor::str::Chip or processor information",
|
|
@@ -83,18 +79,16 @@ structure = {
|
|
|
83
79
|
result = Gliner[structure][text]
|
|
84
80
|
|
|
85
81
|
pp result
|
|
86
|
-
```
|
|
87
82
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
```ruby
|
|
91
|
-
{"product"=>[{"name"=>"iPhone 15 Pro Max", "storage"=>"256GB", "processor"=>"A17 Pro chip", "price"=>"$1199"}]}
|
|
83
|
+
# => {"product"=>[{"name"=>"iPhone 15 Pro Max", "storage"=>"256GB", "processor"=>"A17 Pro", "price"=>"1199"}]}
|
|
92
84
|
```
|
|
93
85
|
|
|
94
86
|
Choices can be included in field specs:
|
|
95
87
|
|
|
96
88
|
```ruby
|
|
97
|
-
result = Gliner[{
|
|
89
|
+
result = Gliner[{ order: ["status::[pending|processing|shipped]::str"] }]["Status: shipped"]
|
|
90
|
+
|
|
91
|
+
# => {"order"=>[{"status"=>"shipped"}]}
|
|
98
92
|
```
|
|
99
93
|
|
|
100
94
|
## Model files
|
|
@@ -102,18 +96,19 @@ result = Gliner[{ "order" => ["status::[pending|processing|shipped]::str"] }]["S
|
|
|
102
96
|
This implementation expects a directory containing:
|
|
103
97
|
|
|
104
98
|
- `tokenizer.json`
|
|
105
|
-
- `model.onnx` or `model_int8.onnx`
|
|
99
|
+
- `model.onnx`, `model_fp16.onnx`, or `model_int8.onnx`
|
|
106
100
|
- (optional) `config.json` with `max_width` and `max_seq_len`
|
|
107
101
|
|
|
108
102
|
One publicly available ONNX export is `cuerbot/gliner2-multi-v1` on Hugging Face.
|
|
109
|
-
By default, `
|
|
103
|
+
By default, `model_fp16.onnx` is used; set `config.variant` (or `GLINER_MODEL_FILE`) to override.
|
|
104
|
+
Variants map to files as: `:fp16` → `model_fp16.onnx`, `:fp32` → `model.onnx`, `:int8` → `model_int8.onnx`.
|
|
110
105
|
|
|
111
|
-
You can also configure the model
|
|
106
|
+
You can also configure the model source directly:
|
|
112
107
|
|
|
113
108
|
```ruby
|
|
114
109
|
Gliner.configure do |config|
|
|
115
|
-
config.
|
|
116
|
-
config.
|
|
110
|
+
config.model = "/path/to/model_dir"
|
|
111
|
+
config.variant = :int8
|
|
117
112
|
end
|
|
118
113
|
```
|
|
119
114
|
|
|
@@ -150,7 +145,7 @@ If you omit `MODEL_DIR`, the console auto-downloads a public test model (configu
|
|
|
150
145
|
```bash
|
|
151
146
|
rake console
|
|
152
147
|
# or:
|
|
153
|
-
GLINER_REPO_ID=cuerbot/gliner2-multi-v1 GLINER_MODEL_FILE=
|
|
148
|
+
GLINER_REPO_ID=cuerbot/gliner2-multi-v1 GLINER_MODEL_FILE=model_fp16.onnx rake console
|
|
154
149
|
```
|
|
155
150
|
|
|
156
151
|
Or:
|
data/bin/console
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
|
|
2
3
|
# frozen_string_literal: true
|
|
3
4
|
|
|
4
5
|
begin
|
|
@@ -7,50 +8,20 @@ rescue LoadError
|
|
|
7
8
|
end
|
|
8
9
|
|
|
9
10
|
require "gliner"
|
|
10
|
-
require "fileutils"
|
|
11
|
-
require "httpx"
|
|
12
11
|
require "irb"
|
|
13
12
|
|
|
14
|
-
DEFAULT_REPO_ID = "cuerbot/gliner2-multi-v1"
|
|
15
|
-
DEFAULT_MODEL_FILE = "model_int8.onnx"
|
|
16
|
-
|
|
17
|
-
def ensure_model_dir!(repo_id:, model_file:)
|
|
18
|
-
dir = File.expand_path("../tmp/models/#{repo_id.tr('/', '__')}", __dir__)
|
|
19
|
-
FileUtils.mkdir_p(dir)
|
|
20
|
-
|
|
21
|
-
base = "https://huggingface.co/#{repo_id}/resolve/main"
|
|
22
|
-
files = ["tokenizer.json", "config.json", model_file]
|
|
23
|
-
|
|
24
|
-
files.each do |file|
|
|
25
|
-
dest = File.join(dir, file)
|
|
26
|
-
next if File.exist?(dest) && File.size?(dest)
|
|
27
|
-
download("#{base}/#{file}", dest)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
dir
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def download(url, dest)
|
|
34
|
-
response = HTTPX.get(url)
|
|
35
|
-
raise "Download failed: #{url} (status: #{response.status})" unless response.status.between?(200, 299)
|
|
36
|
-
|
|
37
|
-
File.binwrite(dest, response.body.to_s)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
13
|
model_dir = ARGV[0] || ENV["GLINER_MODEL_DIR"]
|
|
41
|
-
|
|
42
|
-
model_file = ENV["GLINER_MODEL_FILE"] || DEFAULT_MODEL_FILE
|
|
14
|
+
model_file = ENV["GLINER_MODEL_FILE"]
|
|
43
15
|
|
|
44
16
|
if model_dir && !model_dir.empty?
|
|
45
|
-
$gliner_model = Gliner.load(model_dir, file: model_file)
|
|
17
|
+
$gliner_model = model_file ? Gliner.load(model_dir, file: model_file) : Gliner.load(model_dir)
|
|
46
18
|
else
|
|
47
19
|
begin
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
$gliner_model = Gliner.load(model_dir, file: model_file)
|
|
20
|
+
Gliner.configure { |config| config.auto = true }
|
|
21
|
+
$gliner_model = Gliner.model
|
|
51
22
|
rescue => e
|
|
52
23
|
warn "No model loaded (auto-download failed: #{e.class}: #{e.message})"
|
|
53
|
-
warn "Set GLINER_MODEL_DIR to a local model dir
|
|
24
|
+
warn "Set GLINER_MODEL_DIR or configure Gliner.config.model to a local model dir."
|
|
54
25
|
end
|
|
55
26
|
end
|
|
56
27
|
|
|
@@ -76,6 +47,5 @@ puts "- helper: gliner_classify(text, tasks)"
|
|
|
76
47
|
puts "- helper: gliner_extract_json(text, structures)"
|
|
77
48
|
puts "- model variable: $gliner_model"
|
|
78
49
|
puts "- model dir: #{model_dir.inspect}"
|
|
79
|
-
puts "- auto-download env: GLINER_REPO_ID=#{repo_id.inspect} GLINER_MODEL_FILE=#{model_file.inspect}" unless $gliner_model
|
|
80
50
|
|
|
81
51
|
IRB.start(__FILE__)
|
data/gliner.gemspec
CHANGED
|
@@ -16,11 +16,14 @@ Gem::Specification.new do |spec|
|
|
|
16
16
|
spec.files = Dir.glob('lib/**/*') + Dir.glob('bin/*') + %w[README.md LICENSE gliner.gemspec]
|
|
17
17
|
spec.require_paths = ['lib']
|
|
18
18
|
|
|
19
|
+
spec.add_dependency 'httpx', '~> 1.0'
|
|
19
20
|
spec.add_dependency 'onnxruntime', '~> 0.10'
|
|
20
21
|
spec.add_dependency 'tokenizers', '~> 0.6'
|
|
21
22
|
|
|
22
|
-
spec.add_development_dependency 'httpx', '~> 1.0'
|
|
23
23
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
24
|
+
spec.add_development_dependency 'irb', '~> 1.16.0'
|
|
24
25
|
spec.add_development_dependency 'rspec', '~> 3.13'
|
|
25
26
|
spec.add_development_dependency 'rubocop', '~> 1.50'
|
|
27
|
+
|
|
28
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
26
29
|
end
|
data/lib/gliner/classifier.rb
CHANGED
|
@@ -5,6 +5,7 @@ require 'gliner/position_iteration'
|
|
|
5
5
|
module Gliner
|
|
6
6
|
class Classifier
|
|
7
7
|
include PositionIteration
|
|
8
|
+
|
|
8
9
|
def initialize(inference, max_width:)
|
|
9
10
|
@inference = inference
|
|
10
11
|
@max_width = max_width
|
|
@@ -63,6 +64,5 @@ module Gliner
|
|
|
63
64
|
def format_label(label, score, include_confidence)
|
|
64
65
|
include_confidence ? { 'label' => label, 'confidence' => score } : label
|
|
65
66
|
end
|
|
66
|
-
|
|
67
67
|
end
|
|
68
68
|
end
|
data/lib/gliner/configuration.rb
CHANGED
|
@@ -4,12 +4,30 @@ module Gliner
|
|
|
4
4
|
class Configuration
|
|
5
5
|
DEFAULT_THRESHOLD = 0.5
|
|
6
6
|
|
|
7
|
-
attr_accessor :threshold, :
|
|
7
|
+
attr_accessor :threshold, :model
|
|
8
|
+
attr_reader :variant
|
|
8
9
|
|
|
9
10
|
def initialize
|
|
10
11
|
@threshold = DEFAULT_THRESHOLD
|
|
11
|
-
@
|
|
12
|
-
@
|
|
12
|
+
@model = nil
|
|
13
|
+
@variant = :fp16
|
|
14
|
+
@auto = true
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def variant=(value)
|
|
18
|
+
@variant = value&.to_sym
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def auto!(value = true)
|
|
22
|
+
@auto = !!value
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def auto=(value)
|
|
26
|
+
@auto = !!value
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def auto?
|
|
30
|
+
@auto
|
|
13
31
|
end
|
|
14
32
|
end
|
|
15
33
|
end
|
data/lib/gliner/model.rb
CHANGED
|
@@ -22,7 +22,7 @@ module Gliner
|
|
|
22
22
|
DEFAULT_MAX_WIDTH = 8
|
|
23
23
|
DEFAULT_MAX_SEQ_LEN = 512
|
|
24
24
|
|
|
25
|
-
def self.from_dir(dir, file: '
|
|
25
|
+
def self.from_dir(dir, file: 'model_fp16.onnx')
|
|
26
26
|
config_path = File.join(dir, 'config.json')
|
|
27
27
|
config = File.exist?(config_path) ? JSON.parse(File.read(config_path)) : {}
|
|
28
28
|
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class ClassificationRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def self.[](tasks)
|
|
7
9
|
new(Gliner.model!, tasks)
|
|
8
10
|
end
|
|
@@ -21,6 +23,11 @@ module Gliner
|
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
alias call []
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def inspect_label = 'Classification'
|
|
30
|
+
def inspect_items = @tasks.keys
|
|
24
31
|
end
|
|
25
32
|
end
|
|
26
33
|
end
|
|
@@ -3,8 +3,12 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class EntityRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def initialize(model, config)
|
|
7
9
|
parsed = model.entity_task.parse_config(config)
|
|
10
|
+
|
|
11
|
+
@labels = parsed[:labels]
|
|
8
12
|
@task = PreparedTask.new(model.entity_task, parsed)
|
|
9
13
|
end
|
|
10
14
|
|
|
@@ -14,6 +18,11 @@ module Gliner
|
|
|
14
18
|
end
|
|
15
19
|
|
|
16
20
|
alias call []
|
|
21
|
+
|
|
22
|
+
private
|
|
23
|
+
|
|
24
|
+
def inspect_label = 'Entity'
|
|
25
|
+
def inspect_items = @labels
|
|
17
26
|
end
|
|
18
27
|
end
|
|
19
28
|
end
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class StructuredRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def initialize(model, config)
|
|
7
9
|
@tasks = build_tasks(model, config)
|
|
8
10
|
end
|
|
@@ -17,6 +19,9 @@ module Gliner
|
|
|
17
19
|
|
|
18
20
|
private
|
|
19
21
|
|
|
22
|
+
def inspect_label = 'Structure'
|
|
23
|
+
def inspect_items = @tasks.keys
|
|
24
|
+
|
|
20
25
|
def build_tasks(model, config)
|
|
21
26
|
raise Error, 'structures must be a Hash' unless config.is_a?(Hash)
|
|
22
27
|
|
data/lib/gliner/version.rb
CHANGED
data/lib/gliner.rb
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
require 'httpx'
|
|
3
5
|
require 'gliner/version'
|
|
4
6
|
require 'gliner/configuration'
|
|
5
7
|
require 'gliner/model'
|
|
6
8
|
require 'gliner/runners/prepared_task'
|
|
9
|
+
require 'gliner/runners/inspectable'
|
|
7
10
|
require 'gliner/runners/entity_runner'
|
|
8
11
|
require 'gliner/runners/structured_runner'
|
|
9
12
|
require 'gliner/runners/classification_runner'
|
|
10
13
|
|
|
11
14
|
module Gliner
|
|
15
|
+
HF_REPO = 'cuerbot/gliner2-multi-v1'
|
|
16
|
+
HF_DIR = 'onnx'
|
|
17
|
+
|
|
18
|
+
DEFAULT_MODEL_BASE = "https://huggingface.co/#{HF_REPO}/resolve/main/#{HF_DIR}".freeze
|
|
19
|
+
|
|
12
20
|
Error = Class.new(StandardError)
|
|
13
21
|
|
|
14
22
|
PreparedInput = Data.define(
|
|
@@ -41,11 +49,13 @@ module Gliner
|
|
|
41
49
|
end
|
|
42
50
|
|
|
43
51
|
class << self
|
|
44
|
-
attr_writer :model
|
|
45
|
-
attr_writer :config
|
|
52
|
+
attr_writer :model, :config
|
|
46
53
|
|
|
47
54
|
def configure
|
|
48
55
|
yield(config)
|
|
56
|
+
|
|
57
|
+
reset_model!
|
|
58
|
+
apply_model_source!
|
|
49
59
|
end
|
|
50
60
|
|
|
51
61
|
def config
|
|
@@ -53,53 +63,51 @@ module Gliner
|
|
|
53
63
|
end
|
|
54
64
|
|
|
55
65
|
def load(dir, file: nil)
|
|
56
|
-
file ||= ENV['GLINER_MODEL_FILE']
|
|
57
|
-
file ||= config.model_file
|
|
66
|
+
file ||= ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
|
|
58
67
|
|
|
59
|
-
self.model = Model.from_dir(dir, file: file
|
|
68
|
+
self.model = Model.from_dir(dir, file: file)
|
|
60
69
|
end
|
|
61
70
|
|
|
62
71
|
def model
|
|
63
|
-
@model ||=
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
fetch_model!
|
|
72
|
+
@model ||= begin
|
|
73
|
+
apply_model_source!
|
|
74
|
+
model_from_config || model_from_env
|
|
75
|
+
end
|
|
68
76
|
end
|
|
69
77
|
|
|
70
78
|
def [](config)
|
|
71
|
-
runner_for(config).new(
|
|
79
|
+
runner_for(config).new(model!, config)
|
|
72
80
|
end
|
|
73
81
|
|
|
74
82
|
def classify
|
|
75
83
|
Runners::ClassificationRunner
|
|
76
84
|
end
|
|
77
85
|
|
|
86
|
+
def model!
|
|
87
|
+
model = self.model
|
|
88
|
+
|
|
89
|
+
return model if model
|
|
90
|
+
|
|
91
|
+
raise Error, 'No model loaded. Call Gliner.load("/path/to/model"), set config.model, or set GLINER_MODEL_DIR.'
|
|
92
|
+
end
|
|
93
|
+
|
|
78
94
|
private
|
|
79
95
|
|
|
80
96
|
def model_from_config
|
|
81
|
-
|
|
82
|
-
return nil if
|
|
83
|
-
|
|
84
|
-
file = config.model_file
|
|
85
|
-
return Model.from_dir(dir) if file.nil? || file.empty?
|
|
97
|
+
source = config.model
|
|
98
|
+
return nil if source.nil?
|
|
86
99
|
|
|
87
|
-
|
|
100
|
+
file = model_file_for_variant(config.variant)
|
|
101
|
+
Model.from_dir(source, file: file)
|
|
88
102
|
end
|
|
89
103
|
|
|
90
104
|
def model_from_env
|
|
91
|
-
dir =
|
|
92
|
-
return
|
|
105
|
+
dir = env_model_dir
|
|
106
|
+
return if dir.nil?
|
|
93
107
|
|
|
94
|
-
file = ENV['GLINER_MODEL_FILE'] ||
|
|
95
|
-
Model.from_dir(dir, file: file)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def fetch_model!
|
|
99
|
-
model = self.model
|
|
100
|
-
return model if model
|
|
108
|
+
file = ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
|
|
101
109
|
|
|
102
|
-
|
|
110
|
+
Model.from_dir(dir, file: file)
|
|
103
111
|
end
|
|
104
112
|
|
|
105
113
|
def runner_for(config)
|
|
@@ -112,9 +120,71 @@ module Gliner
|
|
|
112
120
|
return false unless config.is_a?(Hash)
|
|
113
121
|
|
|
114
122
|
keys = config.transform_keys(&:to_s)
|
|
123
|
+
|
|
115
124
|
return true if keys.key?('name') && keys.key?('fields')
|
|
116
125
|
|
|
117
126
|
config.values.all? { |value| value.is_a?(Array) }
|
|
118
127
|
end
|
|
128
|
+
|
|
129
|
+
def reset_model!
|
|
130
|
+
@model = nil
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def apply_model_source!
|
|
134
|
+
return unless config.auto?
|
|
135
|
+
|
|
136
|
+
source = config.model
|
|
137
|
+
return unless source.nil?
|
|
138
|
+
return if env_model_dir
|
|
139
|
+
|
|
140
|
+
config.model = download_default_model
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def download_default_model
|
|
144
|
+
model_file = model_file_for_variant(config.variant)
|
|
145
|
+
root = File.expand_path('..', __dir__)
|
|
146
|
+
dir = File.join(root, '.cache', 'models', HF_REPO.tr('/', '__'))
|
|
147
|
+
|
|
148
|
+
FileUtils.mkdir_p(dir)
|
|
149
|
+
|
|
150
|
+
files = ['tokenizer.json', 'config.json', model_file]
|
|
151
|
+
client = HTTPX.plugin(:follow_redirects).with(max_redirects: 5)
|
|
152
|
+
|
|
153
|
+
files.each do |file|
|
|
154
|
+
dest = File.join(dir, file)
|
|
155
|
+
next if File.exist?(dest) && File.size?(dest)
|
|
156
|
+
download_file!(client, "#{DEFAULT_MODEL_BASE}/#{file}", dest)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
dir
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def download_file!(client, url, dest)
|
|
163
|
+
response = client.get(url)
|
|
164
|
+
status = response.status
|
|
165
|
+
|
|
166
|
+
unless status && status.between?(200, 299)
|
|
167
|
+
raise Error, "Download failed: #{url} (status: #{status || 'unknown'})"
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
File.binwrite(dest, response.body.to_s)
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def env_model_dir
|
|
174
|
+
dir = ENV.fetch('GLINER_MODEL_DIR', nil)
|
|
175
|
+
return nil if dir.nil? || dir.empty?
|
|
176
|
+
|
|
177
|
+
dir
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def model_file_for_variant(variant = :fp16)
|
|
181
|
+
case variant.to_sym
|
|
182
|
+
when :fp16 then 'model_fp16.onnx'
|
|
183
|
+
when :fp32 then 'model.onnx'
|
|
184
|
+
when :int8 then 'model_int8.onnx'
|
|
185
|
+
else
|
|
186
|
+
raise Error, "Unknown model variant: #{variant.inspect}"
|
|
187
|
+
end
|
|
188
|
+
end
|
|
119
189
|
end
|
|
120
190
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: gliner
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- elcuervo
|
|
@@ -9,6 +9,20 @@ bindir: bin
|
|
|
9
9
|
cert_chain: []
|
|
10
10
|
date: 1980-01-01 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: httpx
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '1.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '1.0'
|
|
12
26
|
- !ruby/object:Gem::Dependency
|
|
13
27
|
name: onnxruntime
|
|
14
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -38,33 +52,33 @@ dependencies:
|
|
|
38
52
|
- !ruby/object:Gem::Version
|
|
39
53
|
version: '0.6'
|
|
40
54
|
- !ruby/object:Gem::Dependency
|
|
41
|
-
name:
|
|
55
|
+
name: rake
|
|
42
56
|
requirement: !ruby/object:Gem::Requirement
|
|
43
57
|
requirements:
|
|
44
58
|
- - "~>"
|
|
45
59
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: '
|
|
60
|
+
version: '13.0'
|
|
47
61
|
type: :development
|
|
48
62
|
prerelease: false
|
|
49
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
64
|
requirements:
|
|
51
65
|
- - "~>"
|
|
52
66
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '
|
|
67
|
+
version: '13.0'
|
|
54
68
|
- !ruby/object:Gem::Dependency
|
|
55
|
-
name:
|
|
69
|
+
name: irb
|
|
56
70
|
requirement: !ruby/object:Gem::Requirement
|
|
57
71
|
requirements:
|
|
58
72
|
- - "~>"
|
|
59
73
|
- !ruby/object:Gem::Version
|
|
60
|
-
version:
|
|
74
|
+
version: 1.16.0
|
|
61
75
|
type: :development
|
|
62
76
|
prerelease: false
|
|
63
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
64
78
|
requirements:
|
|
65
79
|
- - "~>"
|
|
66
80
|
- !ruby/object:Gem::Version
|
|
67
|
-
version:
|
|
81
|
+
version: 1.16.0
|
|
68
82
|
- !ruby/object:Gem::Dependency
|
|
69
83
|
name: rspec
|
|
70
84
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -117,6 +131,7 @@ files:
|
|
|
117
131
|
- lib/gliner/position_iteration.rb
|
|
118
132
|
- lib/gliner/runners/classification_runner.rb
|
|
119
133
|
- lib/gliner/runners/entity_runner.rb
|
|
134
|
+
- lib/gliner/runners/inspectable.rb
|
|
120
135
|
- lib/gliner/runners/prepared_task.rb
|
|
121
136
|
- lib/gliner/runners/structured_runner.rb
|
|
122
137
|
- lib/gliner/span_extractor.rb
|
|
@@ -130,7 +145,8 @@ files:
|
|
|
130
145
|
homepage: https://github.com/elcuervo/gliner
|
|
131
146
|
licenses:
|
|
132
147
|
- MIT
|
|
133
|
-
metadata:
|
|
148
|
+
metadata:
|
|
149
|
+
rubygems_mfa_required: 'true'
|
|
134
150
|
rdoc_options: []
|
|
135
151
|
require_paths:
|
|
136
152
|
- lib
|
|
@@ -145,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
145
161
|
- !ruby/object:Gem::Version
|
|
146
162
|
version: '0'
|
|
147
163
|
requirements: []
|
|
148
|
-
rubygems_version: 3.
|
|
164
|
+
rubygems_version: 3.6.9
|
|
149
165
|
specification_version: 4
|
|
150
166
|
summary: Schema-based information extraction (GLiNER2) via ONNX Runtime
|
|
151
167
|
test_files: []
|