gliner 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -2
- data/bin/console +7 -39
- data/gliner.gemspec +2 -0
- data/lib/gliner/configuration.rb +1 -1
- data/lib/gliner/runners/classification_runner.rb +7 -0
- data/lib/gliner/runners/entity_runner.rb +9 -0
- data/lib/gliner/runners/inspectable.rb +13 -0
- data/lib/gliner/runners/structured_runner.rb +5 -0
- data/lib/gliner/version.rb +1 -1
- data/lib/gliner.rb +29 -9
- metadata +30 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 81732c1ce09e933b7e2f57291ee6c9c0c9a8442f7a8488667d324a7b25d2981f
|
|
4
|
+
data.tar.gz: 59cc5210b5915c7e195d99a3c2d80d613f3d9e3aaa9c9ddb58d4311b585bf923
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 17e88cb80220df06929d6642c92373e88a1628aaee83eb3c8175ea1c5454dc790bb262ea18565d314e533c49082aace8c85b78325aae3ddffa5a5d68188451f7
|
|
7
|
+
data.tar.gz: 1cafd31a6b5d1c92b1088d3584457f4231853485eb0ba8728155fde333b9f99e5d7b1ec7414ddcf6a9f7547f991ccf98c37564cf9b95a9b1b2ab77f1e11ee68a
|
data/README.md
CHANGED
|
@@ -19,11 +19,10 @@ require "gliner"
|
|
|
19
19
|
|
|
20
20
|
Gliner.configure do |config|
|
|
21
21
|
config.threshold = 0.2
|
|
22
|
-
#
|
|
22
|
+
# By default, the gem downloads the default model to .cache/
|
|
23
23
|
# Or set a local path explicitly:
|
|
24
24
|
# config.model = "/path/to/gliner2-multi-v1"
|
|
25
25
|
config.variant = :fp16
|
|
26
|
-
config.auto!
|
|
27
26
|
end
|
|
28
27
|
|
|
29
28
|
text = "Apple CEO Tim Cook announced iPhone 15 in Cupertino yesterday."
|
data/bin/console
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
|
|
2
3
|
# frozen_string_literal: true
|
|
3
4
|
|
|
4
5
|
begin
|
|
@@ -7,53 +8,21 @@ rescue LoadError
|
|
|
7
8
|
end
|
|
8
9
|
|
|
9
10
|
require "gliner"
|
|
10
|
-
require "fileutils"
|
|
11
|
-
require "httpx"
|
|
12
11
|
require "irb"
|
|
13
|
-
|
|
14
|
-
DEFAULT_REPO_ID = "cuerbot/gliner2-multi-v1"
|
|
15
|
-
DEFAULT_MODEL_FILE = "model_fp16.onnx"
|
|
16
|
-
DEFAULT_MODEL_SUBDIR = "onnx"
|
|
17
|
-
|
|
18
|
-
def ensure_model_dir!(repo_id:, model_file:, model_subdir:)
|
|
19
|
-
dir = File.expand_path("../tmp/models/#{repo_id.tr('/', '__')}", __dir__)
|
|
20
|
-
FileUtils.mkdir_p(dir)
|
|
21
|
-
|
|
22
|
-
base = "https://huggingface.co/#{repo_id}/resolve/main"
|
|
23
|
-
base = "#{base}/#{model_subdir}" unless model_subdir.nil? || model_subdir.empty?
|
|
24
|
-
files = ["tokenizer.json", "config.json", model_file]
|
|
25
|
-
|
|
26
|
-
files.each do |file|
|
|
27
|
-
dest = File.join(dir, file)
|
|
28
|
-
next if File.exist?(dest) && File.size?(dest)
|
|
29
|
-
download("#{base}/#{file}", dest)
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
dir
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def download(url, dest)
|
|
36
|
-
response = HTTPX.get(url)
|
|
37
|
-
raise "Download failed: #{url} (status: #{response.status})" unless response.status.between?(200, 299)
|
|
38
|
-
|
|
39
|
-
File.binwrite(dest, response.body.to_s)
|
|
40
|
-
end
|
|
12
|
+
require "debug"
|
|
41
13
|
|
|
42
14
|
model_dir = ARGV[0] || ENV["GLINER_MODEL_DIR"]
|
|
43
|
-
|
|
44
|
-
model_file = ENV["GLINER_MODEL_FILE"] || DEFAULT_MODEL_FILE
|
|
45
|
-
model_subdir = ENV["GLINER_MODEL_SUBDIR"] || DEFAULT_MODEL_SUBDIR
|
|
15
|
+
model_file = ENV["GLINER_MODEL_FILE"]
|
|
46
16
|
|
|
47
17
|
if model_dir && !model_dir.empty?
|
|
48
|
-
$gliner_model = Gliner.load(model_dir, file: model_file)
|
|
18
|
+
$gliner_model = model_file ? Gliner.load(model_dir, file: model_file) : Gliner.load(model_dir)
|
|
49
19
|
else
|
|
50
20
|
begin
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
$gliner_model = Gliner.load(model_dir, file: model_file)
|
|
21
|
+
Gliner.configure { |config| config.auto = true }
|
|
22
|
+
$gliner_model = Gliner.model
|
|
54
23
|
rescue => e
|
|
55
24
|
warn "No model loaded (auto-download failed: #{e.class}: #{e.message})"
|
|
56
|
-
warn "Set GLINER_MODEL_DIR to a local model dir
|
|
25
|
+
warn "Set GLINER_MODEL_DIR or configure Gliner.config.model to a local model dir."
|
|
57
26
|
end
|
|
58
27
|
end
|
|
59
28
|
|
|
@@ -79,6 +48,5 @@ puts "- helper: gliner_classify(text, tasks)"
|
|
|
79
48
|
puts "- helper: gliner_extract_json(text, structures)"
|
|
80
49
|
puts "- model variable: $gliner_model"
|
|
81
50
|
puts "- model dir: #{model_dir.inspect}"
|
|
82
|
-
puts "- auto-download env: GLINER_REPO_ID=#{repo_id.inspect} GLINER_MODEL_FILE=#{model_file.inspect}" unless $gliner_model
|
|
83
51
|
|
|
84
52
|
IRB.start(__FILE__)
|
data/gliner.gemspec
CHANGED
|
@@ -21,8 +21,10 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.add_dependency 'tokenizers', '~> 0.6'
|
|
22
22
|
|
|
23
23
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
24
|
+
spec.add_development_dependency 'irb', '~> 1.16.0'
|
|
24
25
|
spec.add_development_dependency 'rspec', '~> 3.13'
|
|
25
26
|
spec.add_development_dependency 'rubocop', '~> 1.50'
|
|
27
|
+
spec.add_development_dependency 'debug', '~> 1.11'
|
|
26
28
|
|
|
27
29
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
28
30
|
end
|
data/lib/gliner/configuration.rb
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class ClassificationRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def self.[](tasks)
|
|
7
9
|
new(Gliner.model!, tasks)
|
|
8
10
|
end
|
|
@@ -21,6 +23,11 @@ module Gliner
|
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
alias call []
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def inspect_label = 'Classification'
|
|
30
|
+
def inspect_items = @tasks.keys
|
|
24
31
|
end
|
|
25
32
|
end
|
|
26
33
|
end
|
|
@@ -3,8 +3,12 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class EntityRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def initialize(model, config)
|
|
7
9
|
parsed = model.entity_task.parse_config(config)
|
|
10
|
+
|
|
11
|
+
@labels = parsed[:labels]
|
|
8
12
|
@task = PreparedTask.new(model.entity_task, parsed)
|
|
9
13
|
end
|
|
10
14
|
|
|
@@ -14,6 +18,11 @@ module Gliner
|
|
|
14
18
|
end
|
|
15
19
|
|
|
16
20
|
alias call []
|
|
21
|
+
|
|
22
|
+
private
|
|
23
|
+
|
|
24
|
+
def inspect_label = 'Entity'
|
|
25
|
+
def inspect_items = @labels
|
|
17
26
|
end
|
|
18
27
|
end
|
|
19
28
|
end
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class StructuredRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def initialize(model, config)
|
|
7
9
|
@tasks = build_tasks(model, config)
|
|
8
10
|
end
|
|
@@ -17,6 +19,9 @@ module Gliner
|
|
|
17
19
|
|
|
18
20
|
private
|
|
19
21
|
|
|
22
|
+
def inspect_label = 'Structure'
|
|
23
|
+
def inspect_items = @tasks.keys
|
|
24
|
+
|
|
20
25
|
def build_tasks(model, config)
|
|
21
26
|
raise Error, 'structures must be a Hash' unless config.is_a?(Hash)
|
|
22
27
|
|
data/lib/gliner/version.rb
CHANGED
data/lib/gliner.rb
CHANGED
|
@@ -6,6 +6,7 @@ require 'gliner/version'
|
|
|
6
6
|
require 'gliner/configuration'
|
|
7
7
|
require 'gliner/model'
|
|
8
8
|
require 'gliner/runners/prepared_task'
|
|
9
|
+
require 'gliner/runners/inspectable'
|
|
9
10
|
require 'gliner/runners/entity_runner'
|
|
10
11
|
require 'gliner/runners/structured_runner'
|
|
11
12
|
require 'gliner/runners/classification_runner'
|
|
@@ -68,7 +69,10 @@ module Gliner
|
|
|
68
69
|
end
|
|
69
70
|
|
|
70
71
|
def model
|
|
71
|
-
@model ||=
|
|
72
|
+
@model ||= begin
|
|
73
|
+
apply_model_source!
|
|
74
|
+
model_from_config || model_from_env
|
|
75
|
+
end
|
|
72
76
|
end
|
|
73
77
|
|
|
74
78
|
def [](config)
|
|
@@ -98,7 +102,7 @@ module Gliner
|
|
|
98
102
|
end
|
|
99
103
|
|
|
100
104
|
def model_from_env
|
|
101
|
-
dir =
|
|
105
|
+
dir = env_model_dir
|
|
102
106
|
return if dir.nil?
|
|
103
107
|
|
|
104
108
|
file = ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
|
|
@@ -130,31 +134,47 @@ module Gliner
|
|
|
130
134
|
return unless config.auto?
|
|
131
135
|
|
|
132
136
|
source = config.model
|
|
133
|
-
|
|
137
|
+
|
|
138
|
+
return unless source.nil?
|
|
139
|
+
return if env_model_dir
|
|
134
140
|
|
|
135
141
|
config.model = download_default_model
|
|
136
142
|
end
|
|
137
143
|
|
|
144
|
+
def client
|
|
145
|
+
@client ||= HTTPX.plugin(:follow_redirects)
|
|
146
|
+
end
|
|
147
|
+
|
|
138
148
|
def download_default_model
|
|
139
149
|
model_file = model_file_for_variant(config.variant)
|
|
140
|
-
|
|
141
|
-
dir = File.join(root, '.cache', 'models', HF_REPO.tr('/', '__'))
|
|
150
|
+
dir = File.join(Dir.pwd, '.cache', 'gliner', HF_REPO.tr('/', '__'))
|
|
142
151
|
|
|
143
152
|
FileUtils.mkdir_p(dir)
|
|
144
153
|
|
|
145
154
|
files = ['tokenizer.json', 'config.json', model_file]
|
|
146
|
-
client = HTTPX.plugin(:follow_redirects)
|
|
147
155
|
|
|
148
156
|
files.each do |file|
|
|
149
|
-
|
|
150
|
-
|
|
157
|
+
target = File.join(dir, file)
|
|
158
|
+
|
|
159
|
+
next if File.exist?(target) && File.size?(target)
|
|
151
160
|
|
|
152
|
-
|
|
161
|
+
puts "Downloading #{DEFAULT_MODEL_BASE}/#{file}"
|
|
162
|
+
|
|
163
|
+
client
|
|
164
|
+
.get("#{DEFAULT_MODEL_BASE}/#{file}")
|
|
165
|
+
.copy_to(target)
|
|
153
166
|
end
|
|
154
167
|
|
|
155
168
|
dir
|
|
156
169
|
end
|
|
157
170
|
|
|
171
|
+
def env_model_dir
|
|
172
|
+
dir = ENV.fetch('GLINER_MODEL_DIR', nil)
|
|
173
|
+
return nil if dir.nil? || dir.empty?
|
|
174
|
+
|
|
175
|
+
dir
|
|
176
|
+
end
|
|
177
|
+
|
|
158
178
|
def model_file_for_variant(variant = :fp16)
|
|
159
179
|
case variant.to_sym
|
|
160
180
|
when :fp16 then 'model_fp16.onnx'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: gliner
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- elcuervo
|
|
@@ -65,6 +65,20 @@ dependencies:
|
|
|
65
65
|
- - "~>"
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
67
|
version: '13.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: irb
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: 1.16.0
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: 1.16.0
|
|
68
82
|
- !ruby/object:Gem::Dependency
|
|
69
83
|
name: rspec
|
|
70
84
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -93,6 +107,20 @@ dependencies:
|
|
|
93
107
|
- - "~>"
|
|
94
108
|
- !ruby/object:Gem::Version
|
|
95
109
|
version: '1.50'
|
|
110
|
+
- !ruby/object:Gem::Dependency
|
|
111
|
+
name: debug
|
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - "~>"
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: '1.11'
|
|
117
|
+
type: :development
|
|
118
|
+
prerelease: false
|
|
119
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - "~>"
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: '1.11'
|
|
96
124
|
description: Basic Ruby inference wrapper for the GLiNER2 ONNX model.
|
|
97
125
|
executables: []
|
|
98
126
|
extensions: []
|
|
@@ -117,6 +145,7 @@ files:
|
|
|
117
145
|
- lib/gliner/position_iteration.rb
|
|
118
146
|
- lib/gliner/runners/classification_runner.rb
|
|
119
147
|
- lib/gliner/runners/entity_runner.rb
|
|
148
|
+
- lib/gliner/runners/inspectable.rb
|
|
120
149
|
- lib/gliner/runners/prepared_task.rb
|
|
121
150
|
- lib/gliner/runners/structured_runner.rb
|
|
122
151
|
- lib/gliner/span_extractor.rb
|