gliner 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -2
- data/bin/console +6 -39
- data/gliner.gemspec +1 -0
- data/lib/gliner/configuration.rb +1 -1
- data/lib/gliner/runners/classification_runner.rb +7 -0
- data/lib/gliner/runners/entity_runner.rb +9 -0
- data/lib/gliner/runners/inspectable.rb +13 -0
- data/lib/gliner/runners/structured_runner.rb +5 -0
- data/lib/gliner/version.rb +1 -1
- data/lib/gliner.rb +29 -7
- metadata +16 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 749c6a152abe40b2735e56724d01bcb9df57df3ed883f7b7a7a735c7d4dbe613
|
|
4
|
+
data.tar.gz: dd8add045f98be4876bea92faadd68b2da4a64613502c710c3a6769d224016bd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 996bd63e35f9302f1a01a78d92d39ca47013deb42e58ff3c332ac260c2351f172d5a1897bdb3f673a4a1cf1f63c2ef1b1a7c04c2025ff94be6eb71548c79bf97
|
|
7
|
+
data.tar.gz: 246d3939daa2ca82df524ae6a89dbd734792d5c6952857d517634f50cc59176f26fd1bef15677b655b1c2f9cb1b35c5cb700bdbbc764d6949b6b941b7c83e77f
|
data/README.md
CHANGED
|
@@ -19,11 +19,10 @@ require "gliner"
|
|
|
19
19
|
|
|
20
20
|
Gliner.configure do |config|
|
|
21
21
|
config.threshold = 0.2
|
|
22
|
-
#
|
|
22
|
+
# By default, the gem downloads the default model to .cache/
|
|
23
23
|
# Or set a local path explicitly:
|
|
24
24
|
# config.model = "/path/to/gliner2-multi-v1"
|
|
25
25
|
config.variant = :fp16
|
|
26
|
-
config.auto!
|
|
27
26
|
end
|
|
28
27
|
|
|
29
28
|
text = "Apple CEO Tim Cook announced iPhone 15 in Cupertino yesterday."
|
data/bin/console
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
|
|
2
3
|
# frozen_string_literal: true
|
|
3
4
|
|
|
4
5
|
begin
|
|
@@ -7,53 +8,20 @@ rescue LoadError
|
|
|
7
8
|
end
|
|
8
9
|
|
|
9
10
|
require "gliner"
|
|
10
|
-
require "fileutils"
|
|
11
|
-
require "httpx"
|
|
12
11
|
require "irb"
|
|
13
12
|
|
|
14
|
-
DEFAULT_REPO_ID = "cuerbot/gliner2-multi-v1"
|
|
15
|
-
DEFAULT_MODEL_FILE = "model_fp16.onnx"
|
|
16
|
-
DEFAULT_MODEL_SUBDIR = "onnx"
|
|
17
|
-
|
|
18
|
-
def ensure_model_dir!(repo_id:, model_file:, model_subdir:)
|
|
19
|
-
dir = File.expand_path("../tmp/models/#{repo_id.tr('/', '__')}", __dir__)
|
|
20
|
-
FileUtils.mkdir_p(dir)
|
|
21
|
-
|
|
22
|
-
base = "https://huggingface.co/#{repo_id}/resolve/main"
|
|
23
|
-
base = "#{base}/#{model_subdir}" unless model_subdir.nil? || model_subdir.empty?
|
|
24
|
-
files = ["tokenizer.json", "config.json", model_file]
|
|
25
|
-
|
|
26
|
-
files.each do |file|
|
|
27
|
-
dest = File.join(dir, file)
|
|
28
|
-
next if File.exist?(dest) && File.size?(dest)
|
|
29
|
-
download("#{base}/#{file}", dest)
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
dir
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def download(url, dest)
|
|
36
|
-
response = HTTPX.get(url)
|
|
37
|
-
raise "Download failed: #{url} (status: #{response.status})" unless response.status.between?(200, 299)
|
|
38
|
-
|
|
39
|
-
File.binwrite(dest, response.body.to_s)
|
|
40
|
-
end
|
|
41
|
-
|
|
42
13
|
model_dir = ARGV[0] || ENV["GLINER_MODEL_DIR"]
|
|
43
|
-
|
|
44
|
-
model_file = ENV["GLINER_MODEL_FILE"] || DEFAULT_MODEL_FILE
|
|
45
|
-
model_subdir = ENV["GLINER_MODEL_SUBDIR"] || DEFAULT_MODEL_SUBDIR
|
|
14
|
+
model_file = ENV["GLINER_MODEL_FILE"]
|
|
46
15
|
|
|
47
16
|
if model_dir && !model_dir.empty?
|
|
48
|
-
$gliner_model = Gliner.load(model_dir, file: model_file)
|
|
17
|
+
$gliner_model = model_file ? Gliner.load(model_dir, file: model_file) : Gliner.load(model_dir)
|
|
49
18
|
else
|
|
50
19
|
begin
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
$gliner_model = Gliner.load(model_dir, file: model_file)
|
|
20
|
+
Gliner.configure { |config| config.auto = true }
|
|
21
|
+
$gliner_model = Gliner.model
|
|
54
22
|
rescue => e
|
|
55
23
|
warn "No model loaded (auto-download failed: #{e.class}: #{e.message})"
|
|
56
|
-
warn "Set GLINER_MODEL_DIR to a local model dir
|
|
24
|
+
warn "Set GLINER_MODEL_DIR or configure Gliner.config.model to a local model dir."
|
|
57
25
|
end
|
|
58
26
|
end
|
|
59
27
|
|
|
@@ -79,6 +47,5 @@ puts "- helper: gliner_classify(text, tasks)"
|
|
|
79
47
|
puts "- helper: gliner_extract_json(text, structures)"
|
|
80
48
|
puts "- model variable: $gliner_model"
|
|
81
49
|
puts "- model dir: #{model_dir.inspect}"
|
|
82
|
-
puts "- auto-download env: GLINER_REPO_ID=#{repo_id.inspect} GLINER_MODEL_FILE=#{model_file.inspect}" unless $gliner_model
|
|
83
50
|
|
|
84
51
|
IRB.start(__FILE__)
|
data/gliner.gemspec
CHANGED
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.add_dependency 'tokenizers', '~> 0.6'
|
|
22
22
|
|
|
23
23
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
24
|
+
spec.add_development_dependency 'irb', '~> 1.16.0'
|
|
24
25
|
spec.add_development_dependency 'rspec', '~> 3.13'
|
|
25
26
|
spec.add_development_dependency 'rubocop', '~> 1.50'
|
|
26
27
|
|
data/lib/gliner/configuration.rb
CHANGED
data/lib/gliner/runners/classification_runner.rb
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class ClassificationRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def self.[](tasks)
|
|
7
9
|
new(Gliner.model!, tasks)
|
|
8
10
|
end
|
|
@@ -21,6 +23,11 @@ module Gliner
|
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
alias call []
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def inspect_label = 'Classification'
|
|
30
|
+
def inspect_items = @tasks.keys
|
|
24
31
|
end
|
|
25
32
|
end
|
|
26
33
|
end
|
|
data/lib/gliner/runners/entity_runner.rb
CHANGED
|
@@ -3,8 +3,12 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class EntityRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def initialize(model, config)
|
|
7
9
|
parsed = model.entity_task.parse_config(config)
|
|
10
|
+
|
|
11
|
+
@labels = parsed[:labels]
|
|
8
12
|
@task = PreparedTask.new(model.entity_task, parsed)
|
|
9
13
|
end
|
|
10
14
|
|
|
@@ -14,6 +18,11 @@ module Gliner
|
|
|
14
18
|
end
|
|
15
19
|
|
|
16
20
|
alias call []
|
|
21
|
+
|
|
22
|
+
private
|
|
23
|
+
|
|
24
|
+
def inspect_label = 'Entity'
|
|
25
|
+
def inspect_items = @labels
|
|
17
26
|
end
|
|
18
27
|
end
|
|
19
28
|
end
|
|
data/lib/gliner/runners/structured_runner.rb
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Gliner
|
|
4
4
|
module Runners
|
|
5
5
|
class StructuredRunner
|
|
6
|
+
include Inspectable
|
|
7
|
+
|
|
6
8
|
def initialize(model, config)
|
|
7
9
|
@tasks = build_tasks(model, config)
|
|
8
10
|
end
|
|
@@ -17,6 +19,9 @@ module Gliner
|
|
|
17
19
|
|
|
18
20
|
private
|
|
19
21
|
|
|
22
|
+
def inspect_label = 'Structure'
|
|
23
|
+
def inspect_items = @tasks.keys
|
|
24
|
+
|
|
20
25
|
def build_tasks(model, config)
|
|
21
26
|
raise Error, 'structures must be a Hash' unless config.is_a?(Hash)
|
|
22
27
|
|
data/lib/gliner/version.rb
CHANGED
data/lib/gliner.rb
CHANGED
|
@@ -6,6 +6,7 @@ require 'gliner/version'
|
|
|
6
6
|
require 'gliner/configuration'
|
|
7
7
|
require 'gliner/model'
|
|
8
8
|
require 'gliner/runners/prepared_task'
|
|
9
|
+
require 'gliner/runners/inspectable'
|
|
9
10
|
require 'gliner/runners/entity_runner'
|
|
10
11
|
require 'gliner/runners/structured_runner'
|
|
11
12
|
require 'gliner/runners/classification_runner'
|
|
@@ -68,7 +69,10 @@ module Gliner
|
|
|
68
69
|
end
|
|
69
70
|
|
|
70
71
|
def model
|
|
71
|
-
@model ||=
|
|
72
|
+
@model ||= begin
|
|
73
|
+
apply_model_source!
|
|
74
|
+
model_from_config || model_from_env
|
|
75
|
+
end
|
|
72
76
|
end
|
|
73
77
|
|
|
74
78
|
def [](config)
|
|
@@ -98,7 +102,7 @@ module Gliner
|
|
|
98
102
|
end
|
|
99
103
|
|
|
100
104
|
def model_from_env
|
|
101
|
-
dir =
|
|
105
|
+
dir = env_model_dir
|
|
102
106
|
return if dir.nil?
|
|
103
107
|
|
|
104
108
|
file = ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
|
|
@@ -130,7 +134,8 @@ module Gliner
|
|
|
130
134
|
return unless config.auto?
|
|
131
135
|
|
|
132
136
|
source = config.model
|
|
133
|
-
return unless source.nil?
|
|
137
|
+
return unless source.nil?
|
|
138
|
+
return if env_model_dir
|
|
134
139
|
|
|
135
140
|
config.model = download_default_model
|
|
136
141
|
end
|
|
@@ -143,15 +148,32 @@ module Gliner
|
|
|
143
148
|
FileUtils.mkdir_p(dir)
|
|
144
149
|
|
|
145
150
|
files = ['tokenizer.json', 'config.json', model_file]
|
|
146
|
-
client = HTTPX.plugin(:follow_redirects)
|
|
151
|
+
client = HTTPX.plugin(:follow_redirects).with(max_redirects: 5)
|
|
147
152
|
|
|
148
153
|
files.each do |file|
|
|
149
|
-
|
|
150
|
-
|
|
154
|
+
dest = File.join(dir, file)
|
|
155
|
+
next if File.exist?(dest) && File.size?(dest)
|
|
156
|
+
download_file!(client, "#{DEFAULT_MODEL_BASE}/#{file}", dest)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
dir
|
|
160
|
+
end
|
|
151
161
|
|
|
152
|
-
|
|
162
|
+
def download_file!(client, url, dest)
|
|
163
|
+
response = client.get(url)
|
|
164
|
+
status = response.status
|
|
165
|
+
|
|
166
|
+
unless status && status.between?(200, 299)
|
|
167
|
+
raise Error, "Download failed: #{url} (status: #{status || 'unknown'})"
|
|
153
168
|
end
|
|
154
169
|
|
|
170
|
+
File.binwrite(dest, response.body.to_s)
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def env_model_dir
|
|
174
|
+
dir = ENV.fetch('GLINER_MODEL_DIR', nil)
|
|
175
|
+
return nil if dir.nil? || dir.empty?
|
|
176
|
+
|
|
155
177
|
dir
|
|
156
178
|
end
|
|
157
179
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: gliner
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- elcuervo
|
|
@@ -65,6 +65,20 @@ dependencies:
|
|
|
65
65
|
- - "~>"
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
67
|
version: '13.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: irb
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: 1.16.0
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: 1.16.0
|
|
68
82
|
- !ruby/object:Gem::Dependency
|
|
69
83
|
name: rspec
|
|
70
84
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -117,6 +131,7 @@ files:
|
|
|
117
131
|
- lib/gliner/position_iteration.rb
|
|
118
132
|
- lib/gliner/runners/classification_runner.rb
|
|
119
133
|
- lib/gliner/runners/entity_runner.rb
|
|
134
|
+
- lib/gliner/runners/inspectable.rb
|
|
120
135
|
- lib/gliner/runners/prepared_task.rb
|
|
121
136
|
- lib/gliner/runners/structured_runner.rb
|
|
122
137
|
- lib/gliner/span_extractor.rb
|