gliner 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8e4bdddc47289f29cce2469ff6bbeecc9ef260b39f3b02f6e3e89b23e3e4e7a
4
- data.tar.gz: b59ad90385c8da5b478f007f5715994c11f1f1a0b85617acc501acb8b17723aa
3
+ metadata.gz: 749c6a152abe40b2735e56724d01bcb9df57df3ed883f7b7a7a735c7d4dbe613
4
+ data.tar.gz: dd8add045f98be4876bea92faadd68b2da4a64613502c710c3a6769d224016bd
5
5
  SHA512:
6
- metadata.gz: 072cc980f4653d74da83d3cfea1b09d0cbf8e023bfb6d3829ce6879163b7dc77a170ef63ad68138eca7d892521f30eda8d0be6c72e62cbe0353732aa4542bab3
7
- data.tar.gz: fb84732285ff71edad266533cfd513d51036c709a4c5275fdc6548a373f11ead86eb66978fd0b28522c567af52e7d2ce8111053d721ec6ddcee6546db8bf58ed
6
+ metadata.gz: 996bd63e35f9302f1a01a78d92d39ca47013deb42e58ff3c332ac260c2351f172d5a1897bdb3f673a4a1cf1f63c2ef1b1a7c04c2025ff94be6eb71548c79bf97
7
+ data.tar.gz: 246d3939daa2ca82df524ae6a89dbd734792d5c6952857d517634f50cc59176f26fd1bef15677b655b1c2f9cb1b35c5cb700bdbbc764d6949b6b941b7c83e77f
data/README.md CHANGED
@@ -19,11 +19,10 @@ require "gliner"
19
19
 
20
20
  Gliner.configure do |config|
21
21
  config.threshold = 0.2
22
- # If unset, auto! downloads the default model to .cache/
22
+ # By default, the gem downloads the default model to .cache/
23
23
  # Or set a local path explicitly:
24
24
  # config.model = "/path/to/gliner2-multi-v1"
25
25
  config.variant = :fp16
26
- config.auto!
27
26
  end
28
27
 
29
28
  text = "Apple CEO Tim Cook announced iPhone 15 in Cupertino yesterday."
data/bin/console CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+
2
3
  # frozen_string_literal: true
3
4
 
4
5
  begin
@@ -7,53 +8,20 @@ rescue LoadError
7
8
  end
8
9
 
9
10
  require "gliner"
10
- require "fileutils"
11
- require "httpx"
12
11
  require "irb"
13
12
 
14
- DEFAULT_REPO_ID = "cuerbot/gliner2-multi-v1"
15
- DEFAULT_MODEL_FILE = "model_fp16.onnx"
16
- DEFAULT_MODEL_SUBDIR = "onnx"
17
-
18
- def ensure_model_dir!(repo_id:, model_file:, model_subdir:)
19
- dir = File.expand_path("../tmp/models/#{repo_id.tr('/', '__')}", __dir__)
20
- FileUtils.mkdir_p(dir)
21
-
22
- base = "https://huggingface.co/#{repo_id}/resolve/main"
23
- base = "#{base}/#{model_subdir}" unless model_subdir.nil? || model_subdir.empty?
24
- files = ["tokenizer.json", "config.json", model_file]
25
-
26
- files.each do |file|
27
- dest = File.join(dir, file)
28
- next if File.exist?(dest) && File.size?(dest)
29
- download("#{base}/#{file}", dest)
30
- end
31
-
32
- dir
33
- end
34
-
35
- def download(url, dest)
36
- response = HTTPX.get(url)
37
- raise "Download failed: #{url} (status: #{response.status})" unless response.status.between?(200, 299)
38
-
39
- File.binwrite(dest, response.body.to_s)
40
- end
41
-
42
13
  model_dir = ARGV[0] || ENV["GLINER_MODEL_DIR"]
43
- repo_id = ENV["GLINER_REPO_ID"] || DEFAULT_REPO_ID
44
- model_file = ENV["GLINER_MODEL_FILE"] || DEFAULT_MODEL_FILE
45
- model_subdir = ENV["GLINER_MODEL_SUBDIR"] || DEFAULT_MODEL_SUBDIR
14
+ model_file = ENV["GLINER_MODEL_FILE"]
46
15
 
47
16
  if model_dir && !model_dir.empty?
48
- $gliner_model = Gliner.load(model_dir, file: model_file)
17
+ $gliner_model = model_file ? Gliner.load(model_dir, file: model_file) : Gliner.load(model_dir)
49
18
  else
50
19
  begin
51
- require "fileutils"
52
- model_dir = ensure_model_dir!(repo_id: repo_id, model_file: model_file, model_subdir: model_subdir)
53
- $gliner_model = Gliner.load(model_dir, file: model_file)
20
+ Gliner.configure { |config| config.auto = true }
21
+ $gliner_model = Gliner.model
54
22
  rescue => e
55
23
  warn "No model loaded (auto-download failed: #{e.class}: #{e.message})"
56
- warn "Set GLINER_MODEL_DIR to a local model dir, or set GLINER_REPO_ID/GLINER_MODEL_FILE for auto-download."
24
+ warn "Set GLINER_MODEL_DIR or configure Gliner.config.model to a local model dir."
57
25
  end
58
26
  end
59
27
 
@@ -79,6 +47,5 @@ puts "- helper: gliner_classify(text, tasks)"
79
47
  puts "- helper: gliner_extract_json(text, structures)"
80
48
  puts "- model variable: $gliner_model"
81
49
  puts "- model dir: #{model_dir.inspect}"
82
- puts "- auto-download env: GLINER_REPO_ID=#{repo_id.inspect} GLINER_MODEL_FILE=#{model_file.inspect}" unless $gliner_model
83
50
 
84
51
  IRB.start(__FILE__)
data/gliner.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_dependency 'tokenizers', '~> 0.6'
22
22
 
23
23
  spec.add_development_dependency 'rake', '~> 13.0'
24
+ spec.add_development_dependency 'irb', '~> 1.16.0'
24
25
  spec.add_development_dependency 'rspec', '~> 3.13'
25
26
  spec.add_development_dependency 'rubocop', '~> 1.50'
26
27
 
@@ -11,7 +11,7 @@ module Gliner
11
11
  @threshold = DEFAULT_THRESHOLD
12
12
  @model = nil
13
13
  @variant = :fp16
14
- @auto = false
14
+ @auto = true
15
15
  end
16
16
 
17
17
  def variant=(value)
@@ -3,6 +3,8 @@
3
3
  module Gliner
4
4
  module Runners
5
5
  class ClassificationRunner
6
+ include Inspectable
7
+
6
8
  def self.[](tasks)
7
9
  new(Gliner.model!, tasks)
8
10
  end
@@ -21,6 +23,11 @@ module Gliner
21
23
  end
22
24
 
23
25
  alias call []
26
+
27
+ private
28
+
29
+ def inspect_label = 'Classification'
30
+ def inspect_items = @tasks.keys
24
31
  end
25
32
  end
26
33
  end
@@ -3,8 +3,12 @@
3
3
  module Gliner
4
4
  module Runners
5
5
  class EntityRunner
6
+ include Inspectable
7
+
6
8
  def initialize(model, config)
7
9
  parsed = model.entity_task.parse_config(config)
10
+
11
+ @labels = parsed[:labels]
8
12
  @task = PreparedTask.new(model.entity_task, parsed)
9
13
  end
10
14
 
@@ -14,6 +18,11 @@ module Gliner
14
18
  end
15
19
 
16
20
  alias call []
21
+
22
+ private
23
+
24
+ def inspect_label = 'Entity'
25
+ def inspect_items = @labels
17
26
  end
18
27
  end
19
28
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gliner
4
+ module Runners
5
+ module Inspectable
6
+ def inspect
7
+ items = Array(inspect_items).map(&:to_s)
8
+
9
+ "#<Gliner(#{inspect_label}) input=#{items.inspect}>"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -3,6 +3,8 @@
3
3
  module Gliner
4
4
  module Runners
5
5
  class StructuredRunner
6
+ include Inspectable
7
+
6
8
  def initialize(model, config)
7
9
  @tasks = build_tasks(model, config)
8
10
  end
@@ -17,6 +19,9 @@ module Gliner
17
19
 
18
20
  private
19
21
 
22
+ def inspect_label = 'Structure'
23
+ def inspect_items = @tasks.keys
24
+
20
25
  def build_tasks(model, config)
21
26
  raise Error, 'structures must be a Hash' unless config.is_a?(Hash)
22
27
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gliner
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
data/lib/gliner.rb CHANGED
@@ -6,6 +6,7 @@ require 'gliner/version'
6
6
  require 'gliner/configuration'
7
7
  require 'gliner/model'
8
8
  require 'gliner/runners/prepared_task'
9
+ require 'gliner/runners/inspectable'
9
10
  require 'gliner/runners/entity_runner'
10
11
  require 'gliner/runners/structured_runner'
11
12
  require 'gliner/runners/classification_runner'
@@ -68,7 +69,10 @@ module Gliner
68
69
  end
69
70
 
70
71
  def model
71
- @model ||= model_from_config || model_from_env
72
+ @model ||= begin
73
+ apply_model_source!
74
+ model_from_config || model_from_env
75
+ end
72
76
  end
73
77
 
74
78
  def [](config)
@@ -98,7 +102,7 @@ module Gliner
98
102
  end
99
103
 
100
104
  def model_from_env
101
- dir = ENV.fetch('GLINER_MODEL_DIR', nil)
105
+ dir = env_model_dir
102
106
  return if dir.nil?
103
107
 
104
108
  file = ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
@@ -130,7 +134,8 @@ module Gliner
130
134
  return unless config.auto?
131
135
 
132
136
  source = config.model
133
- return unless source.nil? || source.empty?
137
+ return unless source.nil?
138
+ return if env_model_dir
134
139
 
135
140
  config.model = download_default_model
136
141
  end
@@ -143,15 +148,32 @@ module Gliner
143
148
  FileUtils.mkdir_p(dir)
144
149
 
145
150
  files = ['tokenizer.json', 'config.json', model_file]
146
- client = HTTPX.plugin(:follow_redirects)
151
+ client = HTTPX.plugin(:follow_redirects).with(max_redirects: 5)
147
152
 
148
153
  files.each do |file|
149
- response = client.get("#{DEFAULT_MODEL_BASE}/#{file}")
150
- raise Error, "Download failed: #{file}" if response.error?
154
+ dest = File.join(dir, file)
155
+ next if File.exist?(dest) && File.size?(dest)
156
+ download_file!(client, "#{DEFAULT_MODEL_BASE}/#{file}", dest)
157
+ end
158
+
159
+ dir
160
+ end
151
161
 
152
- File.binwrite(File.join(dir, file), response.body.to_s)
162
+ def download_file!(client, url, dest)
163
+ response = client.get(url)
164
+ status = response.status
165
+
166
+ unless status && status.between?(200, 299)
167
+ raise Error, "Download failed: #{url} (status: #{status || 'unknown'})"
153
168
  end
154
169
 
170
+ File.binwrite(dest, response.body.to_s)
171
+ end
172
+
173
+ def env_model_dir
174
+ dir = ENV.fetch('GLINER_MODEL_DIR', nil)
175
+ return nil if dir.nil? || dir.empty?
176
+
155
177
  dir
156
178
  end
157
179
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gliner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - elcuervo
@@ -65,6 +65,20 @@ dependencies:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
67
  version: '13.0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: irb
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: 1.16.0
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: 1.16.0
68
82
  - !ruby/object:Gem::Dependency
69
83
  name: rspec
70
84
  requirement: !ruby/object:Gem::Requirement
@@ -117,6 +131,7 @@ files:
117
131
  - lib/gliner/position_iteration.rb
118
132
  - lib/gliner/runners/classification_runner.rb
119
133
  - lib/gliner/runners/entity_runner.rb
134
+ - lib/gliner/runners/inspectable.rb
120
135
  - lib/gliner/runners/prepared_task.rb
121
136
  - lib/gliner/runners/structured_runner.rb
122
137
  - lib/gliner/span_extractor.rb