gliner 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8e4bdddc47289f29cce2469ff6bbeecc9ef260b39f3b02f6e3e89b23e3e4e7a
4
- data.tar.gz: b59ad90385c8da5b478f007f5715994c11f1f1a0b85617acc501acb8b17723aa
3
+ metadata.gz: 81732c1ce09e933b7e2f57291ee6c9c0c9a8442f7a8488667d324a7b25d2981f
4
+ data.tar.gz: 59cc5210b5915c7e195d99a3c2d80d613f3d9e3aaa9c9ddb58d4311b585bf923
5
5
  SHA512:
6
- metadata.gz: 072cc980f4653d74da83d3cfea1b09d0cbf8e023bfb6d3829ce6879163b7dc77a170ef63ad68138eca7d892521f30eda8d0be6c72e62cbe0353732aa4542bab3
7
- data.tar.gz: fb84732285ff71edad266533cfd513d51036c709a4c5275fdc6548a373f11ead86eb66978fd0b28522c567af52e7d2ce8111053d721ec6ddcee6546db8bf58ed
6
+ metadata.gz: 17e88cb80220df06929d6642c92373e88a1628aaee83eb3c8175ea1c5454dc790bb262ea18565d314e533c49082aace8c85b78325aae3ddffa5a5d68188451f7
7
+ data.tar.gz: 1cafd31a6b5d1c92b1088d3584457f4231853485eb0ba8728155fde333b9f99e5d7b1ec7414ddcf6a9f7547f991ccf98c37564cf9b95a9b1b2ab77f1e11ee68a
data/README.md CHANGED
@@ -19,11 +19,10 @@ require "gliner"
19
19
 
20
20
  Gliner.configure do |config|
21
21
  config.threshold = 0.2
22
- # If unset, auto! downloads the default model to .cache/
22
+ # By default, the gem downloads the default model to .cache/
23
23
  # Or set a local path explicitly:
24
24
  # config.model = "/path/to/gliner2-multi-v1"
25
25
  config.variant = :fp16
26
- config.auto!
27
26
  end
28
27
 
29
28
  text = "Apple CEO Tim Cook announced iPhone 15 in Cupertino yesterday."
data/bin/console CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+
2
3
  # frozen_string_literal: true
3
4
 
4
5
  begin
@@ -7,53 +8,21 @@ rescue LoadError
7
8
  end
8
9
 
9
10
  require "gliner"
10
- require "fileutils"
11
- require "httpx"
12
11
  require "irb"
13
-
14
- DEFAULT_REPO_ID = "cuerbot/gliner2-multi-v1"
15
- DEFAULT_MODEL_FILE = "model_fp16.onnx"
16
- DEFAULT_MODEL_SUBDIR = "onnx"
17
-
18
- def ensure_model_dir!(repo_id:, model_file:, model_subdir:)
19
- dir = File.expand_path("../tmp/models/#{repo_id.tr('/', '__')}", __dir__)
20
- FileUtils.mkdir_p(dir)
21
-
22
- base = "https://huggingface.co/#{repo_id}/resolve/main"
23
- base = "#{base}/#{model_subdir}" unless model_subdir.nil? || model_subdir.empty?
24
- files = ["tokenizer.json", "config.json", model_file]
25
-
26
- files.each do |file|
27
- dest = File.join(dir, file)
28
- next if File.exist?(dest) && File.size?(dest)
29
- download("#{base}/#{file}", dest)
30
- end
31
-
32
- dir
33
- end
34
-
35
- def download(url, dest)
36
- response = HTTPX.get(url)
37
- raise "Download failed: #{url} (status: #{response.status})" unless response.status.between?(200, 299)
38
-
39
- File.binwrite(dest, response.body.to_s)
40
- end
12
+ require "debug"
41
13
 
42
14
  model_dir = ARGV[0] || ENV["GLINER_MODEL_DIR"]
43
- repo_id = ENV["GLINER_REPO_ID"] || DEFAULT_REPO_ID
44
- model_file = ENV["GLINER_MODEL_FILE"] || DEFAULT_MODEL_FILE
45
- model_subdir = ENV["GLINER_MODEL_SUBDIR"] || DEFAULT_MODEL_SUBDIR
15
+ model_file = ENV["GLINER_MODEL_FILE"]
46
16
 
47
17
  if model_dir && !model_dir.empty?
48
- $gliner_model = Gliner.load(model_dir, file: model_file)
18
+ $gliner_model = model_file ? Gliner.load(model_dir, file: model_file) : Gliner.load(model_dir)
49
19
  else
50
20
  begin
51
- require "fileutils"
52
- model_dir = ensure_model_dir!(repo_id: repo_id, model_file: model_file, model_subdir: model_subdir)
53
- $gliner_model = Gliner.load(model_dir, file: model_file)
21
+ Gliner.configure { |config| config.auto = true }
22
+ $gliner_model = Gliner.model
54
23
  rescue => e
55
24
  warn "No model loaded (auto-download failed: #{e.class}: #{e.message})"
56
- warn "Set GLINER_MODEL_DIR to a local model dir, or set GLINER_REPO_ID/GLINER_MODEL_FILE for auto-download."
25
+ warn "Set GLINER_MODEL_DIR or configure Gliner.config.model to a local model dir."
57
26
  end
58
27
  end
59
28
 
@@ -79,6 +48,5 @@ puts "- helper: gliner_classify(text, tasks)"
79
48
  puts "- helper: gliner_extract_json(text, structures)"
80
49
  puts "- model variable: $gliner_model"
81
50
  puts "- model dir: #{model_dir.inspect}"
82
- puts "- auto-download env: GLINER_REPO_ID=#{repo_id.inspect} GLINER_MODEL_FILE=#{model_file.inspect}" unless $gliner_model
83
51
 
84
52
  IRB.start(__FILE__)
data/gliner.gemspec CHANGED
@@ -21,8 +21,10 @@ Gem::Specification.new do |spec|
21
21
  spec.add_dependency 'tokenizers', '~> 0.6'
22
22
 
23
23
  spec.add_development_dependency 'rake', '~> 13.0'
24
+ spec.add_development_dependency 'irb', '~> 1.16.0'
24
25
  spec.add_development_dependency 'rspec', '~> 3.13'
25
26
  spec.add_development_dependency 'rubocop', '~> 1.50'
27
+ spec.add_development_dependency 'debug', '~> 1.11'
26
28
 
27
29
  spec.metadata['rubygems_mfa_required'] = 'true'
28
30
  end
@@ -11,7 +11,7 @@ module Gliner
11
11
  @threshold = DEFAULT_THRESHOLD
12
12
  @model = nil
13
13
  @variant = :fp16
14
- @auto = false
14
+ @auto = true
15
15
  end
16
16
 
17
17
  def variant=(value)
@@ -3,6 +3,8 @@
3
3
  module Gliner
4
4
  module Runners
5
5
  class ClassificationRunner
6
+ include Inspectable
7
+
6
8
  def self.[](tasks)
7
9
  new(Gliner.model!, tasks)
8
10
  end
@@ -21,6 +23,11 @@ module Gliner
21
23
  end
22
24
 
23
25
  alias call []
26
+
27
+ private
28
+
29
+ def inspect_label = 'Classification'
30
+ def inspect_items = @tasks.keys
24
31
  end
25
32
  end
26
33
  end
@@ -3,8 +3,12 @@
3
3
  module Gliner
4
4
  module Runners
5
5
  class EntityRunner
6
+ include Inspectable
7
+
6
8
  def initialize(model, config)
7
9
  parsed = model.entity_task.parse_config(config)
10
+
11
+ @labels = parsed[:labels]
8
12
  @task = PreparedTask.new(model.entity_task, parsed)
9
13
  end
10
14
 
@@ -14,6 +18,11 @@ module Gliner
14
18
  end
15
19
 
16
20
  alias call []
21
+
22
+ private
23
+
24
+ def inspect_label = 'Entity'
25
+ def inspect_items = @labels
17
26
  end
18
27
  end
19
28
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gliner
4
+ module Runners
5
+ module Inspectable
6
+ def inspect
7
+ items = Array(inspect_items).map(&:to_s)
8
+
9
+ "#<Gliner(#{inspect_label}) input=#{items.inspect}>"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -3,6 +3,8 @@
3
3
  module Gliner
4
4
  module Runners
5
5
  class StructuredRunner
6
+ include Inspectable
7
+
6
8
  def initialize(model, config)
7
9
  @tasks = build_tasks(model, config)
8
10
  end
@@ -17,6 +19,9 @@ module Gliner
17
19
 
18
20
  private
19
21
 
22
+ def inspect_label = 'Structure'
23
+ def inspect_items = @tasks.keys
24
+
20
25
  def build_tasks(model, config)
21
26
  raise Error, 'structures must be a Hash' unless config.is_a?(Hash)
22
27
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gliner
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.2'
5
5
  end
data/lib/gliner.rb CHANGED
@@ -6,6 +6,7 @@ require 'gliner/version'
6
6
  require 'gliner/configuration'
7
7
  require 'gliner/model'
8
8
  require 'gliner/runners/prepared_task'
9
+ require 'gliner/runners/inspectable'
9
10
  require 'gliner/runners/entity_runner'
10
11
  require 'gliner/runners/structured_runner'
11
12
  require 'gliner/runners/classification_runner'
@@ -68,7 +69,10 @@ module Gliner
68
69
  end
69
70
 
70
71
  def model
71
- @model ||= model_from_config || model_from_env
72
+ @model ||= begin
73
+ apply_model_source!
74
+ model_from_config || model_from_env
75
+ end
72
76
  end
73
77
 
74
78
  def [](config)
@@ -98,7 +102,7 @@ module Gliner
98
102
  end
99
103
 
100
104
  def model_from_env
101
- dir = ENV.fetch('GLINER_MODEL_DIR', nil)
105
+ dir = env_model_dir
102
106
  return if dir.nil?
103
107
 
104
108
  file = ENV['GLINER_MODEL_FILE'] || model_file_for_variant(config.variant)
@@ -130,31 +134,47 @@ module Gliner
130
134
  return unless config.auto?
131
135
 
132
136
  source = config.model
133
- return unless source.nil? || source.empty?
137
+
138
+ return unless source.nil?
139
+ return if env_model_dir
134
140
 
135
141
  config.model = download_default_model
136
142
  end
137
143
 
144
+ def client
145
+ @client ||= HTTPX.plugin(:follow_redirects)
146
+ end
147
+
138
148
  def download_default_model
139
149
  model_file = model_file_for_variant(config.variant)
140
- root = File.expand_path('..', __dir__)
141
- dir = File.join(root, '.cache', 'models', HF_REPO.tr('/', '__'))
150
+ dir = File.join(Dir.pwd, '.cache', 'gliner', HF_REPO.tr('/', '__'))
142
151
 
143
152
  FileUtils.mkdir_p(dir)
144
153
 
145
154
  files = ['tokenizer.json', 'config.json', model_file]
146
- client = HTTPX.plugin(:follow_redirects)
147
155
 
148
156
  files.each do |file|
149
- response = client.get("#{DEFAULT_MODEL_BASE}/#{file}")
150
- raise Error, "Download failed: #{file}" if response.error?
157
+ target = File.join(dir, file)
158
+
159
+ next if File.exist?(target) && File.size?(target)
151
160
 
152
- File.binwrite(File.join(dir, file), response.body.to_s)
161
+ puts "Downloading #{DEFAULT_MODEL_BASE}/#{file}"
162
+
163
+ client
164
+ .get("#{DEFAULT_MODEL_BASE}/#{file}")
165
+ .copy_to(target)
153
166
  end
154
167
 
155
168
  dir
156
169
  end
157
170
 
171
+ def env_model_dir
172
+ dir = ENV.fetch('GLINER_MODEL_DIR', nil)
173
+ return nil if dir.nil? || dir.empty?
174
+
175
+ dir
176
+ end
177
+
158
178
  def model_file_for_variant(variant = :fp16)
159
179
  case variant.to_sym
160
180
  when :fp16 then 'model_fp16.onnx'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gliner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - elcuervo
@@ -65,6 +65,20 @@ dependencies:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
67
  version: '13.0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: irb
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: 1.16.0
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: 1.16.0
68
82
  - !ruby/object:Gem::Dependency
69
83
  name: rspec
70
84
  requirement: !ruby/object:Gem::Requirement
@@ -93,6 +107,20 @@ dependencies:
93
107
  - - "~>"
94
108
  - !ruby/object:Gem::Version
95
109
  version: '1.50'
110
+ - !ruby/object:Gem::Dependency
111
+ name: debug
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.11'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '1.11'
96
124
  description: Basic Ruby inference wrapper for the GLiNER2 ONNX model.
97
125
  executables: []
98
126
  extensions: []
@@ -117,6 +145,7 @@ files:
117
145
  - lib/gliner/position_iteration.rb
118
146
  - lib/gliner/runners/classification_runner.rb
119
147
  - lib/gliner/runners/entity_runner.rb
148
+ - lib/gliner/runners/inspectable.rb
120
149
  - lib/gliner/runners/prepared_task.rb
121
150
  - lib/gliner/runners/structured_runner.rb
122
151
  - lib/gliner/span_extractor.rb