informers 1.1.1 → 1.2.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a61f01755798e81a975641d60e5bfe09484ced7ce6a3453020c9978dc35b1942
-  data.tar.gz: 811f9c1dc4499ae7de8ebf8e02c0c4e98a0c0bc0af6aaca51025e42ba8165540
+  metadata.gz: dfeaf81a8a597d5c25d340e38d0f0be665d3737a1cc271ece883e5b83472da31
+  data.tar.gz: 36943e4c472c864e5951a02ea336c47192e77bcc51161113e022675cce552b82
 SHA512:
-  metadata.gz: 97b27363fab1e43895e368dbddc819fd4db23d42ce517359e5971347cd902b654f0c66700f07b36cd5f476bd3ea205a91e4f7e7ee0e7d8d455f0dce377bedb2b
-  data.tar.gz: dd1a7f795609423419ce213b00a5aca409f6b4a5bffb111250b4deffcbc6a8113fadf8d603c59fa78fa0f310904a0a3299e3bcdc48101f574171a024d13567e6
+  metadata.gz: f6f9a35abcdcb57ddd24c3d81498fa517df554a48b722d7fcee0776e950762ef6412f90916688ca273b95bac79b2441a92a26bcecdc27e2e3c504eafc2b39793
+  data.tar.gz: b85b9a12439001996c4a1580bfdca650ae920791867a3245531b8b13de0b082ebca5ee76be70e8581947e38cf7ebd8c6474d537234ccef4cddb04cc2c6b3dfa5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
+## 1.2.1 (2025-02-01)
+
+- Fixed error when terminal width is zero
+
+## 1.2.0 (2024-11-14)
+
+- Added support for models with external data
+- Added `device` option
+- Added `dtype` option
+- Added `session_options` option
+
 ## 1.1.1 (2024-10-14)
 
 - Added `audio-classification` pipeline
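The `device`, `dtype`, and `session_options` options added in 1.2.0 are documented in the README diff below; a minimal sketch combining all three (values taken from the README examples):

```ruby
require "informers"

model = Informers.pipeline(
  "embedding",
  "Xenova/all-MiniLM-L6-v2",
  device: "cpu",                            # or "cuda" / "coreml"
  dtype: "fp16",                            # loads onnx/model_fp16.onnx
  session_options: {log_severity_level: 2}  # forwarded to OnnxRuntime::InferenceSession
)
embeddings = model.(["This is an example sentence"])
```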
data/README.md CHANGED
@@ -1,6 +1,6 @@
 # Informers
 
-:fire: Fast [transformer](https://github.com/xenova/transformers.js) inference for Ruby
+:fire: Fast [transformer](https://github.com/huggingface/transformers.js) inference for Ruby
 
 For non-ONNX models, check out [Transformers.rb](https://github.com/ankane/transformers-ruby) :slightly_smiling_face:
 
@@ -24,7 +24,9 @@ gem "informers"
 Embedding
 
 - [sentence-transformers/all-MiniLM-L6-v2](#sentence-transformersall-MiniLM-L6-v2)
-- [Xenova/multi-qa-MiniLM-L6-cos-v1](#xenovamulti-qa-MiniLM-L6-cos-v1)
+- [sentence-transformers/multi-qa-MiniLM-L6-cos-v1](#sentence-transformersmulti-qa-MiniLM-L6-cos-v1)
+- [sentence-transformers/all-mpnet-base-v2](#sentence-transformersall-mpnet-base-v2)
+- [sentence-transformers/paraphrase-MiniLM-L6-v2](#sentence-transformersparaphrase-minilm-l6-v2)
 - [mixedbread-ai/mxbai-embed-large-v1](#mixedbread-aimxbai-embed-large-v1)
 - [Supabase/gte-small](#supabasegte-small)
 - [intfloat/e5-base-v2](#intfloate5-base-v2)
@@ -32,13 +34,13 @@ Embedding
 - [BAAI/bge-base-en-v1.5](#baaibge-base-en-v15)
 - [jinaai/jina-embeddings-v2-base-en](#jinaaijina-embeddings-v2-base-en)
 - [Snowflake/snowflake-arctic-embed-m-v1.5](#snowflakesnowflake-arctic-embed-m-v15)
-- [Xenova/all-mpnet-base-v2](#xenovaall-mpnet-base-v2)
 
 Reranking
 
 - [mixedbread-ai/mxbai-rerank-base-v1](#mixedbread-aimxbai-rerank-base-v1)
 - [jinaai/jina-reranker-v1-turbo-en](#jinaaijina-reranker-v1-turbo-en)
 - [BAAI/bge-reranker-base](#baaibge-reranker-base)
+- [Xenova/ms-marco-MiniLM-L-6-v2](#xenovams-marco-minilm-l-6-v2)
 
 ### sentence-transformers/all-MiniLM-L6-v2
 
@@ -51,13 +53,7 @@ model = Informers.pipeline("embedding", "sentence-transformers/all-MiniLM-L6-v2"
 embeddings = model.(sentences)
 ```
 
-For a quantized version, use:
-
-```ruby
-model = Informers.pipeline("embedding", "Xenova/all-MiniLM-L6-v2", quantized: true)
-```
-
-### Xenova/multi-qa-MiniLM-L6-cos-v1
+### sentence-transformers/multi-qa-MiniLM-L6-cos-v1
 
 [Docs](https://huggingface.co/Xenova/multi-qa-MiniLM-L6-cos-v1)
 
@@ -65,13 +61,35 @@ model = Informers.pipeline("embedding", "Xenova/all-MiniLM-L6-v2", quantized: tr
 query = "How many people live in London?"
 docs = ["Around 9 Million people live in London", "London is known for its financial district"]
 
-model = Informers.pipeline("embedding", "Xenova/multi-qa-MiniLM-L6-cos-v1")
+model = Informers.pipeline("embedding", "sentence-transformers/multi-qa-MiniLM-L6-cos-v1")
 query_embedding = model.(query)
 doc_embeddings = model.(docs)
 scores = doc_embeddings.map { |e| e.zip(query_embedding).sum { |d, q| d * q } }
 doc_score_pairs = docs.zip(scores).sort_by { |d, s| -s }
 ```
 
+### sentence-transformers/all-mpnet-base-v2
+
+[Docs](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
+
+```ruby
+sentences = ["This is an example sentence", "Each sentence is converted"]
+
+model = Informers.pipeline("embedding", "sentence-transformers/all-mpnet-base-v2")
+embeddings = model.(sentences)
+```
+
+### sentence-transformers/paraphrase-MiniLM-L6-v2
+
+[Docs](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2)
+
+```ruby
+sentences = ["This is an example sentence", "Each sentence is converted"]
+
+model = Informers.pipeline("embedding", "sentence-transformers/paraphrase-MiniLM-L6-v2")
+embeddings = model.(sentences, normalize: false)
+```
+
 ### mixedbread-ai/mxbai-embed-large-v1
 
 [Docs](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)
@@ -180,17 +198,6 @@ model = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5
 embeddings = model.(input, model_output: "sentence_embedding", pooling: "none")
 ```
 
-### Xenova/all-mpnet-base-v2
-
-[Docs](https://huggingface.co/Xenova/all-mpnet-base-v2)
-
-```ruby
-sentences = ["This is an example sentence", "Each sentence is converted"]
-
-model = Informers.pipeline("embedding", "Xenova/all-mpnet-base-v2")
-embeddings = model.(sentences)
-```
-
 ### mixedbread-ai/mxbai-rerank-base-v1
 
 [Docs](https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1)
@@ -227,9 +234,21 @@ model = Informers.pipeline("reranking", "BAAI/bge-reranker-base")
 result = model.(query, docs)
 ```
 
+### Xenova/ms-marco-MiniLM-L-6-v2
+
+[Docs](https://huggingface.co/Xenova/ms-marco-MiniLM-L-6-v2)
+
+```ruby
+query = "How many people live in London?"
+docs = ["Around 9 Million people live in London", "London is known for its financial district"]
+
+model = Informers.pipeline("reranking", "Xenova/ms-marco-MiniLM-L-6-v2")
+result = model.(query, docs)
+```
+
 ### Other
 
-The model must include a `.onnx` file ([example](https://huggingface.co/Xenova/all-MiniLM-L6-v2/tree/main/onnx)). If the file is not at `onnx/model.onnx` or `onnx/model_quantized.onnx`, use the `model_file_name` option to specify the location.
+The model must include a `.onnx` file ([example](https://huggingface.co/Xenova/all-MiniLM-L6-v2/tree/main/onnx)). If the file is not at `onnx/model.onnx`, use the `model_file_name` option to specify the location.
 
 ## Pipelines
 
@@ -411,9 +430,31 @@ qa = Informers.pipeline("document-question-answering")
 qa.("image.jpg", "What is the invoice number?")
 ```
 
+## Reference
+
+Specify a variant of the model if available (`fp32`, `fp16`, `int8`, `uint8`, `q8`, `q4`, `q4f16`, or `bnb4`)
+
+```ruby
+Informers.pipeline("embedding", "Xenova/all-MiniLM-L6-v2", dtype: "fp16")
+```
+
+Specify a device (`cpu`, `cuda`, or `coreml`)
+
+```ruby
+Informers.pipeline("embedding", device: "cuda")
+```
+
+Note: Follow [these instructions](https://github.com/ankane/onnxruntime-ruby?tab=readme-ov-file#gpu-support) for `cuda`
+
+Specify ONNX Runtime [session options](https://github.com/ankane/onnxruntime-ruby?tab=readme-ov-file#session-options)
+
+```ruby
+Informers.pipeline("embedding", session_options: {log_severity_level: 2})
+```
+
 ## Credits
 
-This library was ported from [Transformers.js](https://github.com/xenova/transformers.js) and is available under the same license.
+This library was ported from [Transformers.js](https://github.com/huggingface/transformers.js) and is available under the same license.
 
 ## Upgrading
 
data/lib/informers/backends/onnx.rb ADDED
@@ -0,0 +1,19 @@
+module Informers
+  module Backends
+    module Onnx
+      def self.device_to_execution_providers(device)
+        case device&.to_s
+        when "cpu", nil
+          []
+        when "cuda"
+          ["CUDAExecutionProvider"]
+        when "coreml"
+          ["CoreMLExecutionProvider"]
+        else
+          supported_devices = ["cpu", "cuda", "coreml"]
+          raise ArgumentError, "Unsupported device: #{device}. Should be one of: #{supported_devices.join(", ")}"
+        end
+      end
+    end
+  end
+end
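A quick check of the helper above; the return values follow directly from the case expression:

```ruby
Informers::Backends::Onnx.device_to_execution_providers("cuda")  # => ["CUDAExecutionProvider"]
Informers::Backends::Onnx.device_to_execution_providers(:cuda)   # => ["CUDAExecutionProvider"] (symbols work via &.to_s)
Informers::Backends::Onnx.device_to_execution_providers(nil)     # => [] (ONNX Runtime falls back to its CPU provider)
Informers::Backends::Onnx.device_to_execution_providers("tpu")   # ArgumentError: Unsupported device: tpu. ...
```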
data/lib/informers/model.rb CHANGED
@@ -1,4 +1,5 @@
 module Informers
+  # TODO remove in 2.0
   class Model
     def initialize(model_id, quantized: false)
      @model = Informers.pipeline("embedding", model_id, quantized: quantized)
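`Informers::Model` is now a thin compatibility wrapper slated for removal in 2.0; based on the delegation visible above, these two are equivalent:

```ruby
# deprecated wrapper
model = Informers::Model.new("sentence-transformers/all-MiniLM-L6-v2")

# the pipeline it delegates to
model = Informers.pipeline("embedding", "sentence-transformers/all-MiniLM-L6-v2")
```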
data/lib/informers/models.rb CHANGED
@@ -22,7 +22,10 @@ module Informers
       cache_dir: nil,
       local_files_only: false,
       revision: "main",
-      model_file_name: nil
+      device: nil,
+      dtype: nil,
+      model_file_name: nil,
+      session_options: {}
     )
       options = {
         quantized:,
@@ -31,7 +34,10 @@ module Informers
         cache_dir:,
         local_files_only:,
         revision:,
-        model_file_name:
+        device:,
+        dtype:,
+        model_file_name:,
+        session_options:
       }
       config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **options)
       if options[:config].nil?
@@ -109,7 +115,10 @@ module Informers
       cache_dir: nil,
       local_files_only: false,
       revision: "main",
-      model_file_name: nil
+      device: nil,
+      dtype: nil,
+      model_file_name: nil,
+      session_options: {}
     )
       options = {
         quantized:,
@@ -118,22 +127,25 @@ module Informers
         cache_dir:,
         local_files_only:,
         revision:,
-        model_file_name:
+        device:,
+        dtype:,
+        model_file_name:,
+        session_options:
       }
 
       model_name = MODEL_CLASS_TO_NAME_MAPPING[self]
       model_type = MODEL_TYPE_MAPPING[model_name]
 
+      config ||= AutoConfig.from_pretrained(pretrained_model_name_or_path, **options)
+
       if model_type == MODEL_TYPES[:DecoderOnly]
         info = [
-          AutoConfig.from_pretrained(pretrained_model_name_or_path, **options),
           construct_session(pretrained_model_name_or_path, options[:model_file_name] || "decoder_model_merged", **options),
           Utils::Hub.get_model_json(pretrained_model_name_or_path, "generation_config.json", false, **options)
         ]
 
       elsif model_type == MODEL_TYPES[:Seq2Seq] || model_type == MODEL_TYPES[:Vision2Seq]
         info = [
-          AutoConfig.from_pretrained(pretrained_model_name_or_path, **options),
           construct_session(pretrained_model_name_or_path, "encoder_model", **options),
           construct_session(pretrained_model_name_or_path, "decoder_model_merged", **options),
           Utils::Hub.get_model_json(pretrained_model_name_or_path, "generation_config.json", false, **options)
@@ -141,14 +153,12 @@ module Informers
 
       elsif model_type == MODEL_TYPES[:MaskGeneration]
         info = [
-          AutoConfig.from_pretrained(pretrained_model_name_or_path, **options),
           construct_session(pretrained_model_name_or_path, "vision_encoder", **options),
           construct_session(pretrained_model_name_or_path, "prompt_encoder_mask_decoder", **options)
         ]
 
       elsif model_type == MODEL_TYPES[:EncoderDecoder]
         info = [
-          AutoConfig.from_pretrained(pretrained_model_name_or_path, **options),
           construct_session(pretrained_model_name_or_path, "encoder_model", **options),
           construct_session(pretrained_model_name_or_path, "decoder_model_merged", **options)
         ]
@@ -158,12 +168,11 @@ module Informers
           warn "Model type for '#{model_name || config[:model_type]}' not found, assuming encoder-only architecture. Please report this."
         end
         info = [
-          AutoConfig.from_pretrained(pretrained_model_name_or_path, **options),
           construct_session(pretrained_model_name_or_path, options[:model_file_name] || "model", **options)
        ]
      end
 
-      new(*info)
+      new(config, *info)
     end
 
     def self.construct_session(pretrained_model_name_or_path, file_name, **options)
@@ -175,10 +184,27 @@ module Informers
         prefix = ""
         file_name = file_name[1..]
       end
-      model_file_name = "#{prefix}#{file_name}#{options[:quantized] ? "_quantized" : ""}.onnx"
+      dtype = options[:dtype] || (options[:quantized] ? "q8" : "fp32")
+      suffix = Utils::DEFAULT_DTYPE_SUFFIX_MAPPING[dtype.to_sym]
+      if !suffix
+        raise ArgumentError, "Invalid dtype: #{dtype}. Should be one of: #{Utils::DEFAULT_DTYPE_SUFFIX_MAPPING.keys.join(", ")}"
+      end
+      model_file_name = "#{prefix}#{file_name}#{suffix}.onnx"
       path = Utils::Hub.get_model_file(pretrained_model_name_or_path, model_file_name, true, **options)
 
-      OnnxRuntime::InferenceSession.new(path)
+      session_options = {
+        providers: Backends::Onnx.device_to_execution_providers(options[:device]),
+        log_severity_level: 4
+      }.merge(options[:session_options] || {})
+
+      begin
+        OnnxRuntime::InferenceSession.new(path, **session_options)
+      rescue OnnxRuntime::Error => e
+        raise e unless e.message.include?("No such file or directory") && e.message.include?(".onnx_data")
+
+        Utils::Hub.get_model_file(pretrained_model_name_or_path, "#{model_file_name}_data", true, **options)
+        OnnxRuntime::InferenceSession.new(path, **session_options)
+      end
     end
 
     def call(model_inputs, **kwargs)
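This rescue is the changelog's "support for models with external data": ONNX stores weights beyond the 2 GB protobuf limit in a companion file next to the graph, the first session load fails without it, and the retry fetches the `.onnx_data` file from the hub before opening the session again. A sketch of the layout it handles (model id hypothetical):

```ruby
# hypothetical hub layout for a model with external data:
#   onnx/model.onnx       <- graph, references the companion data file
#   onnx/model.onnx_data  <- raw weights, fetched by the retry above
model = Informers.pipeline("embedding", "some-org/some-large-model")
```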
@@ -783,12 +809,49 @@ module Informers
     end
   end
 
+  class ModernBertPreTrainedModel < PreTrainedModel
+  end
+
+  class ModernBertModel < ModernBertPreTrainedModel
+  end
+
+  class ModernBertForMaskedLM < ModernBertPreTrainedModel
+    def call(model_inputs)
+      MaskedLMOutput.new(*super(model_inputs))
+    end
+  end
+
+  class ModernBertForSequenceClassification < ModernBertPreTrainedModel
+    def call(model_inputs)
+      SequenceClassifierOutput.new(*super(model_inputs))
+    end
+  end
+
+  class ModernBertForTokenClassification < ModernBertPreTrainedModel
+    def call(model_inputs)
+      TokenClassifierOutput.new(*super(model_inputs))
+    end
+  end
+
   class NomicBertPreTrainedModel < PreTrainedModel
   end
 
   class NomicBertModel < NomicBertPreTrainedModel
   end
 
+  class ConvBertPreTrainedModel < PreTrainedModel
+  end
+
+  class ConvBertModel < ConvBertPreTrainedModel
+  end
+
+  class ElectraPreTrainedModel < PreTrainedModel
+  end
+
+  # TODO add ElectraForPreTraining
+  class ElectraModel < ElectraPreTrainedModel
+  end
+
   class DebertaV2PreTrainedModel < PreTrainedModel
   end
 
@@ -939,6 +1002,18 @@ module Informers
     end
   end
 
+  class RobertaForTokenClassification < RobertaPreTrainedModel
+    def call(model_inputs)
+      TokenClassifierOutput.new(*super(model_inputs))
+    end
+  end
+
+  class RobertaForSequenceClassification < RobertaPreTrainedModel
+    def call(model_inputs)
+      SequenceClassifierOutput.new(*super(model_inputs))
+    end
+  end
+
   class XLMRobertaPreTrainedModel < PreTrainedModel
   end
 
@@ -1147,7 +1222,10 @@ module Informers
 
   MODEL_MAPPING_NAMES_ENCODER_ONLY = {
     "bert" => ["BertModel", BertModel],
+    "modernbert" => ["ModernBertModel", ModernBertModel],
     "nomic_bert" => ["NomicBertModel", NomicBertModel],
+    "electra" => ["ElectraModel", ElectraModel],
+    "convbert" => ["ConvBertModel", ConvBertModel],
     "deberta-v2" => ["DebertaV2Model", DebertaV2Model],
     "mpnet" => ["MPNetModel", MPNetModel],
     "distilbert" => ["DistilBertModel", DistilBertModel],
@@ -1182,13 +1260,17 @@ module Informers
 
   MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = {
     "bert" => ["BertForSequenceClassification", BertForSequenceClassification],
+    "modernbert" => ["ModernBertForSequenceClassification", ModernBertForSequenceClassification],
     "distilbert" => ["DistilBertForSequenceClassification", DistilBertForSequenceClassification],
+    "roberta" => ["RobertaForSequenceClassification", RobertaForSequenceClassification],
     "xlm-roberta" => ["XLMRobertaForSequenceClassification", XLMRobertaForSequenceClassification],
     "bart" => ["BartForSequenceClassification", BartForSequenceClassification]
   }
 
   MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = {
-    "bert" => ["BertForTokenClassification", BertForTokenClassification]
+    "bert" => ["BertForTokenClassification", BertForTokenClassification],
+    "modernbert" => ["ModernBertForTokenClassification", ModernBertForTokenClassification],
+    "roberta" => ["RobertaForTokenClassification", RobertaForTokenClassification]
   }
 
   MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = {
@@ -1204,6 +1286,7 @@ module Informers
 
   MODEL_FOR_MASKED_LM_MAPPING_NAMES = {
     "bert" => ["BertForMaskedLM", BertForMaskedLM],
+    "modernbert" => ["ModernBertForMaskedLM", ModernBertForMaskedLM],
     "roberta" => ["RobertaForMaskedLM", RobertaForMaskedLM]
   }
 
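With the mapping entries above, ModernBERT checkpoints can back the existing pipelines; a hedged sketch (model id illustrative; it needs an ONNX export with a masked-LM head):

```ruby
unmasker = Informers.pipeline("fill-mask", "answerdotai/ModernBERT-base")
unmasker.("The capital of France is [MASK].")
```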
data/lib/informers/pipelines.rb CHANGED
@@ -1336,6 +1336,7 @@ module Informers
     stream = $stderr
     tty = stream.tty?
     width = tty ? stream.winsize[1] : 80
+    width = 80 if width == 0
 
     if msg[:status] == "progress" && tty
       stream.print "\r#{Utils::Hub.display_progress(msg[:file], width, msg[:size], msg[:total_size])}"
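This guard is the 1.2.1 fix from the changelog: `IO#winsize` can report zero columns (e.g., a tty with no size set), and a zero width made the bar math in `display_progress` go negative, which `String#*` rejects:

```ruby
width = 0
bar_width = width - ("model.onnx".length + 3)  # => -13
" " * bar_width                                # ArgumentError: negative argument
```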
@@ -1360,13 +1361,16 @@ module Informers
     cache_dir: nil,
     local_files_only: false,
     revision: "main",
-    model_file_name: nil
+    device: nil,
+    dtype: nil,
+    model_file_name: nil,
+    session_options: {}
   )
     # Apply aliases
     task = TASK_ALIASES[task] || task
 
     if quantized == NO_DEFAULT
-      # TODO move default to task class
+      # TODO no quantization by default in 2.0
       quantized = ["text-classification", "token-classification", "question-answering", "feature-extraction"].include?(task)
     end
@@ -1389,7 +1393,10 @@ module Informers
       cache_dir:,
       local_files_only:,
       revision:,
-      model_file_name:
+      device:,
+      dtype:,
+      model_file_name:,
+      session_options:
     }
 
     classes = {
data/lib/informers/tokenizers.rb CHANGED
@@ -261,7 +261,8 @@ module Informers
     "GPT2Tokenizer" => GPT2Tokenizer,
     "NllbTokenizer" => NllbTokenizer,
     "M2M100Tokenizer" => M2M100Tokenizer,
-    "SpeechT5Tokenizer" => SpeechT5Tokenizer
+    "SpeechT5Tokenizer" => SpeechT5Tokenizer,
+    "PreTrainedTokenizer" => PreTrainedTokenizer
   }
 
   def self.from_pretrained(
data/lib/informers/utils/dtypes.rb ADDED
@@ -0,0 +1,14 @@
+module Informers
+  module Utils
+    DEFAULT_DTYPE_SUFFIX_MAPPING = {
+      fp32: "",
+      fp16: "_fp16",
+      int8: "_int8",
+      uint8: "_uint8",
+      q8: "_quantized",
+      q4: "_q4",
+      q4f16: "_q4f16",
+      bnb4: "_bnb4"
+    }
+  end
+end
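`construct_session` (in the models.rb hunk above) appends these suffixes to the model file name; worked by hand:

```ruby
Informers::Utils::DEFAULT_DTYPE_SUFFIX_MAPPING[:fp32]   # => ""           -> onnx/model.onnx
Informers::Utils::DEFAULT_DTYPE_SUFFIX_MAPPING[:q8]     # => "_quantized" -> onnx/model_quantized.onnx (the old quantized: true name)
Informers::Utils::DEFAULT_DTYPE_SUFFIX_MAPPING[:q4f16]  # => "_q4f16"     -> onnx/model_q4f16.onnx
```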
data/lib/informers/utils/hub.rb CHANGED
@@ -81,12 +81,18 @@ module Informers
       file if file.exists
     end
 
-    def put(request, buffer)
+    def put(request, response)
       output_path = resolve_path(request)
 
       begin
+        tmp_path = "#{output_path}.incomplete"
         FileUtils.mkdir_p(File.dirname(output_path))
-        File.binwrite(output_path, buffer)
+        File.open(tmp_path, "wb") do |f|
+          while !response.eof?
+            f.write(response.read(1024 * 1024))
+          end
+        end
+        FileUtils.move(tmp_path, output_path)
       rescue => e
         warn "An error occurred while writing the file to cache: #{e}"
       end
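The cache now streams the response in 1 MiB chunks to a `.incomplete` file and renames it into place, rather than buffering the whole download in memory (the `buffer = response.read` removed in the next hunk). The same pattern in isolation (helper name hypothetical):

```ruby
require "fileutils"

def atomic_write(output_path, io, chunk_size: 1024 * 1024)
  tmp_path = "#{output_path}.incomplete"
  FileUtils.mkdir_p(File.dirname(output_path))
  File.open(tmp_path, "wb") do |f|
    f.write(io.read(chunk_size)) until io.eof?
  end
  FileUtils.move(tmp_path, output_path) # readers never observe a partially written file
end
```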
@@ -189,10 +195,8 @@ module Informers
       to_cache_response = cache && !response.is_a?(FileResponse) && response.status[0] == "200"
     end
 
-    buffer = response.read
-
     if to_cache_response && cache_key && cache.match(cache_key).nil?
-      cache.put(cache_key, buffer)
+      cache.put(cache_key, response)
     end
 
     Utils.dispatch_callback(options[:progress_callback], {
@@ -229,8 +233,8 @@ module Informers
     end
 
     def self.display_progress(filename, width, size, expected_size)
-      bar_width = width - (filename.length + 3)
-      progress = size / expected_size.to_f
+      bar_width = [width - (filename.length + 3), 1].max
+      progress = expected_size && expected_size > 0 ? size / expected_size.to_f : 0
       done = (progress * bar_width).round
       not_done = bar_width - done
       "#{filename} |#{"█" * done}#{" " * not_done}|"
data/lib/informers/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Informers
-  VERSION = "1.1.1"
+  VERSION = "1.2.1"
 end
data/lib/informers.rb CHANGED
@@ -11,8 +11,10 @@ require "stringio"
 require "uri"
 
 # modules
+require_relative "informers/backends/onnx"
 require_relative "informers/utils/audio"
 require_relative "informers/utils/core"
+require_relative "informers/utils/dtypes"
 require_relative "informers/utils/generation"
 require_relative "informers/utils/ffmpeg"
 require_relative "informers/utils/hub"
metadata CHANGED
@@ -1,14 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: informers
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.2.1
 platform: ruby
 authors:
 - Andrew Kane
-autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-15 00:00:00.000000000 Z
+date: 2025-02-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: onnxruntime
@@ -38,7 +37,6 @@ dependencies:
   - - ">="
   - !ruby/object:Gem::Version
     version: 0.5.3
-description:
 email: andrew@ankane.org
 executables: []
 extensions: []
@@ -48,6 +46,7 @@ files:
 - LICENSE.txt
 - README.md
 - lib/informers.rb
+- lib/informers/backends/onnx.rb
 - lib/informers/configs.rb
 - lib/informers/env.rb
 - lib/informers/model.rb
@@ -57,6 +56,7 @@ files:
 - lib/informers/tokenizers.rb
 - lib/informers/utils/audio.rb
 - lib/informers/utils/core.rb
+- lib/informers/utils/dtypes.rb
 - lib/informers/utils/ffmpeg.rb
 - lib/informers/utils/generation.rb
 - lib/informers/utils/hub.rb
@@ -68,7 +68,6 @@ homepage: https://github.com/ankane/informers
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -83,8 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.5.16
-signing_key:
+rubygems_version: 3.6.2
 specification_version: 4
 summary: Fast transformer inference for Ruby
 test_files: []