informers 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a17c099b091e97624f9b5e25cff8cd7b9655d731a957931bde7f8c8191edd644
4
- data.tar.gz: c20671b6301a5865c81db53c7a29f30a702712e43b39a3705d7e6c2f1d08d173
3
+ metadata.gz: dfeaf81a8a597d5c25d340e38d0f0be665d3737a1cc271ece883e5b83472da31
4
+ data.tar.gz: 36943e4c472c864e5951a02ea336c47192e77bcc51161113e022675cce552b82
5
5
  SHA512:
6
- metadata.gz: 061edee7577d44f2d00750a803b23d69584c09a6a84261b6981d3a9fbb7975b90f87a08815f032c5f21154fb3141d8fc1be405d0c1cda850bb8d11e58ec496cf
7
- data.tar.gz: 99806871f7962a89dd968aaf960c0aaf196b68cbfcaf1101a5fcbc1c9fe37f09745e3f585418703bd42ed861b32f6467cd4b95e4d79668e3f349ec2ada9a1525
6
+ metadata.gz: f6f9a35abcdcb57ddd24c3d81498fa517df554a48b722d7fcee0776e950762ef6412f90916688ca273b95bac79b2441a92a26bcecdc27e2e3c504eafc2b39793
7
+ data.tar.gz: b85b9a12439001996c4a1580bfdca650ae920791867a3245531b8b13de0b082ebca5ee76be70e8581947e38cf7ebd8c6474d537234ccef4cddb04cc2c6b3dfa5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 1.2.1 (2025-02-01)
2
+
3
+ - Fixed error when terminal width is zero
4
+
1
5
  ## 1.2.0 (2024-11-14)
2
6
 
3
7
  - Added support for models with external data
data/README.md CHANGED
@@ -53,12 +53,6 @@ model = Informers.pipeline("embedding", "sentence-transformers/all-MiniLM-L6-v2"
53
53
  embeddings = model.(sentences)
54
54
  ```
55
55
 
56
- For a quantized version, use:
57
-
58
- ```ruby
59
- model = Informers.pipeline("embedding", "Xenova/all-MiniLM-L6-v2", quantized: true)
60
- ```
61
-
62
56
  ### sentence-transformers/multi-qa-MiniLM-L6-cos-v1
63
57
 
64
58
  [Docs](https://huggingface.co/Xenova/multi-qa-MiniLM-L6-cos-v1)
@@ -254,7 +248,7 @@ result = model.(query, docs)
254
248
 
255
249
  ### Other
256
250
 
257
- The model must include a `.onnx` file ([example](https://huggingface.co/Xenova/all-MiniLM-L6-v2/tree/main/onnx)). If the file is not at `onnx/model.onnx` or `onnx/model_quantized.onnx`, use the `model_file_name` option to specify the location.
251
+ The model must include a `.onnx` file ([example](https://huggingface.co/Xenova/all-MiniLM-L6-v2/tree/main/onnx)). If the file is not at `onnx/model.onnx`, use the `model_file_name` option to specify the location.
258
252
 
259
253
  ## Pipelines
260
254
 
@@ -438,13 +432,13 @@ qa.("image.jpg", "What is the invoice number?")
438
432
 
439
433
  ## Reference
440
434
 
441
- Specify a variant of the model
435
+ Specify a variant of the model if available (`fp32`, `fp16`, `int8`, `uint8`, `q8`, `q4`, `q4f16`, or `bnb4`)
442
436
 
443
437
  ```ruby
444
438
  Informers.pipeline("embedding", "Xenova/all-MiniLM-L6-v2", dtype: "fp16")
445
439
  ```
446
440
 
447
- Specify a device (supports `cpu`, `cuda`, and `coreml`)
441
+ Specify a device (`cpu`, `cuda`, or `coreml`)
448
442
 
449
443
  ```ruby
450
444
  Informers.pipeline("embedding", device: "cuda")
@@ -809,12 +809,49 @@ module Informers
809
809
  end
810
810
  end
811
811
 
812
+ class ModernBertPreTrainedModel < PreTrainedModel
813
+ end
814
+
815
+ class ModernBertModel < ModernBertPreTrainedModel
816
+ end
817
+
818
+ class ModernBertForMaskedLM < ModernBertPreTrainedModel
819
+ def call(model_inputs)
820
+ MaskedLMOutput.new(*super(model_inputs))
821
+ end
822
+ end
823
+
824
+ class ModernBertForSequenceClassification < ModernBertPreTrainedModel
825
+ def call(model_inputs)
826
+ SequenceClassifierOutput.new(*super(model_inputs))
827
+ end
828
+ end
829
+
830
+ class ModernBertForTokenClassification < ModernBertPreTrainedModel
831
+ def call(model_inputs)
832
+ TokenClassifierOutput.new(*super(model_inputs))
833
+ end
834
+ end
835
+
812
836
  class NomicBertPreTrainedModel < PreTrainedModel
813
837
  end
814
838
 
815
839
  class NomicBertModel < NomicBertPreTrainedModel
816
840
  end
817
841
 
842
+ class ConvBertPreTrainedModel < PreTrainedModel
843
+ end
844
+
845
+ class ConvBertModel < ConvBertPreTrainedModel
846
+ end
847
+
848
+ class ElectraPreTrainedModel < PreTrainedModel
849
+ end
850
+
851
+ # TODO add ElectraForPreTraining
852
+ class ElectraModel < ElectraPreTrainedModel
853
+ end
854
+
818
855
  class DebertaV2PreTrainedModel < PreTrainedModel
819
856
  end
820
857
 
@@ -965,6 +1002,18 @@ module Informers
965
1002
  end
966
1003
  end
967
1004
 
1005
+ class RobertaForTokenClassification < RobertaPreTrainedModel
1006
+ def call(model_inputs)
1007
+ TokenClassifierOutput.new(*super(model_inputs))
1008
+ end
1009
+ end
1010
+
1011
+ class RobertaForSequenceClassification < RobertaPreTrainedModel
1012
+ def call(model_inputs)
1013
+ SequenceClassifierOutput.new(*super(model_inputs))
1014
+ end
1015
+ end
1016
+
968
1017
  class XLMRobertaPreTrainedModel < PreTrainedModel
969
1018
  end
970
1019
 
@@ -1173,7 +1222,10 @@ module Informers
1173
1222
 
1174
1223
  MODEL_MAPPING_NAMES_ENCODER_ONLY = {
1175
1224
  "bert" => ["BertModel", BertModel],
1225
+ "modernbert" => ["ModernBertModel", ModernBertModel],
1176
1226
  "nomic_bert" => ["NomicBertModel", NomicBertModel],
1227
+ "electra" => ["ElectraModel", ElectraModel],
1228
+ "convbert" => ["ConvBertModel", ConvBertModel],
1177
1229
  "deberta-v2" => ["DebertaV2Model", DebertaV2Model],
1178
1230
  "mpnet" => ["MPNetModel", MPNetModel],
1179
1231
  "distilbert" => ["DistilBertModel", DistilBertModel],
@@ -1208,13 +1260,17 @@ module Informers
1208
1260
 
1209
1261
  MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = {
1210
1262
  "bert" => ["BertForSequenceClassification", BertForSequenceClassification],
1263
+ "modernbert" => ["ModernBertForSequenceClassification", ModernBertForSequenceClassification],
1211
1264
  "distilbert" => ["DistilBertForSequenceClassification", DistilBertForSequenceClassification],
1265
+ "roberta" => ["RobertaForSequenceClassification", RobertaForSequenceClassification],
1212
1266
  "xlm-roberta" => ["XLMRobertaForSequenceClassification", XLMRobertaForSequenceClassification],
1213
1267
  "bart" => ["BartForSequenceClassification", BartForSequenceClassification]
1214
1268
  }
1215
1269
 
1216
1270
  MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = {
1217
- "bert" => ["BertForTokenClassification", BertForTokenClassification]
1271
+ "bert" => ["BertForTokenClassification", BertForTokenClassification],
1272
+ "modernbert" => ["ModernBertForTokenClassification", ModernBertForTokenClassification],
1273
+ "roberta" => ["RobertaForTokenClassification", RobertaForTokenClassification]
1218
1274
  }
1219
1275
 
1220
1276
  MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = {
@@ -1230,6 +1286,7 @@ module Informers
1230
1286
 
1231
1287
  MODEL_FOR_MASKED_LM_MAPPING_NAMES = {
1232
1288
  "bert" => ["BertForMaskedLM", BertForMaskedLM],
1289
+ "modernbert" => ["ModernBertForMaskedLM", ModernBertForMaskedLM],
1233
1290
  "roberta" => ["RobertaForMaskedLM", RobertaForMaskedLM]
1234
1291
  }
1235
1292
 
@@ -1336,6 +1336,7 @@ module Informers
1336
1336
  stream = $stderr
1337
1337
  tty = stream.tty?
1338
1338
  width = tty ? stream.winsize[1] : 80
1339
+ width = 80 if width == 0
1339
1340
 
1340
1341
  if msg[:status] == "progress" && tty
1341
1342
  stream.print "\r#{Utils::Hub.display_progress(msg[:file], width, msg[:size], msg[:total_size])}"
@@ -1369,7 +1370,7 @@ module Informers
1369
1370
  task = TASK_ALIASES[task] || task
1370
1371
 
1371
1372
  if quantized == NO_DEFAULT
1372
- # TODO move default to task class
1373
+ # TODO no quantization by default in 2.0
1373
1374
  quantized = ["text-classification", "token-classification", "question-answering", "feature-extraction"].include?(task)
1374
1375
  end
1375
1376
 
@@ -233,8 +233,8 @@ module Informers
233
233
  end
234
234
 
235
235
  def self.display_progress(filename, width, size, expected_size)
236
- bar_width = width - (filename.length + 3)
237
- progress = size / expected_size.to_f
236
+ bar_width = [width - (filename.length + 3), 1].max
237
+ progress = expected_size && expected_size > 0 ? size / expected_size.to_f : 0
238
238
  done = (progress * bar_width).round
239
239
  not_done = bar_width - done
240
240
  "#{filename} |#{"█" * done}#{" " * not_done}|"
@@ -1,3 +1,3 @@
1
1
  module Informers
2
- VERSION = "1.2.0"
2
+ VERSION = "1.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: informers
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-11-14 00:00:00.000000000 Z
10
+ date: 2025-02-01 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: onnxruntime
@@ -38,7 +37,6 @@ dependencies:
38
37
  - - ">="
39
38
  - !ruby/object:Gem::Version
40
39
  version: 0.5.3
41
- description:
42
40
  email: andrew@ankane.org
43
41
  executables: []
44
42
  extensions: []
@@ -70,7 +68,6 @@ homepage: https://github.com/ankane/informers
70
68
  licenses:
71
69
  - Apache-2.0
72
70
  metadata: {}
73
- post_install_message:
74
71
  rdoc_options: []
75
72
  require_paths:
76
73
  - lib
@@ -85,8 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
85
82
  - !ruby/object:Gem::Version
86
83
  version: '0'
87
84
  requirements: []
88
- rubygems_version: 3.5.22
89
- signing_key:
85
+ rubygems_version: 3.6.2
90
86
  specification_version: 4
91
87
  summary: Fast transformer inference for Ruby
92
88
  test_files: []