informers 1.0.3 → 1.1.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +137 -7
- data/lib/informers/configs.rb +10 -8
- data/lib/informers/model.rb +2 -9
- data/lib/informers/models.rb +1160 -15
- data/lib/informers/pipelines.rb +943 -11
- data/lib/informers/processors.rb +856 -0
- data/lib/informers/tokenizers.rb +159 -5
- data/lib/informers/utils/audio.rb +18 -0
- data/lib/informers/utils/core.rb +4 -0
- data/lib/informers/utils/ffmpeg.rb +45 -0
- data/lib/informers/utils/generation.rb +294 -0
- data/lib/informers/utils/image.rb +116 -0
- data/lib/informers/utils/math.rb +73 -0
- data/lib/informers/utils/tensor.rb +46 -0
- data/lib/informers/version.rb +1 -1
- data/lib/informers.rb +6 -0
- metadata +10 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a61f01755798e81a975641d60e5bfe09484ced7ce6a3453020c9978dc35b1942
+  data.tar.gz: 811f9c1dc4499ae7de8ebf8e02c0c4e98a0c0bc0af6aaca51025e42ba8165540
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 97b27363fab1e43895e368dbddc819fd4db23d42ce517359e5971347cd902b654f0c66700f07b36cd5f476bd3ea205a91e4f7e7ee0e7d8d455f0dce377bedb2b
+  data.tar.gz: dd1a7f795609423419ce213b00a5aca409f6b4a5bffb111250b4deffcbc6a8113fadf8d603c59fa78fa0f310904a0a3299e3bcdc48101f574171a024d13567e6
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -229,17 +229,17 @@ result = model.(query, docs)
 
 ### Other
 
-You can use the feature extraction pipeline directly.
-
-```ruby
-model = Informers.pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2", quantized: false)
-embeddings = model.(sentences, pooling: "mean", normalize: true)
-```
-
 The model must include a `.onnx` file ([example](https://huggingface.co/Xenova/all-MiniLM-L6-v2/tree/main/onnx)). If the file is not at `onnx/model.onnx` or `onnx/model_quantized.onnx`, use the `model_file_name` option to specify the location.
 
 ## Pipelines
 
+- [Text](#text)
+- [Vision](#vision)
+- [Audio](#audio)
+- [Multimodal](#multimodal)
+
+### Text
+
 Embedding
 
 ```ruby
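As context for the `model_file_name` option kept in this hunk: a minimal sketch of pointing the pipeline at a non-default ONNX file. The file name below is illustrative, and the resolution rules (relative to the repo's `onnx/` folder, extension added automatically) are assumed from the Transformers.js conventions this library ports.

```ruby
# Hypothetical file layout: the repo stores its weights at
# onnx/custom_model.onnx instead of the default onnx/model.onnx.
model = Informers.pipeline(
  "feature-extraction",
  "Xenova/all-MiniLM-L6-v2",       # model id from the README example
  model_file_name: "custom_model"  # assumed: file name without the .onnx extension
)
embeddings = model.(["Ruby is great"], pooling: "mean", normalize: true)
```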
@@ -275,6 +275,48 @@ qa = Informers.pipeline("question-answering")
 qa.("Who invented Ruby?", "Ruby is a programming language created by Matz")
 ```
 
+Zero-shot classification
+
+```ruby
+classifier = Informers.pipeline("zero-shot-classification")
+classifier.("text", ["label1", "label2", "label3"])
+```
+
+Text generation
+
+```ruby
+generator = Informers.pipeline("text-generation")
+generator.("I enjoy walking with my cute dog,")
+```
+
+Text-to-text generation
+
+```ruby
+text2text = Informers.pipeline("text2text-generation")
+text2text.("translate from English to French: I'm very happy")
+```
+
+Translation
+
+```ruby
+translator = Informers.pipeline("translation", "Xenova/nllb-200-distilled-600M")
+translator.("जीवन एक चॉकलेट बॉक्स की तरह है।", src_lang: "hin_Deva", tgt_lang: "fra_Latn")
+```
+
+Summarization
+
+```ruby
+summarizer = Informers.pipeline("summarization")
+summarizer.("Many paragraphs of text")
+```
+
+Fill mask
+
+```ruby
+unmasker = Informers.pipeline("fill-mask")
+unmasker.("Paris is the [MASK] of France.")
+```
+
 Feature extraction
 
 ```ruby
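Since the README states this library is a port of Transformers.js, the return values of these new text pipelines can be expected to mirror the upstream shapes. A sketch under that assumption; the commented output is illustrative, not taken from this diff:

```ruby
classifier = Informers.pipeline("zero-shot-classification")
result = classifier.("I love Ruby", ["programming", "cooking", "sports"])
# Assumed shape, mirroring the Transformers.js zero-shot pipeline:
# {"sequence" => "I love Ruby",
#  "labels"   => ["programming", "sports", "cooking"],  # sorted by score
#  "scores"   => [0.95, 0.03, 0.02]}
```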
@@ -282,6 +324,93 @@ extractor = Informers.pipeline("feature-extraction")
 extractor.("We are very happy to show you the 🤗 Transformers library.")
 ```
 
+### Vision
+
+Note: [ruby-vips](https://github.com/libvips/ruby-vips) is required to load images
+
+Image classification
+
+```ruby
+classifier = Informers.pipeline("image-classification")
+classifier.("image.jpg")
+```
+
+Zero-shot image classification
+
+```ruby
+classifier = Informers.pipeline("zero-shot-image-classification")
+classifier.("image.jpg", ["label1", "label2", "label3"])
+```
+
+Image segmentation
+
+```ruby
+segmenter = Informers.pipeline("image-segmentation")
+segmenter.("image.jpg")
+```
+
+Object detection
+
+```ruby
+detector = Informers.pipeline("object-detection")
+detector.("image.jpg")
+```
+
+Zero-shot object detection
+
+```ruby
+detector = Informers.pipeline("zero-shot-object-detection")
+detector.("image.jpg", ["label1", "label2", "label3"])
+```
+
+Depth estimation
+
+```ruby
+estimator = Informers.pipeline("depth-estimation")
+estimator.("image.jpg")
+```
+
+Image-to-image
+
+```ruby
+upscaler = Informers.pipeline("image-to-image")
+upscaler.("image.jpg")
+```
+
+Image feature extraction
+
+```ruby
+extractor = Informers.pipeline("image-feature-extraction")
+extractor.("image.jpg")
+```
+
+### Audio
+
+Note: [ffmpeg](https://www.ffmpeg.org/) is required to load audio files
+
+Audio classification
+
+```ruby
+classifier = Informers.pipeline("audio-classification")
+classifier.("audio.wav")
+```
+
+### Multimodal
+
+Image captioning
+
+```ruby
+captioner = Informers.pipeline("image-to-text")
+captioner.("image.jpg")
+```
+
+Document question answering
+
+```ruby
+qa = Informers.pipeline("document-question-answering")
+qa.("image.jpg", "What is the invoice number?")
+```
+
 ## Credits
 
 This library was ported from [Transformers.js](https://github.com/xenova/transformers.js) and is available under the same license.
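Per the notes in this hunk, the vision and multimodal pipelines load images through ruby-vips (which in turn needs the libvips system library), while the audio pipelines rely on the ffmpeg binary; neither is pulled in by the gem itself. A minimal Gemfile sketch for an app using the new vision pipelines (version constraints are illustrative):

```ruby
# Gemfile
source "https://rubygems.org"

gem "informers", "~> 1.1"
gem "ruby-vips"  # image loading for the vision and multimodal pipelines
# ffmpeg is a system dependency, not a gem; install it with your OS package manager
```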
@@ -321,5 +450,6 @@ To get started with development:
 git clone https://github.com/ankane/informers.git
 cd informers
 bundle install
+bundle exec rake download:files
 bundle exec rake test
 ```
data/lib/informers/configs.rb
CHANGED
@@ -1,17 +1,19 @@
 module Informers
   class PretrainedConfig
-    attr_reader :model_type, :problem_type, :id2label
-
     def initialize(config_json)
-      @config_json = config_json
-
-      @model_type = config_json["model_type"]
-      @problem_type = config_json["problem_type"]
-      @id2label = config_json["id2label"]
+      @config_json = config_json.to_h
     end
 
     def [](key)
-      @config_json[key]
+      @config_json[key.to_s]
+    end
+
+    def []=(key, value)
+      @config_json[key.to_s] = value
+    end
+
+    def to_h
+      @config_json.to_h
     end
 
     def self.from_pretrained(
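The rewrite above drops the fixed `attr_reader`s in favor of generic hash-style access, so any key present in a model's `config.json` is reachable without further code changes. A minimal sketch of the new surface, using an illustrative config hash:

```ruby
config = Informers::PretrainedConfig.new(
  {"model_type" => "bert", "id2label" => {"0" => "NEGATIVE", "1" => "POSITIVE"}}
)
config[:model_type]       # => "bert" (symbol keys are normalized to strings)
config["num_labels"] = 2  # writable via the new []=
config.to_h               # => a plain Hash copy of the underlying config
```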
data/lib/informers/model.rb
CHANGED
@@ -1,19 +1,12 @@
 module Informers
   class Model
     def initialize(model_id, quantized: false)
-      @model_id = model_id
       @model = Informers.pipeline("embedding", model_id, quantized: quantized)
+      @options = model_id == "mixedbread-ai/mxbai-embed-large-v1" ? {pooling: "cls", normalize: false} : {}
     end
 
     def embed(texts)
-      case @model_id
-      when "sentence-transformers/all-MiniLM-L6-v2", "Xenova/all-MiniLM-L6-v2", "Xenova/multi-qa-MiniLM-L6-cos-v1", "Supabase/gte-small"
-        @model.(texts)
-      when "mixedbread-ai/mxbai-embed-large-v1"
-        @model.(texts, pooling: "cls", normalize: false)
-      else
-        raise Error, "Use the embedding pipeline for this model: #{@model_id}"
-      end
+      @model.(texts, **@options)
     end
   end
 end
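Net effect of this change: `Model#embed` no longer raises for model ids outside a hard-coded list; every model id is handed to the embedding pipeline, with `pooling: "cls", normalize: false` applied only for mixedbread-ai/mxbai-embed-large-v1. A usage sketch (the input sentences are illustrative):

```ruby
model = Informers::Model.new("mixedbread-ai/mxbai-embed-large-v1")
embeddings = model.embed(["Ruby is great", "The dog is barking"])
# embeddings is an array with one vector per input sentence
```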