informers 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2baf3ed7ae9b6bf6a1347f0dc880ae3a48f26daa518112e37d6bf03927faed67
4
- data.tar.gz: 03cd4f92aa6a062fc23ca712369a8cf1db5300bb53b1eb99ad8d71574a1a8ce6
3
+ metadata.gz: 22f7bcebf0670078b65fdf9cba4d2b937c853a3b10cf36e47f50781e2663225c
4
+ data.tar.gz: 940c96ec6b749b7e0b0c283456e40bfe9e6cbb3a58e8fa11f6367e87b05d8694
5
5
  SHA512:
6
- metadata.gz: cfef17a6c7b9a574c43f3f45cc4f20bb36c1d764f6c68f47036f41a7af9a54aecf1a678eda1e3f3f7b0da26ff8131e22dc13d56e16e461366b67d8b6b0d77e97
7
- data.tar.gz: 8c99136eb43350c118402e0ac076055d4ab563e6f185d06c9b826c5d592a3955f64b2ea5284d32583e4725229201514a30527401f454cde45944fb54f9dd0b97
6
+ metadata.gz: 4cd8b58aae6e885409e297bc1ba09aedd029bb3dc26a193251f33c2bf6c9f6a8da69cb3727f799296a8c6644b014afc715e783a1e19a1074982af531e40db57b
7
+ data.tar.gz: 6f63489d0b303e9a7de13df11d5074bd4cb2dfa44febee4061262d5c188eeb62a7c975e89567048f801fa183c8d56925275768fccc9a4b5a48255abeeb379345
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.0 (2022-09-06)
2
+
3
+ - Added support for `optimum` and `transformers.onnx` models
4
+ - Dropped support for Ruby < 2.7
5
+
1
6
  ## 0.1.3 (2021-09-25)
2
7
 
3
8
  - Added text generation
data/README.md CHANGED
@@ -8,8 +8,6 @@ Supports:
8
8
  - Question answering
9
9
  - Named-entity recognition
10
10
  - Text generation
11
- - Summarization - *in development*
12
- - Translation - *in development*
13
11
 
14
12
  [![Build Status](https://github.com/ankane/informers/workflows/build/badge.svg?branch=master)](https://github.com/ankane/informers/actions)
15
13
 
@@ -18,7 +16,7 @@ Supports:
18
16
  Add this line to your application’s Gemfile:
19
17
 
20
18
  ```ruby
21
- gem 'informers'
19
+ gem "informers"
22
20
  ```
23
21
 
24
22
  ## Getting Started
@@ -26,7 +24,9 @@ gem 'informers'
26
24
  - [Sentiment analysis](#sentiment-analysis)
27
25
  - [Question answering](#question-answering)
28
26
  - [Named-entity recognition](#named-entity-recognition)
29
- - [Text Generation](#text-generation)
27
+ - [Text generation](#text-generation)
28
+ - [Feature extraction](#feature-extraction)
29
+ - [Fill mask](#fill-mask)
30
30
 
31
31
  ### Sentiment Analysis
32
32
 
@@ -109,6 +109,24 @@ This returns
109
109
  As far as I am concerned, I will be the first to admit that I am not a fan of the idea of a "free market." I think that the idea of a free market is a bit of a stretch. I think that the idea
110
110
  ```
111
111
 
112
+ ### Feature Extraction
113
+
114
+ First, export a [pretrained model](tools/export.md).
115
+
116
+ ```ruby
117
+ model = Informers::FeatureExtraction.new("feature-extraction.onnx")
118
+ model.predict("This is super cool")
119
+ ```
120
+
121
+ ### Fill Mask
122
+
123
+ First, export a [pretrained model](tools/export.md).
124
+
125
+ ```ruby
126
+ model = Informers::FillMask.new("fill-mask.onnx")
127
+ model.predict("This is a great <mask>")
128
+ ```
129
+
112
130
  ## Models
113
131
 
114
132
  Task | Description | Contributor | License | Link
@@ -116,7 +134,7 @@ Task | Description | Contributor | License | Link
116
134
  Sentiment analysis | DistilBERT fine-tuned on SST-2 | Hugging Face | Apache-2.0 | [Link](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)
117
135
  Question answering | DistilBERT fine-tuned on SQuAD | Hugging Face | Apache-2.0 | [Link](https://huggingface.co/distilbert-base-cased-distilled-squad)
118
136
  Named-entity recognition | BERT fine-tuned on CoNLL03 | Bayerische Staatsbibliothek | In-progress | [Link](https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english)
119
- Text generation | GPT-2 | Hugging Face | [Custom](https://github.com/openai/gpt-2/blob/master/LICENSE) | [Link](https://huggingface.co/gpt2)
137
+ Text generation | GPT-2 | OpenAI | [Custom](https://github.com/openai/gpt-2/blob/master/LICENSE) | [Link](https://huggingface.co/gpt2)
120
138
 
121
139
  Some models are [quantized](https://medium.com/microsoftazure/faster-and-smaller-quantized-nlp-with-hugging-face-and-onnx-runtime-ec5525473bb7) to make them faster and smaller.
122
140
 
@@ -51,7 +51,7 @@ module Informers
51
51
  attention_mask: attention_mask
52
52
  }
53
53
  output = @model.predict(input)
54
- scores = output["output_0"]
54
+ scores = output["output_0"] || output["last_hidden_state"]
55
55
 
56
56
  singular ? scores.first : scores
57
57
  end
@@ -74,7 +74,8 @@ module Informers
74
74
  raise "More than one mask_token (<mask>) is not supported" if v.size > 1
75
75
  end
76
76
 
77
- outputs = @model.predict(input)["output_0"]
77
+ res = @model.predict(input)
78
+ outputs = res["output_0"] || res["logits"]
78
79
  batch_size = outputs.size
79
80
 
80
81
  results = []
data/lib/informers/ner.rb CHANGED
@@ -38,12 +38,12 @@ module Informers
38
38
  attention_mask: [[1] * tokens.size],
39
39
  token_type_ids: [[0] * tokens.size]
40
40
  }
41
- output = @model.predict(input)
41
+ res = @model.predict(input)
42
42
 
43
43
  # transform
44
- entities = output["output_0"][0]
44
+ output = res["output_0"] || res["logits"]
45
45
  score =
46
- entities.map do |e|
46
+ output[0].map do |e|
47
47
  values = e.map { |v| Math.exp(v) }
48
48
  sum = values.sum
49
49
  values.map { |v| v / sum }
@@ -67,8 +67,8 @@ module Informers
67
67
  }
68
68
  output = @model.predict(input)
69
69
 
70
- start = output["output_0"]
71
- stop = output["output_1"]
70
+ start = output["output_0"] || output["start_logits"]
71
+ stop = output["output_1"] || output["end_logits"]
72
72
 
73
73
  # transform
74
74
  answers = []
@@ -50,11 +50,12 @@ module Informers
50
50
  input_ids: input_ids,
51
51
  attention_mask: attention_mask
52
52
  }
53
- output = @model.predict(input)
53
+ res = @model.predict(input)
54
+ output = res["output_0"] || res["logits"]
54
55
 
55
56
  # transform
56
57
  scores =
57
- output["output_0"].map do |row|
58
+ output.map do |row|
58
59
  mapped = row.map { |v| Math.exp(v) }
59
60
  sum = mapped.sum
60
61
  mapped.map { |v| v / sum }
@@ -31,11 +31,21 @@ module Informers
31
31
  input = {
32
32
  input_ids: [tokens]
33
33
  }
34
+ if @model.inputs.any? { |i| i[:name] == "attention_mask" }
35
+ input[:attention_mask] = [[1] * tokens.size]
36
+ end
37
+
38
+ output_name =
39
+ if @model.outputs.any? { |o| o[:name] == "output_0" }
40
+ "output_0"
41
+ else
42
+ "logits"
43
+ end
34
44
 
35
45
  (max_length - tokens.size).times do |i|
36
- output = @model.predict(input, output_type: :numo, output_names: ["output_0"])
46
+ output = @model.predict(input, output_type: :numo, output_names: [output_name])
37
47
  # passed to input_ids
38
- tokens << output["output_0"][0, true, true][-1, true].max_index
48
+ tokens << output[output_name][0, true, true][-1, true].max_index
39
49
  end
40
50
 
41
51
  @decoder.ids_to_text(tokens)
@@ -1,3 +1,3 @@
1
1
  module Informers
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: informers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-25 00:00:00.000000000 Z
11
+ date: 2022-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: blingfire
@@ -53,7 +53,7 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.5.1
55
55
  description:
56
- email: andrew@chartkick.com
56
+ email: andrew@ankane.org
57
57
  executables: []
58
58
  extensions: []
59
59
  extra_rdoc_files: []
@@ -91,14 +91,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
91
91
  requirements:
92
92
  - - ">="
93
93
  - !ruby/object:Gem::Version
94
- version: '2.5'
94
+ version: '2.7'
95
95
  required_rubygems_version: !ruby/object:Gem::Requirement
96
96
  requirements:
97
97
  - - ">="
98
98
  - !ruby/object:Gem::Version
99
99
  version: '0'
100
100
  requirements: []
101
- rubygems_version: 3.2.22
101
+ rubygems_version: 3.3.7
102
102
  signing_key:
103
103
  specification_version: 4
104
104
  summary: State-of-the-art natural language processing for Ruby