informers 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -5
- data/lib/informers/feature_extraction.rb +1 -1
- data/lib/informers/fill_mask.rb +2 -1
- data/lib/informers/ner.rb +3 -3
- data/lib/informers/question_answering.rb +2 -2
- data/lib/informers/sentiment_analysis.rb +3 -2
- data/lib/informers/text_generation.rb +12 -2
- data/lib/informers/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22f7bcebf0670078b65fdf9cba4d2b937c853a3b10cf36e47f50781e2663225c
|
4
|
+
data.tar.gz: 940c96ec6b749b7e0b0c283456e40bfe9e6cbb3a58e8fa11f6367e87b05d8694
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cd8b58aae6e885409e297bc1ba09aedd029bb3dc26a193251f33c2bf6c9f6a8da69cb3727f799296a8c6644b014afc715e783a1e19a1074982af531e40db57b
|
7
|
+
data.tar.gz: 6f63489d0b303e9a7de13df11d5074bd4cb2dfa44febee4061262d5c188eeb62a7c975e89567048f801fa183c8d56925275768fccc9a4b5a48255abeeb379345
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -8,8 +8,6 @@ Supports:
|
|
8
8
|
- Question answering
|
9
9
|
- Named-entity recognition
|
10
10
|
- Text generation
|
11
|
-
- Summarization - *in development*
|
12
|
-
- Translation - *in development*
|
13
11
|
|
14
12
|
[Build Status](https://github.com/ankane/informers/actions)
|
15
13
|
|
@@ -18,7 +16,7 @@ Supports:
|
|
18
16
|
Add this line to your application’s Gemfile:
|
19
17
|
|
20
18
|
```ruby
|
21
|
-
gem 'informers'
|
19
|
+
gem "informers"
|
22
20
|
```
|
23
21
|
|
24
22
|
## Getting Started
|
@@ -26,7 +24,9 @@ gem 'informers'
|
|
26
24
|
- [Sentiment analysis](#sentiment-analysis)
|
27
25
|
- [Question answering](#question-answering)
|
28
26
|
- [Named-entity recognition](#named-entity-recognition)
|
29
|
-
- [Text
|
27
|
+
- [Text generation](#text-generation)
|
28
|
+
- [Feature extraction](#feature-extraction)
|
29
|
+
- [Fill mask](#fill-mask)
|
30
30
|
|
31
31
|
### Sentiment Analysis
|
32
32
|
|
@@ -109,6 +109,24 @@ This returns
|
|
109
109
|
As far as I am concerned, I will be the first to admit that I am not a fan of the idea of a "free market." I think that the idea of a free market is a bit of a stretch. I think that the idea
|
110
110
|
```
|
111
111
|
|
112
|
+
### Feature Extraction
|
113
|
+
|
114
|
+
First, export a [pretrained model](tools/export.md).
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
model = Informers::FeatureExtraction.new("feature-extraction.onnx")
|
118
|
+
model.predict("This is super cool")
|
119
|
+
```
|
120
|
+
|
121
|
+
### Fill Mask
|
122
|
+
|
123
|
+
First, export a [pretrained model](tools/export.md).
|
124
|
+
|
125
|
+
```ruby
|
126
|
+
model = Informers::FillMask.new("fill-mask.onnx")
|
127
|
+
model.predict("This is a great <mask>")
|
128
|
+
```
|
129
|
+
|
112
130
|
## Models
|
113
131
|
|
114
132
|
Task | Description | Contributor | License | Link
|
@@ -116,7 +134,7 @@ Task | Description | Contributor | License | Link
|
|
116
134
|
Sentiment analysis | DistilBERT fine-tuned on SST-2 | Hugging Face | Apache-2.0 | [Link](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)
|
117
135
|
Question answering | DistilBERT fine-tuned on SQuAD | Hugging Face | Apache-2.0 | [Link](https://huggingface.co/distilbert-base-cased-distilled-squad)
|
118
136
|
Named-entity recognition | BERT fine-tuned on CoNLL03 | Bayerische Staatsbibliothek | In-progress | [Link](https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english)
|
119
|
-
Text generation | GPT-2 |
|
137
|
+
Text generation | GPT-2 | OpenAI | [Custom](https://github.com/openai/gpt-2/blob/master/LICENSE) | [Link](https://huggingface.co/gpt2)
|
120
138
|
|
121
139
|
Some models are [quantized](https://medium.com/microsoftazure/faster-and-smaller-quantized-nlp-with-hugging-face-and-onnx-runtime-ec5525473bb7) to make them faster and smaller.
|
122
140
|
|
data/lib/informers/fill_mask.rb
CHANGED
@@ -74,7 +74,8 @@ module Informers
|
|
74
74
|
raise "More than one mask_token (<mask>) is not supported" if v.size > 1
|
75
75
|
end
|
76
76
|
|
77
|
-
|
77
|
+
res = @model.predict(input)
|
78
|
+
outputs = res["output_0"] || res["logits"]
|
78
79
|
batch_size = outputs.size
|
79
80
|
|
80
81
|
results = []
|
data/lib/informers/ner.rb
CHANGED
@@ -38,12 +38,12 @@ module Informers
|
|
38
38
|
attention_mask: [[1] * tokens.size],
|
39
39
|
token_type_ids: [[0] * tokens.size]
|
40
40
|
}
|
41
|
-
|
41
|
+
res = @model.predict(input)
|
42
42
|
|
43
43
|
# transform
|
44
|
-
|
44
|
+
output = res["output_0"] || res["logits"]
|
45
45
|
score =
|
46
|
-
|
46
|
+
output[0].map do |e|
|
47
47
|
values = e.map { |v| Math.exp(v) }
|
48
48
|
sum = values.sum
|
49
49
|
values.map { |v| v / sum }
|
@@ -67,8 +67,8 @@ module Informers
|
|
67
67
|
}
|
68
68
|
output = @model.predict(input)
|
69
69
|
|
70
|
-
start = output["output_0"]
|
71
|
-
stop = output["output_1"]
|
70
|
+
start = output["output_0"] || output["start_logits"]
|
71
|
+
stop = output["output_1"] || output["end_logits"]
|
72
72
|
|
73
73
|
# transform
|
74
74
|
answers = []
|
@@ -50,11 +50,12 @@ module Informers
|
|
50
50
|
input_ids: input_ids,
|
51
51
|
attention_mask: attention_mask
|
52
52
|
}
|
53
|
-
|
53
|
+
res = @model.predict(input)
|
54
|
+
output = res["output_0"] || res["logits"]
|
54
55
|
|
55
56
|
# transform
|
56
57
|
scores =
|
57
|
-
output
|
58
|
+
output.map do |row|
|
58
59
|
mapped = row.map { |v| Math.exp(v) }
|
59
60
|
sum = mapped.sum
|
60
61
|
mapped.map { |v| v / sum }
|
@@ -31,11 +31,21 @@ module Informers
|
|
31
31
|
input = {
|
32
32
|
input_ids: [tokens]
|
33
33
|
}
|
34
|
+
if @model.inputs.any? { |i| i[:name] == "attention_mask" }
|
35
|
+
input[:attention_mask] = [[1] * tokens.size]
|
36
|
+
end
|
37
|
+
|
38
|
+
output_name =
|
39
|
+
if @model.outputs.any? { |o| o[:name] == "output_0" }
|
40
|
+
"output_0"
|
41
|
+
else
|
42
|
+
"logits"
|
43
|
+
end
|
34
44
|
|
35
45
|
(max_length - tokens.size).times do |i|
|
36
|
-
output = @model.predict(input, output_type: :numo, output_names: [
|
46
|
+
output = @model.predict(input, output_type: :numo, output_names: [output_name])
|
37
47
|
# passed to input_ids
|
38
|
-
tokens << output[
|
48
|
+
tokens << output[output_name][0, true, true][-1, true].max_index
|
39
49
|
end
|
40
50
|
|
41
51
|
@decoder.ids_to_text(tokens)
|
data/lib/informers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: informers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: blingfire
|
@@ -53,7 +53,7 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.5.1
|
55
55
|
description:
|
56
|
-
email: andrew@
|
56
|
+
email: andrew@ankane.org
|
57
57
|
executables: []
|
58
58
|
extensions: []
|
59
59
|
extra_rdoc_files: []
|
@@ -91,14 +91,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
91
|
requirements:
|
92
92
|
- - ">="
|
93
93
|
- !ruby/object:Gem::Version
|
94
|
-
version: '2.
|
94
|
+
version: '2.7'
|
95
95
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
96
|
requirements:
|
97
97
|
- - ">="
|
98
98
|
- !ruby/object:Gem::Version
|
99
99
|
version: '0'
|
100
100
|
requirements: []
|
101
|
-
rubygems_version: 3.
|
101
|
+
rubygems_version: 3.3.7
|
102
102
|
signing_key:
|
103
103
|
specification_version: 4
|
104
104
|
summary: State-of-the-art natural language processing for Ruby
|