ruby-spacy 0.1.0 → 0.1.4.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +3 -1
- data/README.md +123 -77
- data/examples/get_started/lexeme.rb +2 -2
- data/examples/get_started/linguistic_annotations.rb +1 -1
- data/examples/get_started/morphology.rb +45 -0
- data/examples/get_started/most_similar.rb +28 -27
- data/examples/get_started/named_entities.rb +1 -1
- data/examples/get_started/pos_tags_and_dependencies.rb +18 -18
- data/examples/get_started/similarity.rb +2 -2
- data/examples/japanese/ancestors.rb +9 -11
- data/examples/japanese/entity_annotations_and_labels.rb +1 -1
- data/examples/japanese/lemmatization.rb +1 -1
- data/examples/japanese/most_similar.rb +28 -27
- data/examples/japanese/named_entity_recognition.rb +1 -1
- data/examples/japanese/navigating_parse_tree.rb +18 -18
- data/examples/japanese/noun_chunks.rb +1 -1
- data/examples/japanese/pos_tagging.rb +20 -20
- data/examples/japanese/visualizing_dependencies.rb +2 -2
- data/examples/japanese/visualizing_named_entities.rb +1 -1
- data/examples/linguistic_features/ancestors.rb +13 -10
- data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
- data/examples/linguistic_features/information_extraction.rb +2 -2
- data/examples/linguistic_features/iterating_children.rb +2 -2
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +5 -5
- data/examples/linguistic_features/lemmatization.rb +1 -1
- data/examples/linguistic_features/named_entity_recognition.rb +1 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
- data/examples/linguistic_features/noun_chunks.rb +1 -1
- data/examples/linguistic_features/pos_tagging.rb +1 -1
- data/examples/linguistic_features/retokenize_1.rb +1 -1
- data/examples/linguistic_features/retokenize_2.rb +2 -2
- data/examples/linguistic_features/rule_based_morphology.rb +1 -1
- data/examples/linguistic_features/similarity.rb +2 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
- data/examples/linguistic_features/similarity_between_spans.rb +2 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
- data/lib/ruby-spacy.rb +493 -300
- data/lib/ruby-spacy/version.rb +1 -1
- data/ruby-spacy.gemspec +1 -1
- metadata +6 -5
- data/examples/linguistic_features/morphology.rb +0 -17
- data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5fc769c4257e78333c3d6dc114d76b39c31b57365d032d7b741358f34b37099e
+  data.tar.gz: 281a9997a325d16819574c96a0696eeedb59af0709d8f25814e6fa0d39646757
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8b387962ee82b60499208225ab7cfca631a55a6eb305212f3b14a2c802f67cfa685b23e762a3877e4ba5ae01308c4909eb26286bcde2fc9683dedeee9059db88
+  data.tar.gz: d94e788a1458f6be22db486e43180f7cbcce516ad053baa8724ce6eacd7869c3123c9f292845fe7e400aa2115786617f5ad69bda5c71b403221c98c084dc9900
data/CHANGELOG.md ADDED
@@ -0,0 +1,23 @@
+# Change Log
+
+## 0.1.4.1 - 2021-07-06
+- Test code refined
+- `Spacy::Language::most_similar` returns an array of hash-based objects that accepts method calls
+
+## 0.1.4 - 2021-06-26
+### Added
+- `Spacy::Lexeme` class
+
+- `Spacy::Token#morpheme` method
+## 0.1.3 - 2021-06-26
+- Code cleanup
+
+## 0.1.2 - 2021-06-26
+### Added
+- `Spacy::Token#morpheme` method
+
+## 0.1.1 - 2021-06-26
+- Project description fixed
+
+## 0.1.0 - 2021-06-26
+- Initial release
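
The 0.1.4.1 entry above notes that `Spacy::Language::most_similar` now returns an array of hash-based objects that also respond to method calls such as `.text` and `.score`, which is why the example scripts further down in this diff switch to iterating with `results.each_with_index`. Below is a minimal sketch of the new calling pattern, adapted from those updated examples; it assumes the `en_core_web_lg` model is installed and that word vectors are available (scores will vary by model).

```ruby
require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_lg")

tokyo  = nlp.get_lexeme("Tokyo")
japan  = nlp.get_lexeme("Japan")
france = nlp.get_lexeme("France")

# Word-vector arithmetic: Tokyo - Japan + France should land near Paris.
query = tokyo.vector - japan.vector + france.vector

# As of 0.1.4.1 each result is a hash-based object that also accepts
# method calls, so .text and .score can be used directly.
nlp.most_similar(query, 10).each_with_index do |result, i|
  puts "#{i + 1}. #{result.text} (#{result.score})"
end
```

The same pattern appears in the updated `most_similar.rb` examples later in this diff, where the results feed a terminal-table.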
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ruby-spacy (0.1.0)
+    ruby-spacy (0.1.4.1)
       numpy (~> 0.4.0)
       pycall (~> 1.4.0)
       terminal-table (~> 3.0.1)
@@ -23,6 +23,8 @@ GEM
 
 PLATFORMS
   arm64-darwin-20
+  x86_64-darwin-20
+  x86_64-linux
 
 DEPENDENCIES
   github-markup
data/README.md CHANGED
@@ -111,12 +111,10 @@ Output:
 |:-----:|:--:|:-------:|:--:|:------:|:----:|:-------:|:---:|:-:|:--:|:-------:|
 | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
 
-### Part-of-speech
+### Part-of-speech and dependency
 
 → [spaCy: Part-of-speech tags and dependencies](https://spacy.io/usage/spacy-101#annotations-pos-deps)
 
-→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
-
 Ruby code:
 
 ```ruby
@@ -126,73 +124,117 @@ require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 
+headings = ["text", "lemma", "pos", "tag", "dep"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
 end
 
-headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```
 
 Output:
 
-| text | lemma | pos | tag | dep |
-
-| Apple | Apple | PROPN | NNP | nsubj |
-| is | be | AUX | VBZ | aux |
-| looking | look | VERB | VBG | ROOT |
-| at | at | ADP | IN | prep |
-| buying | buy | VERB | VBG | pcomp |
-| U.K. | U.K. | PROPN | NNP | dobj |
-| startup | startup | NOUN | NN | advcl |
-| for | for | ADP | IN | prep |
-| $ | $ | SYM | $ | quantmod |
-| 1 | 1 | NUM | CD | compound |
-| billion | billion | NUM | CD | pobj |
-
-### Part-of-speech
+| text | lemma | pos | tag | dep |
+|:--------|:--------|:------|:----|:---------|
+| Apple | Apple | PROPN | NNP | nsubj |
+| is | be | AUX | VBZ | aux |
+| looking | look | VERB | VBG | ROOT |
+| at | at | ADP | IN | prep |
+| buying | buy | VERB | VBG | pcomp |
+| U.K. | U.K. | PROPN | NNP | dobj |
+| startup | startup | NOUN | NN | advcl |
+| for | for | ADP | IN | prep |
+| $ | $ | SYM | $ | quantmod |
+| 1 | 1 | NUM | CD | compound |
+| billion | billion | NUM | CD | pobj |
+
+### Part-of-speech and dependency (Japanese)
 
 Ruby code:
 
 ```ruby
-require
+require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("ja_core_news_lg")
-doc = nlp.read("任天堂は1983
+doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
 
+headings = ["text", "lemma", "pos", "tag", "dep"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
 end
 
-headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```
 
 Output:
 
-| text | lemma | pos | tag | dep |
-
-| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
-| は | は | ADP | 助詞-係助詞 | case |
-| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
-| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
-| に | に | ADP | 助詞-格助詞 | case |
-| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
-| を | を | ADP | 助詞-格助詞 | case |
-| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
-| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
-| で | で | ADP | 助詞-格助詞 | case |
-| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
-| し | する | AUX | 動詞-非自立可能 | aux |
-| た | た | AUX | 助動詞 | aux |
-| 。 | 。 | PUNCT | 補助記号-句点 | punct |
+| text | lemma | pos | tag | dep |
+|:-----------|:-----------|:------|:-------------------------|:-------|
+| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
+| は | は | ADP | 助詞-係助詞 | case |
+| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
+| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
+| に | に | ADP | 助詞-格助詞 | case |
+| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
+| を | を | ADP | 助詞-格助詞 | case |
+| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
+| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
+| で | で | ADP | 助詞-格助詞 | case |
+| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
+| し | する | AUX | 動詞-非自立可能 | aux |
+| た | た | AUX | 助動詞 | aux |
+| 。 | 。 | PUNCT | 補助記号-句点 | punct |
+
+### Morphology
+
+→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
+
+Ruby code:
+
+```ruby
+require "ruby-spacy"
+require "terminal-table"
+
+nlp = Spacy::Language.new("en_core_web_sm")
+doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
+
+headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
+rows = []
+
+doc.each do |token|
+  morph = token.morphology.map do |k, v|
+    "#{k} = #{v}"
+  end.join("\n")
+  rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
+end
+
+table = Terminal::Table.new rows: rows, headings: headings
+puts table
+
+```
+
+Output:
+
+| text | shape | is_alpha | is_stop | morphology |
+|:--------|:------|:---------|:--------|:------------------------------------------------------------------------------------|
+| Apple | Xxxxx | true | false | NounType = Prop<br />Number = Sing |
+| is | xx | true | true | Mood = Ind<br />Number = Sing<br />Person = 3<br />Tense = Pres<br />VerbForm = Fin |
+| looking | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
+| at | xx | true | true | |
+| buying | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
+| U.K. | X.X. | false | false | NounType = Prop<br />Number = Sing |
+| startup | xxxx | true | false | Number = Sing |
+| for | xxx | true | true | |
+| $ | $ | false | false | |
+| 1 | d | false | false | NumType = Card |
+| billion | xxxx | true | false | NumType = Card |
 
 ### Visualizing dependency
 
@@ -258,7 +300,7 @@ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 headings = ["text", "start_char", "end_char", "label"]
@@ -290,7 +332,7 @@ doc = nlp.read(sentence)
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 headings = ["text", "start", "end", "label"]
@@ -351,8 +393,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
 doc1 = nlp.read("I like salty fries and hamburgers.")
 doc2 = nlp.read("Fast food tastes very good.")
 
-puts "Doc 1: " + doc1
-puts "Doc 2: " + doc2
+puts "Doc 1: " + doc1.text
+puts "Doc 2: " + doc2.text
 puts "Similarity: #{doc1.similarity(doc2)}"
 
 ```
@@ -406,32 +448,36 @@ france = nlp.get_lexeme("France")
 
 query = tokyo.vector - japan.vector + france.vector
 
+headings = ["rank", "text", "score"]
 rows = []
 
-results = nlp.most_similar(query,
-results.
-
+results = nlp.most_similar(query, 20)
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end
 
-headings = ["key", "text", "score"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```
 
 Output:
 
-
-
-
-
-
-
-
-
-
-
-
-
+| rank | text | score |
+|:-----|:------------|:-------------------|
+| 1 | FRANCE | 0.8346999883651733 |
+| 2 | France | 0.8346999883651733 |
+| 3 | france | 0.8346999883651733 |
+| 4 | PARIS | 0.7703999876976013 |
+| 5 | paris | 0.7703999876976013 |
+| 6 | Paris | 0.7703999876976013 |
+| 7 | TOULOUSE | 0.6381999850273132 |
+| 8 | Toulouse | 0.6381999850273132 |
+| 9 | toulouse | 0.6381999850273132 |
+| 10 | marseille | 0.6370999813079834 |
+
+
+
 
 
 ### Word vector calculation (Japanese)
@@ -452,33 +498,33 @@ france = nlp.get_lexeme("フランス")
 
 query = tokyo.vector - japan.vector + france.vector
 
+headings = ["rank", "text", "score"]
 rows = []
 
-results = nlp.most_similar(query,
-results.
-
+results = nlp.most_similar(query, 20)
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end
 
-headings = ["key", "text", "score"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```
 
 Output:
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+| rank | text | score |
+|:-----|:---------------|:-------------------|
+| 1 | パリ | 0.7376999855041504 |
+| 2 | フランス | 0.7221999764442444 |
+| 3 | 東京 | 0.6697999835014343 |
+| 4 | ストラスブール | 0.631600022315979 |
+| 5 | リヨン | 0.5939000248908997 |
+| 6 | Paris | 0.574400007724762 |
+| 7 | ベルギー | 0.5683000087738037 |
+| 8 | ニース | 0.5679000020027161 |
+| 9 | アルザス | 0.5644999742507935 |
+| 10 | 南仏 | 0.5547999739646912 |
 
 ## Author
 
data/examples/get_started/lexeme.rb CHANGED
@@ -8,8 +8,8 @@ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
 rows = []
 
 doc.each do |word|
-  lexeme =
-  rows << [lexeme.text, lexeme.
+  lexeme = nlp.vocab(word.text)
+  rows << [lexeme.text, lexeme.shape, lexeme.prefix, lexeme.suffix, lexeme.is_alpha, lexeme.is_digit]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/get_started/morphology.rb ADDED
@@ -0,0 +1,45 @@
+require "ruby-spacy"
+require "terminal-table"
+
+nlp = Spacy::Language.new("en_core_web_sm")
+doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
+
+headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
+rows = []
+
+doc.each do |token|
+  morph = token.morphology.map do |k, v|
+    "#{k} = #{v}"
+  end.join("\n")
+  # end.join("<br />")
+  rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
+end
+
+table = Terminal::Table.new rows: rows, headings: headings
+puts table
+
+# +---------+-------+----------+---------+-----------------+
+# | text | shape | is_alpha | is_stop | morphology |
+# +---------+-------+----------+---------+-----------------+
+# | Apple | Xxxxx | true | false | NounType = Prop |
+# | | | | | Number = Sing |
+# | is | xx | true | true | Mood = Ind |
+# | | | | | Number = Sing |
+# | | | | | Person = 3 |
+# | | | | | Tense = Pres |
+# | | | | | VerbForm = Fin |
+# | looking | xxxx | true | false | Aspect = Prog |
+# | | | | | Tense = Pres |
+# | | | | | VerbForm = Part |
+# | at | xx | true | true | |
+# | buying | xxxx | true | false | Aspect = Prog |
+# | | | | | Tense = Pres |
+# | | | | | VerbForm = Part |
+# | U.K. | X.X. | false | false | NounType = Prop |
+# | | | | | Number = Sing |
+# | startup | xxxx | true | false | Number = Sing |
+# | for | xxx | true | true | |
+# | $ | $ | false | false | |
+# | 1 | d | false | false | NumType = Card |
+# | billion | xxxx | true | false | NumType = Card |
+# +---------+-------+----------+---------+-----------------+
data/examples/get_started/most_similar.rb CHANGED
@@ -9,38 +9,39 @@ france = nlp.get_lexeme("France")
 
 query = tokyo.vector - japan.vector + france.vector
 
-headings = ["
+headings = ["rank", "text", "score"]
 rows = []
 
 results = nlp.most_similar(query, 20)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 
-#
-# |
-#
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-#
+# +------+-------------+--------------------+
+# | rank | text | score |
+# +------+-------------+--------------------+
+# | 1 | FRANCE | 0.8346999883651733 |
+# | 2 | France | 0.8346999883651733 |
+# | 3 | france | 0.8346999883651733 |
+# | 4 | PARIS | 0.7703999876976013 |
+# | 5 | paris | 0.7703999876976013 |
+# | 6 | Paris | 0.7703999876976013 |
+# | 7 | TOULOUSE | 0.6381999850273132 |
+# | 8 | Toulouse | 0.6381999850273132 |
+# | 9 | toulouse | 0.6381999850273132 |
+# | 10 | marseille | 0.6370999813079834 |
+# | 11 | Marseille | 0.6370999813079834 |
+# | 12 | MARSEILLE | 0.6370999813079834 |
+# | 13 | Bordeaux | 0.6096000075340271 |
+# | 14 | BORDEAUX | 0.6096000075340271 |
+# | 15 | bordeaux | 0.6096000075340271 |
+# | 16 | prague | 0.6075000166893005 |
+# | 17 | PRAGUE | 0.6075000166893005 |
+# | 18 | Prague | 0.6075000166893005 |
+# | 19 | SWITZERLAND | 0.6068000197410583 |
+# | 20 | switzerland | 0.6068000197410583 |
+# +------+-------------+--------------------+