ruby-spacy 0.1.0 → 0.1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Gemfile.lock +3 -1
- data/README.md +123 -77
- data/examples/get_started/lexeme.rb +2 -2
- data/examples/get_started/linguistic_annotations.rb +1 -1
- data/examples/get_started/morphology.rb +45 -0
- data/examples/get_started/most_similar.rb +28 -27
- data/examples/get_started/named_entities.rb +1 -1
- data/examples/get_started/pos_tags_and_dependencies.rb +18 -18
- data/examples/get_started/similarity.rb +2 -2
- data/examples/japanese/ancestors.rb +9 -11
- data/examples/japanese/entity_annotations_and_labels.rb +1 -1
- data/examples/japanese/lemmatization.rb +1 -1
- data/examples/japanese/most_similar.rb +28 -27
- data/examples/japanese/named_entity_recognition.rb +1 -1
- data/examples/japanese/navigating_parse_tree.rb +18 -18
- data/examples/japanese/noun_chunks.rb +1 -1
- data/examples/japanese/pos_tagging.rb +20 -20
- data/examples/japanese/visualizing_dependencies.rb +2 -2
- data/examples/japanese/visualizing_named_entities.rb +1 -1
- data/examples/linguistic_features/ancestors.rb +13 -10
- data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
- data/examples/linguistic_features/information_extraction.rb +2 -2
- data/examples/linguistic_features/iterating_children.rb +2 -2
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +5 -5
- data/examples/linguistic_features/lemmatization.rb +1 -1
- data/examples/linguistic_features/named_entity_recognition.rb +1 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
- data/examples/linguistic_features/noun_chunks.rb +1 -1
- data/examples/linguistic_features/pos_tagging.rb +1 -1
- data/examples/linguistic_features/retokenize_1.rb +1 -1
- data/examples/linguistic_features/retokenize_2.rb +2 -2
- data/examples/linguistic_features/rule_based_morphology.rb +1 -1
- data/examples/linguistic_features/similarity.rb +2 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
- data/examples/linguistic_features/similarity_between_spans.rb +2 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
- data/lib/ruby-spacy.rb +493 -300
- data/lib/ruby-spacy/version.rb +1 -1
- data/ruby-spacy.gemspec +1 -1
- metadata +6 -5
- data/examples/linguistic_features/morphology.rb +0 -17
- data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fc769c4257e78333c3d6dc114d76b39c31b57365d032d7b741358f34b37099e
|
4
|
+
data.tar.gz: 281a9997a325d16819574c96a0696eeedb59af0709d8f25814e6fa0d39646757
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b387962ee82b60499208225ab7cfca631a55a6eb305212f3b14a2c802f67cfa685b23e762a3877e4ba5ae01308c4909eb26286bcde2fc9683dedeee9059db88
|
7
|
+
data.tar.gz: d94e788a1458f6be22db486e43180f7cbcce516ad053baa8724ce6eacd7869c3123c9f292845fe7e400aa2115786617f5ad69bda5c71b403221c98c084dc9900
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Change Log
|
2
|
+
|
3
|
+
## 0.1.4.1 - 2021-07-06
|
4
|
+
- Test code refined
|
5
|
+
- `Spacy::Language#most_similar` returns an array of hash-based objects that accept method calls
|
6
|
+
|
7
|
+
## 0.1.4 - 2021-06-26
|
8
|
+
### Added
|
9
|
+
- `Spacy::Lexeme` class
|
10
|
+
|
11
|
+
- `Spacy::Token#morpheme` method
|
12
|
+
## 0.1.3 - 2021-06-26
|
13
|
+
- Code cleanup
|
14
|
+
|
15
|
+
## 0.1.2 - 2021-06-26
|
16
|
+
### Added
|
17
|
+
- `Spacy::Token#morpheme` method
|
18
|
+
|
19
|
+
## 0.1.1 - 2021-06-26
|
20
|
+
- Project description fixed
|
21
|
+
|
22
|
+
## 0.1.0 - 2021-06-26
|
23
|
+
- Initial release
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
ruby-spacy (0.1.0)
|
4
|
+
ruby-spacy (0.1.4.1)
|
5
5
|
numpy (~> 0.4.0)
|
6
6
|
pycall (~> 1.4.0)
|
7
7
|
terminal-table (~> 3.0.1)
|
@@ -23,6 +23,8 @@ GEM
|
|
23
23
|
|
24
24
|
PLATFORMS
|
25
25
|
arm64-darwin-20
|
26
|
+
x86_64-darwin-20
|
27
|
+
x86_64-linux
|
26
28
|
|
27
29
|
DEPENDENCIES
|
28
30
|
github-markup
|
data/README.md
CHANGED
@@ -111,12 +111,10 @@ Output:
|
|
111
111
|
|:-----:|:--:|:-------:|:--:|:------:|:----:|:-------:|:---:|:-:|:--:|:-------:|
|
112
112
|
| Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
113
113
|
|
114
|
-
### Part-of-speech
|
114
|
+
### Part-of-speech and dependency
|
115
115
|
|
116
116
|
→ [spaCy: Part-of-speech tags and dependencies](https://spacy.io/usage/spacy-101#annotations-pos-deps)
|
117
117
|
|
118
|
-
→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
|
119
|
-
|
120
118
|
Ruby code:
|
121
119
|
|
122
120
|
```ruby
|
@@ -126,73 +124,117 @@ require "terminal-table"
|
|
126
124
|
nlp = Spacy::Language.new("en_core_web_sm")
|
127
125
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
128
126
|
|
127
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
129
128
|
rows = []
|
130
129
|
|
131
130
|
doc.each do |token|
|
132
|
-
rows << [token.text, token.
|
131
|
+
rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
|
133
132
|
end
|
134
133
|
|
135
|
-
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
136
134
|
table = Terminal::Table.new rows: rows, headings: headings
|
137
135
|
puts table
|
138
136
|
```
|
139
137
|
|
140
138
|
Output:
|
141
139
|
|
142
|
-
| text | lemma | pos | tag | dep |
|
143
|
-
|
144
|
-
| Apple | Apple | PROPN | NNP | nsubj |
|
145
|
-
| is | be | AUX | VBZ | aux |
|
146
|
-
| looking | look | VERB | VBG | ROOT |
|
147
|
-
| at | at | ADP | IN | prep |
|
148
|
-
| buying | buy | VERB | VBG | pcomp |
|
149
|
-
| U.K. | U.K. | PROPN | NNP | dobj |
|
150
|
-
| startup | startup | NOUN | NN | advcl |
|
151
|
-
| for | for | ADP | IN | prep |
|
152
|
-
| $ | $ | SYM | $ | quantmod |
|
153
|
-
| 1 | 1 | NUM | CD | compound |
|
154
|
-
| billion | billion | NUM | CD | pobj |
|
155
|
-
|
156
|
-
### Part-of-speech
|
140
|
+
| text | lemma | pos | tag | dep |
|
141
|
+
|:--------|:--------|:------|:----|:---------|
|
142
|
+
| Apple | Apple | PROPN | NNP | nsubj |
|
143
|
+
| is | be | AUX | VBZ | aux |
|
144
|
+
| looking | look | VERB | VBG | ROOT |
|
145
|
+
| at | at | ADP | IN | prep |
|
146
|
+
| buying | buy | VERB | VBG | pcomp |
|
147
|
+
| U.K. | U.K. | PROPN | NNP | dobj |
|
148
|
+
| startup | startup | NOUN | NN | advcl |
|
149
|
+
| for | for | ADP | IN | prep |
|
150
|
+
| $ | $ | SYM | $ | quantmod |
|
151
|
+
| 1 | 1 | NUM | CD | compound |
|
152
|
+
| billion | billion | NUM | CD | pobj |
|
153
|
+
|
154
|
+
### Part-of-speech and dependency (Japanese)
|
157
155
|
|
158
156
|
Ruby code:
|
159
157
|
|
160
158
|
```ruby
|
161
|
-
require
|
159
|
+
require "ruby-spacy"
|
162
160
|
require "terminal-table"
|
163
161
|
|
164
162
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
165
|
-
doc = nlp.read("任天堂は1983
|
163
|
+
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
166
164
|
|
165
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
167
166
|
rows = []
|
168
167
|
|
169
168
|
doc.each do |token|
|
170
|
-
rows << [token.text, token.
|
169
|
+
rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
|
171
170
|
end
|
172
171
|
|
173
|
-
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
174
172
|
table = Terminal::Table.new rows: rows, headings: headings
|
175
173
|
puts table
|
176
174
|
```
|
177
175
|
|
178
176
|
Output:
|
179
177
|
|
180
|
-
| text | lemma | pos | tag | dep |
|
181
|
-
|
182
|
-
| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
183
|
-
| は | は | ADP | 助詞-係助詞 | case |
|
184
|
-
| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
185
|
-
| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
186
|
-
| に | に | ADP | 助詞-格助詞 | case |
|
187
|
-
| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
188
|
-
| を | を | ADP | 助詞-格助詞 | case |
|
189
|
-
| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
190
|
-
| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
191
|
-
| で | で | ADP | 助詞-格助詞 | case |
|
192
|
-
| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
193
|
-
| し | する | AUX | 動詞-非自立可能 | aux |
|
194
|
-
| た | た | AUX | 助動詞 | aux |
|
195
|
-
| 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
178
|
+
| text | lemma | pos | tag | dep |
|
179
|
+
|:-----------|:-----------|:------|:-------------------------|:-------|
|
180
|
+
| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
181
|
+
| は | は | ADP | 助詞-係助詞 | case |
|
182
|
+
| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
183
|
+
| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
184
|
+
| に | に | ADP | 助詞-格助詞 | case |
|
185
|
+
| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
186
|
+
| を | を | ADP | 助詞-格助詞 | case |
|
187
|
+
| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
188
|
+
| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
189
|
+
| で | で | ADP | 助詞-格助詞 | case |
|
190
|
+
| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
191
|
+
| し | する | AUX | 動詞-非自立可能 | aux |
|
192
|
+
| た | た | AUX | 助動詞 | aux |
|
193
|
+
| 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
194
|
+
|
195
|
+
### Morphology
|
196
|
+
|
197
|
+
→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
|
198
|
+
|
199
|
+
Ruby code:
|
200
|
+
|
201
|
+
```ruby
|
202
|
+
require "ruby-spacy"
|
203
|
+
require "terminal-table"
|
204
|
+
|
205
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
206
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
207
|
+
|
208
|
+
headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
|
209
|
+
rows = []
|
210
|
+
|
211
|
+
doc.each do |token|
|
212
|
+
morph = token.morphology.map do |k, v|
|
213
|
+
"#{k} = #{v}"
|
214
|
+
end.join("\n")
|
215
|
+
rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
|
216
|
+
end
|
217
|
+
|
218
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
219
|
+
puts table
|
220
|
+
|
221
|
+
```
|
222
|
+
|
223
|
+
Output:
|
224
|
+
|
225
|
+
| text | shape | is_alpha | is_stop | morphology |
|
226
|
+
|:--------|:------|:---------|:--------|:------------------------------------------------------------------------------------|
|
227
|
+
| Apple | Xxxxx | true | false | NounType = Prop<br />Number = Sing |
|
228
|
+
| is | xx | true | true | Mood = Ind<br />Number = Sing<br />Person = 3<br />Tense = Pres<br />VerbForm = Fin |
|
229
|
+
| looking | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
|
230
|
+
| at | xx | true | true | |
|
231
|
+
| buying | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
|
232
|
+
| U.K. | X.X. | false | false | NounType = Prop<br />Number = Sing |
|
233
|
+
| startup | xxxx | true | false | Number = Sing |
|
234
|
+
| for | xxx | true | true | |
|
235
|
+
| $ | $ | false | false | |
|
236
|
+
| 1 | d | false | false | NumType = Card |
|
237
|
+
| billion | xxxx | true | false | NumType = Card |
|
196
238
|
|
197
239
|
### Visualizing dependency
|
198
240
|
|
@@ -258,7 +300,7 @@ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
|
258
300
|
rows = []
|
259
301
|
|
260
302
|
doc.ents.each do |ent|
|
261
|
-
rows << [ent.text, ent.start_char, ent.end_char, ent.
|
303
|
+
rows << [ent.text, ent.start_char, ent.end_char, ent.label]
|
262
304
|
end
|
263
305
|
|
264
306
|
headings = ["text", "start_char", "end_char", "label"]
|
@@ -290,7 +332,7 @@ doc = nlp.read(sentence)
|
|
290
332
|
rows = []
|
291
333
|
|
292
334
|
doc.ents.each do |ent|
|
293
|
-
rows << [ent.text, ent.start_char, ent.end_char, ent.
|
335
|
+
rows << [ent.text, ent.start_char, ent.end_char, ent.label]
|
294
336
|
end
|
295
337
|
|
296
338
|
headings = ["text", "start", "end", "label"]
|
@@ -351,8 +393,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
351
393
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
352
394
|
doc2 = nlp.read("Fast food tastes very good.")
|
353
395
|
|
354
|
-
puts "Doc 1: " + doc1
|
355
|
-
puts "Doc 2: " + doc2
|
396
|
+
puts "Doc 1: " + doc1.text
|
397
|
+
puts "Doc 2: " + doc2.text
|
356
398
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
357
399
|
|
358
400
|
```
|
@@ -406,32 +448,36 @@ france = nlp.get_lexeme("France")
|
|
406
448
|
|
407
449
|
query = tokyo.vector - japan.vector + france.vector
|
408
450
|
|
451
|
+
headings = ["rank", "text", "score"]
|
409
452
|
rows = []
|
410
453
|
|
411
|
-
results = nlp.most_similar(query,
|
412
|
-
results.
|
413
|
-
|
454
|
+
results = nlp.most_similar(query, 20)
|
455
|
+
results.each_with_index do |lexeme, i|
|
456
|
+
index = (i + 1).to_s
|
457
|
+
rows << [index, lexeme.text, lexeme.score]
|
414
458
|
end
|
415
459
|
|
416
|
-
headings = ["key", "text", "score"]
|
417
460
|
table = Terminal::Table.new rows: rows, headings: headings
|
418
461
|
puts table
|
419
462
|
```
|
420
463
|
|
421
464
|
Output:
|
422
465
|
|
423
|
-
|
|
424
|
-
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
466
|
+
| rank | text | score |
|
467
|
+
|:-----|:------------|:-------------------|
|
468
|
+
| 1 | FRANCE | 0.8346999883651733 |
|
469
|
+
| 2 | France | 0.8346999883651733 |
|
470
|
+
| 3 | france | 0.8346999883651733 |
|
471
|
+
| 4 | PARIS | 0.7703999876976013 |
|
472
|
+
| 5 | paris | 0.7703999876976013 |
|
473
|
+
| 6 | Paris | 0.7703999876976013 |
|
474
|
+
| 7 | TOULOUSE | 0.6381999850273132 |
|
475
|
+
| 8 | Toulouse | 0.6381999850273132 |
|
476
|
+
| 9 | toulouse | 0.6381999850273132 |
|
477
|
+
| 10 | marseille | 0.6370999813079834 |
|
478
|
+
|
479
|
+
|
480
|
+
|
435
481
|
|
436
482
|
|
437
483
|
### Word vector calculation (Japanese)
|
@@ -452,33 +498,33 @@ france = nlp.get_lexeme("フランス")
|
|
452
498
|
|
453
499
|
query = tokyo.vector - japan.vector + france.vector
|
454
500
|
|
501
|
+
headings = ["rank", "text", "score"]
|
455
502
|
rows = []
|
456
503
|
|
457
|
-
results = nlp.most_similar(query,
|
458
|
-
results.
|
459
|
-
|
504
|
+
results = nlp.most_similar(query, 20)
|
505
|
+
results.each_with_index do |lexeme, i|
|
506
|
+
index = (i + 1).to_s
|
507
|
+
rows << [index, lexeme.text, lexeme.score]
|
460
508
|
end
|
461
509
|
|
462
|
-
headings = ["key", "text", "score"]
|
463
510
|
table = Terminal::Table.new rows: rows, headings: headings
|
464
511
|
puts table
|
465
512
|
```
|
466
513
|
|
467
514
|
Output:
|
468
515
|
|
469
|
-
|
|
470
|
-
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
516
|
+
| rank | text | score |
|
517
|
+
|:-----|:---------------|:-------------------|
|
518
|
+
| 1 | パリ | 0.7376999855041504 |
|
519
|
+
| 2 | フランス | 0.7221999764442444 |
|
520
|
+
| 3 | 東京 | 0.6697999835014343 |
|
521
|
+
| 4 | ストラスブール | 0.631600022315979 |
|
522
|
+
| 5 | リヨン | 0.5939000248908997 |
|
523
|
+
| 6 | Paris | 0.574400007724762 |
|
524
|
+
| 7 | ベルギー | 0.5683000087738037 |
|
525
|
+
| 8 | ニース | 0.5679000020027161 |
|
526
|
+
| 9 | アルザス | 0.5644999742507935 |
|
527
|
+
| 10 | 南仏 | 0.5547999739646912 |
|
482
528
|
|
483
529
|
## Author
|
484
530
|
|
data/examples/get_started/lexeme.rb
CHANGED
@@ -8,8 +8,8 @@ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
|
|
8
8
|
rows = []
|
9
9
|
|
10
10
|
doc.each do |word|
|
11
|
-
lexeme =
|
12
|
-
rows << [lexeme.text, lexeme.
|
11
|
+
lexeme = nlp.vocab(word.text)
|
12
|
+
rows << [lexeme.text, lexeme.shape, lexeme.prefix, lexeme.suffix, lexeme.is_alpha, lexeme.is_digit]
|
13
13
|
end
|
14
14
|
|
15
15
|
table = Terminal::Table.new rows: rows, headings: headings
|
data/examples/get_started/morphology.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
|
+
|
7
|
+
headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |token|
|
11
|
+
morph = token.morphology.map do |k, v|
|
12
|
+
"#{k} = #{v}"
|
13
|
+
end.join("\n")
|
14
|
+
# end.join("<br />")
|
15
|
+
rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
|
16
|
+
end
|
17
|
+
|
18
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
19
|
+
puts table
|
20
|
+
|
21
|
+
# +---------+-------+----------+---------+-----------------+
|
22
|
+
# | text | shape | is_alpha | is_stop | morphology |
|
23
|
+
# +---------+-------+----------+---------+-----------------+
|
24
|
+
# | Apple | Xxxxx | true | false | NounType = Prop |
|
25
|
+
# | | | | | Number = Sing |
|
26
|
+
# | is | xx | true | true | Mood = Ind |
|
27
|
+
# | | | | | Number = Sing |
|
28
|
+
# | | | | | Person = 3 |
|
29
|
+
# | | | | | Tense = Pres |
|
30
|
+
# | | | | | VerbForm = Fin |
|
31
|
+
# | looking | xxxx | true | false | Aspect = Prog |
|
32
|
+
# | | | | | Tense = Pres |
|
33
|
+
# | | | | | VerbForm = Part |
|
34
|
+
# | at | xx | true | true | |
|
35
|
+
# | buying | xxxx | true | false | Aspect = Prog |
|
36
|
+
# | | | | | Tense = Pres |
|
37
|
+
# | | | | | VerbForm = Part |
|
38
|
+
# | U.K. | X.X. | false | false | NounType = Prop |
|
39
|
+
# | | | | | Number = Sing |
|
40
|
+
# | startup | xxxx | true | false | Number = Sing |
|
41
|
+
# | for | xxx | true | true | |
|
42
|
+
# | $ | $ | false | false | |
|
43
|
+
# | 1 | d | false | false | NumType = Card |
|
44
|
+
# | billion | xxxx | true | false | NumType = Card |
|
45
|
+
# +---------+-------+----------+---------+-----------------+
|
data/examples/get_started/most_similar.rb
CHANGED
@@ -9,38 +9,39 @@ france = nlp.get_lexeme("France")
|
|
9
9
|
|
10
10
|
query = tokyo.vector - japan.vector + france.vector
|
11
11
|
|
12
|
-
headings = ["
|
12
|
+
headings = ["rank", "text", "score"]
|
13
13
|
rows = []
|
14
14
|
|
15
15
|
results = nlp.most_similar(query, 20)
|
16
|
-
results.
|
17
|
-
|
16
|
+
results.each_with_index do |lexeme, i|
|
17
|
+
index = (i + 1).to_s
|
18
|
+
rows << [index, lexeme.text, lexeme.score]
|
18
19
|
end
|
19
20
|
|
20
21
|
table = Terminal::Table.new rows: rows, headings: headings
|
21
22
|
puts table
|
22
23
|
|
23
|
-
#
|
24
|
-
# |
|
25
|
-
#
|
26
|
-
# |
|
27
|
-
# |
|
28
|
-
# |
|
29
|
-
# |
|
30
|
-
# |
|
31
|
-
# |
|
32
|
-
# |
|
33
|
-
# |
|
34
|
-
# |
|
35
|
-
# |
|
36
|
-
# |
|
37
|
-
# |
|
38
|
-
# |
|
39
|
-
# |
|
40
|
-
# |
|
41
|
-
# |
|
42
|
-
# |
|
43
|
-
# |
|
44
|
-
# |
|
45
|
-
# |
|
46
|
-
#
|
24
|
+
# +------+-------------+--------------------+
|
25
|
+
# | rank | text | score |
|
26
|
+
# +------+-------------+--------------------+
|
27
|
+
# | 1 | FRANCE | 0.8346999883651733 |
|
28
|
+
# | 2 | France | 0.8346999883651733 |
|
29
|
+
# | 3 | france | 0.8346999883651733 |
|
30
|
+
# | 4 | PARIS | 0.7703999876976013 |
|
31
|
+
# | 5 | paris | 0.7703999876976013 |
|
32
|
+
# | 6 | Paris | 0.7703999876976013 |
|
33
|
+
# | 7 | TOULOUSE | 0.6381999850273132 |
|
34
|
+
# | 8 | Toulouse | 0.6381999850273132 |
|
35
|
+
# | 9 | toulouse | 0.6381999850273132 |
|
36
|
+
# | 10 | marseille | 0.6370999813079834 |
|
37
|
+
# | 11 | Marseille | 0.6370999813079834 |
|
38
|
+
# | 12 | MARSEILLE | 0.6370999813079834 |
|
39
|
+
# | 13 | Bordeaux | 0.6096000075340271 |
|
40
|
+
# | 14 | BORDEAUX | 0.6096000075340271 |
|
41
|
+
# | 15 | bordeaux | 0.6096000075340271 |
|
42
|
+
# | 16 | prague | 0.6075000166893005 |
|
43
|
+
# | 17 | PRAGUE | 0.6075000166893005 |
|
44
|
+
# | 18 | Prague | 0.6075000166893005 |
|
45
|
+
# | 19 | SWITZERLAND | 0.6068000197410583 |
|
46
|
+
# | 20 | switzerland | 0.6068000197410583 |
|
47
|
+
# +------+-------------+--------------------+
|