ruby-spacy 0.1.0 → 0.1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Gemfile.lock +3 -1
  4. data/README.md +123 -77
  5. data/examples/get_started/lexeme.rb +2 -2
  6. data/examples/get_started/linguistic_annotations.rb +1 -1
  7. data/examples/get_started/morphology.rb +45 -0
  8. data/examples/get_started/most_similar.rb +28 -27
  9. data/examples/get_started/named_entities.rb +1 -1
  10. data/examples/get_started/pos_tags_and_dependencies.rb +18 -18
  11. data/examples/get_started/similarity.rb +2 -2
  12. data/examples/japanese/ancestors.rb +9 -11
  13. data/examples/japanese/entity_annotations_and_labels.rb +1 -1
  14. data/examples/japanese/lemmatization.rb +1 -1
  15. data/examples/japanese/most_similar.rb +28 -27
  16. data/examples/japanese/named_entity_recognition.rb +1 -1
  17. data/examples/japanese/navigating_parse_tree.rb +18 -18
  18. data/examples/japanese/noun_chunks.rb +1 -1
  19. data/examples/japanese/pos_tagging.rb +20 -20
  20. data/examples/japanese/visualizing_dependencies.rb +2 -2
  21. data/examples/japanese/visualizing_named_entities.rb +1 -1
  22. data/examples/linguistic_features/ancestors.rb +13 -10
  23. data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
  24. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
  25. data/examples/linguistic_features/information_extraction.rb +2 -2
  26. data/examples/linguistic_features/iterating_children.rb +2 -2
  27. data/examples/linguistic_features/iterating_lefts_and_rights.rb +5 -5
  28. data/examples/linguistic_features/lemmatization.rb +1 -1
  29. data/examples/linguistic_features/named_entity_recognition.rb +1 -1
  30. data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
  31. data/examples/linguistic_features/noun_chunks.rb +1 -1
  32. data/examples/linguistic_features/pos_tagging.rb +1 -1
  33. data/examples/linguistic_features/retokenize_1.rb +1 -1
  34. data/examples/linguistic_features/retokenize_2.rb +2 -2
  35. data/examples/linguistic_features/rule_based_morphology.rb +1 -1
  36. data/examples/linguistic_features/similarity.rb +2 -2
  37. data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
  38. data/examples/linguistic_features/similarity_between_spans.rb +2 -2
  39. data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
  40. data/lib/ruby-spacy.rb +493 -300
  41. data/lib/ruby-spacy/version.rb +1 -1
  42. data/ruby-spacy.gemspec +1 -1
  43. metadata +6 -5
  44. data/examples/linguistic_features/morphology.rb +0 -17
  45. data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c149833c6cc16782d7964c27989535ee681f9816f58231d1eecc57f2c8f99c1
4
- data.tar.gz: 5ac0417c29eea0dfa7a48c394e832bcbd7567fd5e4783f8a6de4d15132c479a3
3
+ metadata.gz: 5fc769c4257e78333c3d6dc114d76b39c31b57365d032d7b741358f34b37099e
4
+ data.tar.gz: 281a9997a325d16819574c96a0696eeedb59af0709d8f25814e6fa0d39646757
5
5
  SHA512:
6
- metadata.gz: bbf0271475ebab0f6f64621be98bca42a45fbf0b76a6285d17e3593bf4c6e53bd91c55bd6664ea7dd6bc23448d64cb3035bee55eb9e525662580618a7d5bbab6
7
- data.tar.gz: 0dd4301b1d9272dcc22ad172b8fb9363c46b52b0c58a34d8bf25499a77b4e96e3617a49d9d4c03a34d2b185d2830c6644a9df8968f81fde69a9f94b45691faf3
6
+ metadata.gz: 8b387962ee82b60499208225ab7cfca631a55a6eb305212f3b14a2c802f67cfa685b23e762a3877e4ba5ae01308c4909eb26286bcde2fc9683dedeee9059db88
7
+ data.tar.gz: d94e788a1458f6be22db486e43180f7cbcce516ad053baa8724ce6eacd7869c3123c9f292845fe7e400aa2115786617f5ad69bda5c71b403221c98c084dc9900
data/CHANGELOG.md ADDED
@@ -0,0 +1,23 @@
1
+ # Change Log
2
+
3
+ ## 0.1.4.1 - 2021-07-06
4
+ - Test code refined
5
+ - `Spacy::Language#most_similar` returns an array of hash-based objects that accept method calls
6
+
7
+ ## 0.1.4 - 2021-06-26
8
+ ### Added
9
+ - `Spacy::Lexeme` class
10
+ - `Spacy::Token#morpheme` method
11
+
12
+ ## 0.1.3 - 2021-06-26
13
+ - Code cleanup
14
+
15
+ ## 0.1.2 - 2021-06-26
16
+ ### Added
17
+ - `Spacy::Token#morpheme` method
18
+
19
+ ## 0.1.1 - 2021-06-26
20
+ - Project description fixed
21
+
22
+ ## 0.1.0 - 2021-06-26
23
+ - Initial release
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-spacy (0.1.0)
4
+ ruby-spacy (0.1.4.1)
5
5
  numpy (~> 0.4.0)
6
6
  pycall (~> 1.4.0)
7
7
  terminal-table (~> 3.0.1)
@@ -23,6 +23,8 @@ GEM
23
23
 
24
24
  PLATFORMS
25
25
  arm64-darwin-20
26
+ x86_64-darwin-20
27
+ x86_64-linux
26
28
 
27
29
  DEPENDENCIES
28
30
  github-markup
data/README.md CHANGED
@@ -111,12 +111,10 @@ Output:
111
111
  |:-----:|:--:|:-------:|:--:|:------:|:----:|:-------:|:---:|:-:|:--:|:-------:|
112
112
  | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
113
113
 
114
- ### Part-of-speech tagging
114
+ ### Part-of-speech and dependency
115
115
 
116
116
  → [spaCy: Part-of-speech tags and dependencies](https://spacy.io/usage/spacy-101#annotations-pos-deps)
117
117
 
118
- → [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
119
-
120
118
  Ruby code:
121
119
 
122
120
  ```ruby
@@ -126,73 +124,117 @@ require "terminal-table"
126
124
  nlp = Spacy::Language.new("en_core_web_sm")
127
125
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
128
126
 
127
+ headings = ["text", "lemma", "pos", "tag", "dep"]
129
128
  rows = []
130
129
 
131
130
  doc.each do |token|
132
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
131
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
133
132
  end
134
133
 
135
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
136
134
  table = Terminal::Table.new rows: rows, headings: headings
137
135
  puts table
138
136
  ```
139
137
 
140
138
  Output:
141
139
 
142
- | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
143
- |:--------|:--------|:------|:----|:---------|:------|:---------|:--------|
144
- | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
145
- | is | be | AUX | VBZ | aux | xx | true | true |
146
- | looking | look | VERB | VBG | ROOT | xxxx | true | false |
147
- | at | at | ADP | IN | prep | xx | true | true |
148
- | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
149
- | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
150
- | startup | startup | NOUN | NN | advcl | xxxx | true | false |
151
- | for | for | ADP | IN | prep | xxx | true | true |
152
- | $ | $ | SYM | $ | quantmod | $ | false | false |
153
- | 1 | 1 | NUM | CD | compound | d | false | false |
154
- | billion | billion | NUM | CD | pobj | xxxx | true | false |
155
-
156
- ### Part-of-speech tagging (Japanese)
140
+ | text | lemma | pos | tag | dep |
141
+ |:--------|:--------|:------|:----|:---------|
142
+ | Apple | Apple | PROPN | NNP | nsubj |
143
+ | is | be | AUX | VBZ | aux |
144
+ | looking | look | VERB | VBG | ROOT |
145
+ | at | at | ADP | IN | prep |
146
+ | buying | buy | VERB | VBG | pcomp |
147
+ | U.K. | U.K. | PROPN | NNP | dobj |
148
+ | startup | startup | NOUN | NN | advcl |
149
+ | for | for | ADP | IN | prep |
150
+ | $ | $ | SYM | $ | quantmod |
151
+ | 1 | 1 | NUM | CD | compound |
152
+ | billion | billion | NUM | CD | pobj |
153
+
154
+ ### Part-of-speech and dependency (Japanese)
157
155
 
158
156
  Ruby code:
159
157
 
160
158
  ```ruby
161
- require( "ruby-spacy")
159
+ require "ruby-spacy"
162
160
  require "terminal-table"
163
161
 
164
162
  nlp = Spacy::Language.new("ja_core_news_lg")
165
- doc = nlp.read("任天堂は1983年にファミリー・コンピュータを14,800円で発売した。")
163
+ doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
166
164
 
165
+ headings = ["text", "lemma", "pos", "tag", "dep"]
167
166
  rows = []
168
167
 
169
168
  doc.each do |token|
170
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
169
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
171
170
  end
172
171
 
173
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
174
172
  table = Terminal::Table.new rows: rows, headings: headings
175
173
  puts table
176
174
  ```
177
175
 
178
176
  Output:
179
177
 
180
- | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
181
- |:-----------|:-----------|:------|:-------------------------|:-------|:-------|:---------|:--------|
182
- | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj | xxx | true | false |
183
- | は | は | ADP | 助詞-係助詞 | case | x | true | true |
184
- | 1983 | 1983 | NUM | 名詞-数詞 | nummod | dddd | false | false |
185
- | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
186
- | に | に | ADP | 助詞-格助詞 | case | x | true | true |
187
- | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj | xxxx | true | false |
188
- | を | を | ADP | 助詞-格助詞 | case | x | true | true |
189
- | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed | dd,ddd | false | false |
190
- | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
191
- | で | で | ADP | 助詞-格助詞 | case | x | true | true |
192
- | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT | xx | true | false |
193
- | し | する | AUX | 動詞-非自立可能 | aux | x | true | true |
194
- | た | た | AUX | 助動詞 | aux | x | true | true |
195
- | 。 | 。 | PUNCT | 補助記号-句点 | punct | 。 | false | false |
178
+ | text | lemma | pos | tag | dep |
179
+ |:-----------|:-----------|:------|:-------------------------|:-------|
180
+ | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
181
+ | は | は | ADP | 助詞-係助詞 | case |
182
+ | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
183
+ | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
184
+ | に | に | ADP | 助詞-格助詞 | case |
185
+ | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
186
+ | を | を | ADP | 助詞-格助詞 | case |
187
+ | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
188
+ | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
189
+ | で | で | ADP | 助詞-格助詞 | case |
190
+ | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
191
+ | し | する | AUX | 動詞-非自立可能 | aux |
192
+ | た | た | AUX | 助動詞 | aux |
193
+ | 。 | 。 | PUNCT | 補助記号-句点 | punct |
194
+
195
+ ### Morphology
196
+
197
+ → [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
198
+
199
+ Ruby code:
200
+
201
+ ```ruby
202
+ require "ruby-spacy"
203
+ require "terminal-table"
204
+
205
+ nlp = Spacy::Language.new("en_core_web_sm")
206
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
207
+
208
+ headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
209
+ rows = []
210
+
211
+ doc.each do |token|
212
+ morph = token.morphology.map do |k, v|
213
+ "#{k} = #{v}"
214
+ end.join("\n")
215
+ rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
216
+ end
217
+
218
+ table = Terminal::Table.new rows: rows, headings: headings
219
+ puts table
220
+
221
+ ```
222
+
223
+ Output:
224
+
225
+ | text | shape | is_alpha | is_stop | morphology |
226
+ |:--------|:------|:---------|:--------|:------------------------------------------------------------------------------------|
227
+ | Apple | Xxxxx | true | false | NounType = Prop<br />Number = Sing |
228
+ | is | xx | true | true | Mood = Ind<br />Number = Sing<br />Person = 3<br />Tense = Pres<br />VerbForm = Fin |
229
+ | looking | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
230
+ | at | xx | true | true | |
231
+ | buying | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
232
+ | U.K. | X.X. | false | false | NounType = Prop<br />Number = Sing |
233
+ | startup | xxxx | true | false | Number = Sing |
234
+ | for | xxx | true | true | |
235
+ | $ | $ | false | false | |
236
+ | 1 | d | false | false | NumType = Card |
237
+ | billion | xxxx | true | false | NumType = Card |
196
238
 
197
239
  ### Visualizing dependency
198
240
 
@@ -258,7 +300,7 @@ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
258
300
  rows = []
259
301
 
260
302
  doc.ents.each do |ent|
261
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
303
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
262
304
  end
263
305
 
264
306
  headings = ["text", "start_char", "end_char", "label"]
@@ -290,7 +332,7 @@ doc = nlp.read(sentence)
290
332
  rows = []
291
333
 
292
334
  doc.ents.each do |ent|
293
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
335
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
294
336
  end
295
337
 
296
338
  headings = ["text", "start", "end", "label"]
@@ -351,8 +393,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
351
393
  doc1 = nlp.read("I like salty fries and hamburgers.")
352
394
  doc2 = nlp.read("Fast food tastes very good.")
353
395
 
354
- puts "Doc 1: " + doc1
355
- puts "Doc 2: " + doc2
396
+ puts "Doc 1: " + doc1.text
397
+ puts "Doc 2: " + doc2.text
356
398
  puts "Similarity: #{doc1.similarity(doc2)}"
357
399
 
358
400
  ```
@@ -406,32 +448,36 @@ france = nlp.get_lexeme("France")
406
448
 
407
449
  query = tokyo.vector - japan.vector + france.vector
408
450
 
451
+ headings = ["rank", "text", "score"]
409
452
  rows = []
410
453
 
411
- results = nlp.most_similar(query, 10)
412
- results.each do |lexeme|
413
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
454
+ results = nlp.most_similar(query, 20)
455
+ results.each_with_index do |lexeme, i|
456
+ index = (i + 1).to_s
457
+ rows << [index, lexeme.text, lexeme.score]
414
458
  end
415
459
 
416
- headings = ["key", "text", "score"]
417
460
  table = Terminal::Table.new rows: rows, headings: headings
418
461
  puts table
419
462
  ```
420
463
 
421
464
  Output:
422
465
 
423
- | key | text | score |
424
- |:---------------------|:------------|:-------------------|
425
- | 1432967385481565694 | FRANCE | 0.8346999883651733 |
426
- | 6613816697677965370 | France | 0.8346999883651733 |
427
- | 4362406852232399325 | france | 0.8346999883651733 |
428
- | 1637573253267610771 | PARIS | 0.7703999876976013 |
429
- | 15322182186497800017 | paris | 0.7703999876976013 |
430
- | 10427160276079242800 | Paris | 0.7703999876976013 |
431
- | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
432
- | 7944504257273452052 | Toulouse | 0.6381999850273132 |
433
- | 9614730213792621885 | toulouse | 0.6381999850273132 |
434
- | 8515538464606421210 | marseille | 0.6370999813079834 |
466
+ | rank | text | score |
467
+ |:-----|:------------|:-------------------|
468
+ | 1 | FRANCE | 0.8346999883651733 |
469
+ | 2 | France | 0.8346999883651733 |
470
+ | 3 | france | 0.8346999883651733 |
471
+ | 4 | PARIS | 0.7703999876976013 |
472
+ | 5 | paris | 0.7703999876976013 |
473
+ | 6 | Paris | 0.7703999876976013 |
474
+ | 7 | TOULOUSE | 0.6381999850273132 |
475
+ | 8 | Toulouse | 0.6381999850273132 |
476
+ | 9 | toulouse | 0.6381999850273132 |
477
+ | 10 | marseille | 0.6370999813079834 |
478
+
479
+
480
+
435
481
 
436
482
 
437
483
  ### Word vector calculation (Japanese)
@@ -452,33 +498,33 @@ france = nlp.get_lexeme("フランス")
452
498
 
453
499
  query = tokyo.vector - japan.vector + france.vector
454
500
 
501
+ headings = ["rank", "text", "score"]
455
502
  rows = []
456
503
 
457
- results = nlp.most_similar(query, 10)
458
- results.each do |lexeme|
459
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
504
+ results = nlp.most_similar(query, 20)
505
+ results.each_with_index do |lexeme, i|
506
+ index = (i + 1).to_s
507
+ rows << [index, lexeme.text, lexeme.score]
460
508
  end
461
509
 
462
- headings = ["key", "text", "score"]
463
510
  table = Terminal::Table.new rows: rows, headings: headings
464
511
  puts table
465
512
  ```
466
513
 
467
514
  Output:
468
515
 
469
- | key | text | score |
470
- |:---------------------|:---------------|:-------------------|
471
- | 12090003238699662352 | パリ | 0.7376999855041504 |
472
- | 18290786970454458111 | フランス | 0.7221999764442444 |
473
- | 9360021637096476946 | 東京 | 0.6697999835014343 |
474
- | 2437546359230213520 | ストラスブール | 0.631600022315979 |
475
- | 13988178952745813186 | リヨン | 0.5939000248908997 |
476
- | 10427160276079242800 | Paris | 0.574400007724762 |
477
- | 5562396768860926997 | ベルギー | 0.5683000087738037 |
478
- | 15029176915627965481 | ニース | 0.5679000020027161 |
479
- | 9750625950625019690 | アルザス | 0.5644999742507935 |
480
- | 2381640614569534741 | 南仏 | 0.5547999739646912 |
481
-
516
+ | rank | text | score |
517
+ |:-----|:---------------|:-------------------|
518
+ | 1 | パリ | 0.7376999855041504 |
519
+ | 2 | フランス | 0.7221999764442444 |
520
+ | 3 | 東京 | 0.6697999835014343 |
521
+ | 4 | ストラスブール | 0.631600022315979 |
522
+ | 5 | リヨン | 0.5939000248908997 |
523
+ | 6 | Paris | 0.574400007724762 |
524
+ | 7 | ベルギー | 0.5683000087738037 |
525
+ | 8 | ニース | 0.5679000020027161 |
526
+ | 9 | アルザス | 0.5644999742507935 |
527
+ | 10 | 南仏 | 0.5547999739646912 |
482
528
 
483
529
  ## Author
484
530
 
@@ -8,8 +8,8 @@ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |word|
11
- lexeme = doc.vocab[word.text]
12
- rows << [lexeme.text, lexeme.shape_, lexeme.prefix_, lexeme.suffix_, lexeme.is_alpha, lexeme.is_digit]
11
+ lexeme = nlp.vocab(word.text)
12
+ rows << [lexeme.text, lexeme.shape, lexeme.prefix, lexeme.suffix, lexeme.is_alpha, lexeme.is_digit]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
@@ -8,7 +8,7 @@ headings = ["text", "pos", "dep"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |token|
11
- rows << [token.text, token.pos_, token.dep_]
11
+ rows << [token.text, token.pos, token.dep]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
@@ -0,0 +1,45 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
8
+ rows = []
9
+
10
+ doc.each do |token|
11
+ morph = token.morphology.map do |k, v|
12
+ "#{k} = #{v}"
13
+ end.join("\n")
14
+ # end.join("<br />")
15
+ rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
16
+ end
17
+
18
+ table = Terminal::Table.new rows: rows, headings: headings
19
+ puts table
20
+
21
+ # +---------+-------+----------+---------+-----------------+
22
+ # | text | shape | is_alpha | is_stop | morphology |
23
+ # +---------+-------+----------+---------+-----------------+
24
+ # | Apple | Xxxxx | true | false | NounType = Prop |
25
+ # | | | | | Number = Sing |
26
+ # | is | xx | true | true | Mood = Ind |
27
+ # | | | | | Number = Sing |
28
+ # | | | | | Person = 3 |
29
+ # | | | | | Tense = Pres |
30
+ # | | | | | VerbForm = Fin |
31
+ # | looking | xxxx | true | false | Aspect = Prog |
32
+ # | | | | | Tense = Pres |
33
+ # | | | | | VerbForm = Part |
34
+ # | at | xx | true | true | |
35
+ # | buying | xxxx | true | false | Aspect = Prog |
36
+ # | | | | | Tense = Pres |
37
+ # | | | | | VerbForm = Part |
38
+ # | U.K. | X.X. | false | false | NounType = Prop |
39
+ # | | | | | Number = Sing |
40
+ # | startup | xxxx | true | false | Number = Sing |
41
+ # | for | xxx | true | true | |
42
+ # | $ | $ | false | false | |
43
+ # | 1 | d | false | false | NumType = Card |
44
+ # | billion | xxxx | true | false | NumType = Card |
45
+ # +---------+-------+----------+---------+-----------------+
@@ -9,38 +9,39 @@ france = nlp.get_lexeme("France")
9
9
 
10
10
  query = tokyo.vector - japan.vector + france.vector
11
11
 
12
- headings = ["key", "text", "score"]
12
+ headings = ["rank", "text", "score"]
13
13
  rows = []
14
14
 
15
15
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
16
+ results.each_with_index do |lexeme, i|
17
+ index = (i + 1).to_s
18
+ rows << [index, lexeme.text, lexeme.score]
18
19
  end
19
20
 
20
21
  table = Terminal::Table.new rows: rows, headings: headings
21
22
  puts table
22
23
 
23
- # +----------------------+-------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+-------------+--------------------+
26
- # | 1432967385481565694 | FRANCE | 0.8346999883651733 |
27
- # | 6613816697677965370 | France | 0.8346999883651733 |
28
- # | 4362406852232399325 | france | 0.8346999883651733 |
29
- # | 1637573253267610771 | PARIS | 0.7703999876976013 |
30
- # | 15322182186497800017 | paris | 0.7703999876976013 |
31
- # | 10427160276079242800 | Paris | 0.7703999876976013 |
32
- # | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
33
- # | 7944504257273452052 | Toulouse | 0.6381999850273132 |
34
- # | 9614730213792621885 | toulouse | 0.6381999850273132 |
35
- # | 8515538464606421210 | marseille | 0.6370999813079834 |
36
- # | 8215995793762630878 | Marseille | 0.6370999813079834 |
37
- # | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
38
- # | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
39
- # | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
40
- # | 13936807859007616770 | bordeaux | 0.6096000075340271 |
41
- # | 8731576325682930212 | prague | 0.6075000166893005 |
42
- # | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
43
- # | 1133963107690000953 | Prague | 0.6075000166893005 |
44
- # | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
45
- # | 6936121537367717968 | switzerland | 0.6068000197410583 |
46
- # +----------------------+-------------+--------------------+
24
+ # +------+-------------+--------------------+
25
+ # | rank | text | score |
26
+ # +------+-------------+--------------------+
27
+ # | 1 | FRANCE | 0.8346999883651733 |
28
+ # | 2 | France | 0.8346999883651733 |
29
+ # | 3 | france | 0.8346999883651733 |
30
+ # | 4 | PARIS | 0.7703999876976013 |
31
+ # | 5 | paris | 0.7703999876976013 |
32
+ # | 6 | Paris | 0.7703999876976013 |
33
+ # | 7 | TOULOUSE | 0.6381999850273132 |
34
+ # | 8 | Toulouse | 0.6381999850273132 |
35
+ # | 9 | toulouse | 0.6381999850273132 |
36
+ # | 10 | marseille | 0.6370999813079834 |
37
+ # | 11 | Marseille | 0.6370999813079834 |
38
+ # | 12 | MARSEILLE | 0.6370999813079834 |
39
+ # | 13 | Bordeaux | 0.6096000075340271 |
40
+ # | 14 | BORDEAUX | 0.6096000075340271 |
41
+ # | 15 | bordeaux | 0.6096000075340271 |
42
+ # | 16 | prague | 0.6075000166893005 |
43
+ # | 17 | PRAGUE | 0.6075000166893005 |
44
+ # | 18 | Prague | 0.6075000166893005 |
45
+ # | 19 | SWITZERLAND | 0.6068000197410583 |
46
+ # | 20 | switzerland | 0.6068000197410583 |
47
+ # +------+-------------+--------------------+