ruby-spacy 0.1.0 → 0.1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Gemfile.lock +3 -1
  4. data/README.md +123 -77
  5. data/examples/get_started/lexeme.rb +2 -2
  6. data/examples/get_started/linguistic_annotations.rb +1 -1
  7. data/examples/get_started/morphology.rb +45 -0
  8. data/examples/get_started/most_similar.rb +28 -27
  9. data/examples/get_started/named_entities.rb +1 -1
  10. data/examples/get_started/pos_tags_and_dependencies.rb +18 -18
  11. data/examples/get_started/similarity.rb +2 -2
  12. data/examples/japanese/ancestors.rb +9 -11
  13. data/examples/japanese/entity_annotations_and_labels.rb +1 -1
  14. data/examples/japanese/lemmatization.rb +1 -1
  15. data/examples/japanese/most_similar.rb +28 -27
  16. data/examples/japanese/named_entity_recognition.rb +1 -1
  17. data/examples/japanese/navigating_parse_tree.rb +18 -18
  18. data/examples/japanese/noun_chunks.rb +1 -1
  19. data/examples/japanese/pos_tagging.rb +20 -20
  20. data/examples/japanese/visualizing_dependencies.rb +2 -2
  21. data/examples/japanese/visualizing_named_entities.rb +1 -1
  22. data/examples/linguistic_features/ancestors.rb +13 -10
  23. data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
  24. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
  25. data/examples/linguistic_features/information_extraction.rb +2 -2
  26. data/examples/linguistic_features/iterating_children.rb +2 -2
  27. data/examples/linguistic_features/iterating_lefts_and_rights.rb +5 -5
  28. data/examples/linguistic_features/lemmatization.rb +1 -1
  29. data/examples/linguistic_features/named_entity_recognition.rb +1 -1
  30. data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
  31. data/examples/linguistic_features/noun_chunks.rb +1 -1
  32. data/examples/linguistic_features/pos_tagging.rb +1 -1
  33. data/examples/linguistic_features/retokenize_1.rb +1 -1
  34. data/examples/linguistic_features/retokenize_2.rb +2 -2
  35. data/examples/linguistic_features/rule_based_morphology.rb +1 -1
  36. data/examples/linguistic_features/similarity.rb +2 -2
  37. data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
  38. data/examples/linguistic_features/similarity_between_spans.rb +2 -2
  39. data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
  40. data/lib/ruby-spacy.rb +493 -300
  41. data/lib/ruby-spacy/version.rb +1 -1
  42. data/ruby-spacy.gemspec +1 -1
  43. metadata +6 -5
  44. data/examples/linguistic_features/morphology.rb +0 -17
  45. data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c149833c6cc16782d7964c27989535ee681f9816f58231d1eecc57f2c8f99c1
4
- data.tar.gz: 5ac0417c29eea0dfa7a48c394e832bcbd7567fd5e4783f8a6de4d15132c479a3
3
+ metadata.gz: 5fc769c4257e78333c3d6dc114d76b39c31b57365d032d7b741358f34b37099e
4
+ data.tar.gz: 281a9997a325d16819574c96a0696eeedb59af0709d8f25814e6fa0d39646757
5
5
  SHA512:
6
- metadata.gz: bbf0271475ebab0f6f64621be98bca42a45fbf0b76a6285d17e3593bf4c6e53bd91c55bd6664ea7dd6bc23448d64cb3035bee55eb9e525662580618a7d5bbab6
7
- data.tar.gz: 0dd4301b1d9272dcc22ad172b8fb9363c46b52b0c58a34d8bf25499a77b4e96e3617a49d9d4c03a34d2b185d2830c6644a9df8968f81fde69a9f94b45691faf3
6
+ metadata.gz: 8b387962ee82b60499208225ab7cfca631a55a6eb305212f3b14a2c802f67cfa685b23e762a3877e4ba5ae01308c4909eb26286bcde2fc9683dedeee9059db88
7
+ data.tar.gz: d94e788a1458f6be22db486e43180f7cbcce516ad053baa8724ce6eacd7869c3123c9f292845fe7e400aa2115786617f5ad69bda5c71b403221c98c084dc9900
data/CHANGELOG.md ADDED
@@ -0,0 +1,23 @@
1
+ # Change Log
2
+
3
+ ## 0.1.4.1 - 2021-07-06
4
+ - Test code refined
5
+ - `Spacy::Language::most_similar` returns an array of hash-based objects that accepts method calls
6
+
7
+ ## 0.1.4 - 2021-06-26
8
+ ### Added
9
+ - `Spacy::Lexeme` class
10
+
11
+ - `Spacy::Token#morpheme` method
12
+ ## 0.1.3 - 2021-06-26
13
+ - Code cleanup
14
+
15
+ ## 0.1.2 - 2021-06-26
16
+ ### Added
17
+ - `Spacy::Token#morpheme` method
18
+
19
+ ## 0.1.1 - 2021-06-26
20
+ - Project description fixed
21
+
22
+ ## 0.1.0 - 2021-06-26
23
+ - Initial release
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-spacy (0.1.0)
4
+ ruby-spacy (0.1.4.1)
5
5
  numpy (~> 0.4.0)
6
6
  pycall (~> 1.4.0)
7
7
  terminal-table (~> 3.0.1)
@@ -23,6 +23,8 @@ GEM
23
23
 
24
24
  PLATFORMS
25
25
  arm64-darwin-20
26
+ x86_64-darwin-20
27
+ x86_64-linux
26
28
 
27
29
  DEPENDENCIES
28
30
  github-markup
data/README.md CHANGED
@@ -111,12 +111,10 @@ Output:
111
111
  |:-----:|:--:|:-------:|:--:|:------:|:----:|:-------:|:---:|:-:|:--:|:-------:|
112
112
  | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
113
113
 
114
- ### Part-of-speech tagging
114
+ ### Part-of-speech and dependency
115
115
 
116
116
  → [spaCy: Part-of-speech tags and dependencies](https://spacy.io/usage/spacy-101#annotations-pos-deps)
117
117
 
118
- → [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
119
-
120
118
  Ruby code:
121
119
 
122
120
  ```ruby
@@ -126,73 +124,117 @@ require "terminal-table"
126
124
  nlp = Spacy::Language.new("en_core_web_sm")
127
125
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
128
126
 
127
+ headings = ["text", "lemma", "pos", "tag", "dep"]
129
128
  rows = []
130
129
 
131
130
  doc.each do |token|
132
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
131
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
133
132
  end
134
133
 
135
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
136
134
  table = Terminal::Table.new rows: rows, headings: headings
137
135
  puts table
138
136
  ```
139
137
 
140
138
  Output:
141
139
 
142
- | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
143
- |:--------|:--------|:------|:----|:---------|:------|:---------|:--------|
144
- | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
145
- | is | be | AUX | VBZ | aux | xx | true | true |
146
- | looking | look | VERB | VBG | ROOT | xxxx | true | false |
147
- | at | at | ADP | IN | prep | xx | true | true |
148
- | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
149
- | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
150
- | startup | startup | NOUN | NN | advcl | xxxx | true | false |
151
- | for | for | ADP | IN | prep | xxx | true | true |
152
- | $ | $ | SYM | $ | quantmod | $ | false | false |
153
- | 1 | 1 | NUM | CD | compound | d | false | false |
154
- | billion | billion | NUM | CD | pobj | xxxx | true | false |
155
-
156
- ### Part-of-speech tagging (Japanese)
140
+ | text | lemma | pos | tag | dep |
141
+ |:--------|:--------|:------|:----|:---------|
142
+ | Apple | Apple | PROPN | NNP | nsubj |
143
+ | is | be | AUX | VBZ | aux |
144
+ | looking | look | VERB | VBG | ROOT |
145
+ | at | at | ADP | IN | prep |
146
+ | buying | buy | VERB | VBG | pcomp |
147
+ | U.K. | U.K. | PROPN | NNP | dobj |
148
+ | startup | startup | NOUN | NN | advcl |
149
+ | for | for | ADP | IN | prep |
150
+ | $ | $ | SYM | $ | quantmod |
151
+ | 1 | 1 | NUM | CD | compound |
152
+ | billion | billion | NUM | CD | pobj |
153
+
154
+ ### Part-of-speech and dependency (Japanese)
157
155
 
158
156
  Ruby code:
159
157
 
160
158
  ```ruby
161
- require( "ruby-spacy")
159
+ require "ruby-spacy"
162
160
  require "terminal-table"
163
161
 
164
162
  nlp = Spacy::Language.new("ja_core_news_lg")
165
- doc = nlp.read("任天堂は1983年にファミリー・コンピュータを14,800円で発売した。")
163
+ doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
166
164
 
165
+ headings = ["text", "lemma", "pos", "tag", "dep"]
167
166
  rows = []
168
167
 
169
168
  doc.each do |token|
170
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
169
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
171
170
  end
172
171
 
173
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
174
172
  table = Terminal::Table.new rows: rows, headings: headings
175
173
  puts table
176
174
  ```
177
175
 
178
176
  Output:
179
177
 
180
- | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
181
- |:-----------|:-----------|:------|:-------------------------|:-------|:-------|:---------|:--------|
182
- | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj | xxx | true | false |
183
- | は | は | ADP | 助詞-係助詞 | case | x | true | true |
184
- | 1983 | 1983 | NUM | 名詞-数詞 | nummod | dddd | false | false |
185
- | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
186
- | に | に | ADP | 助詞-格助詞 | case | x | true | true |
187
- | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj | xxxx | true | false |
188
- | を | を | ADP | 助詞-格助詞 | case | x | true | true |
189
- | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed | dd,ddd | false | false |
190
- | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
191
- | で | で | ADP | 助詞-格助詞 | case | x | true | true |
192
- | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT | xx | true | false |
193
- | し | する | AUX | 動詞-非自立可能 | aux | x | true | true |
194
- | た | た | AUX | 助動詞 | aux | x | true | true |
195
- | 。 | 。 | PUNCT | 補助記号-句点 | punct | 。 | false | false |
178
+ | text | lemma | pos | tag | dep |
179
+ |:-----------|:-----------|:------|:-------------------------|:-------|
180
+ | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
181
+ | は | は | ADP | 助詞-係助詞 | case |
182
+ | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
183
+ | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
184
+ | に | に | ADP | 助詞-格助詞 | case |
185
+ | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
186
+ | を | を | ADP | 助詞-格助詞 | case |
187
+ | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
188
+ | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
189
+ | で | で | ADP | 助詞-格助詞 | case |
190
+ | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
191
+ | し | する | AUX | 動詞-非自立可能 | aux |
192
+ | た | た | AUX | 助動詞 | aux |
193
+ | 。 | 。 | PUNCT | 補助記号-句点 | punct |
194
+
195
+ ### Morphology
196
+
197
+ → [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
198
+
199
+ Ruby code:
200
+
201
+ ```ruby
202
+ require "ruby-spacy"
203
+ require "terminal-table"
204
+
205
+ nlp = Spacy::Language.new("en_core_web_sm")
206
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
207
+
208
+ headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
209
+ rows = []
210
+
211
+ doc.each do |token|
212
+ morph = token.morphology.map do |k, v|
213
+ "#{k} = #{v}"
214
+ end.join("\n")
215
+ rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
216
+ end
217
+
218
+ table = Terminal::Table.new rows: rows, headings: headings
219
+ puts table
220
+
221
+ ```
222
+
223
+ Output:
224
+
225
+ | text | shape | is_alpha | is_stop | morphology |
226
+ |:--------|:------|:---------|:--------|:------------------------------------------------------------------------------------|
227
+ | Apple | Xxxxx | true | false | NounType = Prop<br />Number = Sing |
228
+ | is | xx | true | true | Mood = Ind<br />Number = Sing<br />Person = 3<br />Tense = Pres<br />VerbForm = Fin |
229
+ | looking | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
230
+ | at | xx | true | true | |
231
+ | buying | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
232
+ | U.K. | X.X. | false | false | NounType = Prop<br />Number = Sing |
233
+ | startup | xxxx | true | false | Number = Sing |
234
+ | for | xxx | true | true | |
235
+ | $ | $ | false | false | |
236
+ | 1 | d | false | false | NumType = Card |
237
+ | billion | xxxx | true | false | NumType = Card |
196
238
 
197
239
  ### Visualizing dependency
198
240
 
@@ -258,7 +300,7 @@ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
258
300
  rows = []
259
301
 
260
302
  doc.ents.each do |ent|
261
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
303
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
262
304
  end
263
305
 
264
306
  headings = ["text", "start_char", "end_char", "label"]
@@ -290,7 +332,7 @@ doc = nlp.read(sentence)
290
332
  rows = []
291
333
 
292
334
  doc.ents.each do |ent|
293
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
335
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
294
336
  end
295
337
 
296
338
  headings = ["text", "start", "end", "label"]
@@ -351,8 +393,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
351
393
  doc1 = nlp.read("I like salty fries and hamburgers.")
352
394
  doc2 = nlp.read("Fast food tastes very good.")
353
395
 
354
- puts "Doc 1: " + doc1
355
- puts "Doc 2: " + doc2
396
+ puts "Doc 1: " + doc1.text
397
+ puts "Doc 2: " + doc2.text
356
398
  puts "Similarity: #{doc1.similarity(doc2)}"
357
399
 
358
400
  ```
@@ -406,32 +448,36 @@ france = nlp.get_lexeme("France")
406
448
 
407
449
  query = tokyo.vector - japan.vector + france.vector
408
450
 
451
+ headings = ["rank", "text", "score"]
409
452
  rows = []
410
453
 
411
- results = nlp.most_similar(query, 10)
412
- results.each do |lexeme|
413
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
454
+ results = nlp.most_similar(query, 20)
455
+ results.each_with_index do |lexeme, i|
456
+ index = (i + 1).to_s
457
+ rows << [index, lexeme.text, lexeme.score]
414
458
  end
415
459
 
416
- headings = ["key", "text", "score"]
417
460
  table = Terminal::Table.new rows: rows, headings: headings
418
461
  puts table
419
462
  ```
420
463
 
421
464
  Output:
422
465
 
423
- | key | text | score |
424
- |:---------------------|:------------|:-------------------|
425
- | 1432967385481565694 | FRANCE | 0.8346999883651733 |
426
- | 6613816697677965370 | France | 0.8346999883651733 |
427
- | 4362406852232399325 | france | 0.8346999883651733 |
428
- | 1637573253267610771 | PARIS | 0.7703999876976013 |
429
- | 15322182186497800017 | paris | 0.7703999876976013 |
430
- | 10427160276079242800 | Paris | 0.7703999876976013 |
431
- | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
432
- | 7944504257273452052 | Toulouse | 0.6381999850273132 |
433
- | 9614730213792621885 | toulouse | 0.6381999850273132 |
434
- | 8515538464606421210 | marseille | 0.6370999813079834 |
466
+ | rank | text | score |
467
+ |:-----|:------------|:-------------------|
468
+ | 1 | FRANCE | 0.8346999883651733 |
469
+ | 2 | France | 0.8346999883651733 |
470
+ | 3 | france | 0.8346999883651733 |
471
+ | 4 | PARIS | 0.7703999876976013 |
472
+ | 5 | paris | 0.7703999876976013 |
473
+ | 6 | Paris | 0.7703999876976013 |
474
+ | 7 | TOULOUSE | 0.6381999850273132 |
475
+ | 8 | Toulouse | 0.6381999850273132 |
476
+ | 9 | toulouse | 0.6381999850273132 |
477
+ | 10 | marseille | 0.6370999813079834 |
478
+
479
+
480
+
435
481
 
436
482
 
437
483
  ### Word vector calculation (Japanese)
@@ -452,33 +498,33 @@ france = nlp.get_lexeme("フランス")
452
498
 
453
499
  query = tokyo.vector - japan.vector + france.vector
454
500
 
501
+ headings = ["rank", "text", "score"]
455
502
  rows = []
456
503
 
457
- results = nlp.most_similar(query, 10)
458
- results.each do |lexeme|
459
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
504
+ results = nlp.most_similar(query, 20)
505
+ results.each_with_index do |lexeme, i|
506
+ index = (i + 1).to_s
507
+ rows << [index, lexeme.text, lexeme.score]
460
508
  end
461
509
 
462
- headings = ["key", "text", "score"]
463
510
  table = Terminal::Table.new rows: rows, headings: headings
464
511
  puts table
465
512
  ```
466
513
 
467
514
  Output:
468
515
 
469
- | key | text | score |
470
- |:---------------------|:---------------|:-------------------|
471
- | 12090003238699662352 | パリ | 0.7376999855041504 |
472
- | 18290786970454458111 | フランス | 0.7221999764442444 |
473
- | 9360021637096476946 | 東京 | 0.6697999835014343 |
474
- | 2437546359230213520 | ストラスブール | 0.631600022315979 |
475
- | 13988178952745813186 | リヨン | 0.5939000248908997 |
476
- | 10427160276079242800 | Paris | 0.574400007724762 |
477
- | 5562396768860926997 | ベルギー | 0.5683000087738037 |
478
- | 15029176915627965481 | ニース | 0.5679000020027161 |
479
- | 9750625950625019690 | アルザス | 0.5644999742507935 |
480
- | 2381640614569534741 | 南仏 | 0.5547999739646912 |
481
-
516
+ | rank | text | score |
517
+ |:-----|:---------------|:-------------------|
518
+ | 1 | パリ | 0.7376999855041504 |
519
+ | 2 | フランス | 0.7221999764442444 |
520
+ | 3 | 東京 | 0.6697999835014343 |
521
+ | 4 | ストラスブール | 0.631600022315979 |
522
+ | 5 | リヨン | 0.5939000248908997 |
523
+ | 6 | Paris | 0.574400007724762 |
524
+ | 7 | ベルギー | 0.5683000087738037 |
525
+ | 8 | ニース | 0.5679000020027161 |
526
+ | 9 | アルザス | 0.5644999742507935 |
527
+ | 10 | 南仏 | 0.5547999739646912 |
482
528
 
483
529
  ## Author
484
530
 
@@ -8,8 +8,8 @@ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |word|
11
- lexeme = doc.vocab[word.text]
12
- rows << [lexeme.text, lexeme.shape_, lexeme.prefix_, lexeme.suffix_, lexeme.is_alpha, lexeme.is_digit]
11
+ lexeme = nlp.vocab(word.text)
12
+ rows << [lexeme.text, lexeme.shape, lexeme.prefix, lexeme.suffix, lexeme.is_alpha, lexeme.is_digit]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
@@ -8,7 +8,7 @@ headings = ["text", "pos", "dep"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |token|
11
- rows << [token.text, token.pos_, token.dep_]
11
+ rows << [token.text, token.pos, token.dep]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
@@ -0,0 +1,45 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
8
+ rows = []
9
+
10
+ doc.each do |token|
11
+ morph = token.morphology.map do |k, v|
12
+ "#{k} = #{v}"
13
+ end.join("\n")
14
+ # end.join("<br />")
15
+ rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
16
+ end
17
+
18
+ table = Terminal::Table.new rows: rows, headings: headings
19
+ puts table
20
+
21
+ # +---------+-------+----------+---------+-----------------+
22
+ # | text | shape | is_alpha | is_stop | morphology |
23
+ # +---------+-------+----------+---------+-----------------+
24
+ # | Apple | Xxxxx | true | false | NounType = Prop |
25
+ # | | | | | Number = Sing |
26
+ # | is | xx | true | true | Mood = Ind |
27
+ # | | | | | Number = Sing |
28
+ # | | | | | Person = 3 |
29
+ # | | | | | Tense = Pres |
30
+ # | | | | | VerbForm = Fin |
31
+ # | looking | xxxx | true | false | Aspect = Prog |
32
+ # | | | | | Tense = Pres |
33
+ # | | | | | VerbForm = Part |
34
+ # | at | xx | true | true | |
35
+ # | buying | xxxx | true | false | Aspect = Prog |
36
+ # | | | | | Tense = Pres |
37
+ # | | | | | VerbForm = Part |
38
+ # | U.K. | X.X. | false | false | NounType = Prop |
39
+ # | | | | | Number = Sing |
40
+ # | startup | xxxx | true | false | Number = Sing |
41
+ # | for | xxx | true | true | |
42
+ # | $ | $ | false | false | |
43
+ # | 1 | d | false | false | NumType = Card |
44
+ # | billion | xxxx | true | false | NumType = Card |
45
+ # +---------+-------+----------+---------+-----------------+
@@ -9,38 +9,39 @@ france = nlp.get_lexeme("France")
9
9
 
10
10
  query = tokyo.vector - japan.vector + france.vector
11
11
 
12
- headings = ["key", "text", "score"]
12
+ headings = ["rank", "text", "score"]
13
13
  rows = []
14
14
 
15
15
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
16
+ results.each_with_index do |lexeme, i|
17
+ index = (i + 1).to_s
18
+ rows << [index, lexeme.text, lexeme.score]
18
19
  end
19
20
 
20
21
  table = Terminal::Table.new rows: rows, headings: headings
21
22
  puts table
22
23
 
23
- # +----------------------+-------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+-------------+--------------------+
26
- # | 1432967385481565694 | FRANCE | 0.8346999883651733 |
27
- # | 6613816697677965370 | France | 0.8346999883651733 |
28
- # | 4362406852232399325 | france | 0.8346999883651733 |
29
- # | 1637573253267610771 | PARIS | 0.7703999876976013 |
30
- # | 15322182186497800017 | paris | 0.7703999876976013 |
31
- # | 10427160276079242800 | Paris | 0.7703999876976013 |
32
- # | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
33
- # | 7944504257273452052 | Toulouse | 0.6381999850273132 |
34
- # | 9614730213792621885 | toulouse | 0.6381999850273132 |
35
- # | 8515538464606421210 | marseille | 0.6370999813079834 |
36
- # | 8215995793762630878 | Marseille | 0.6370999813079834 |
37
- # | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
38
- # | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
39
- # | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
40
- # | 13936807859007616770 | bordeaux | 0.6096000075340271 |
41
- # | 8731576325682930212 | prague | 0.6075000166893005 |
42
- # | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
43
- # | 1133963107690000953 | Prague | 0.6075000166893005 |
44
- # | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
45
- # | 6936121537367717968 | switzerland | 0.6068000197410583 |
46
- # +----------------------+-------------+--------------------+
24
+ # +------+-------------+--------------------+
25
+ # | rank | text | score |
26
+ # +------+-------------+--------------------+
27
+ # | 1 | FRANCE | 0.8346999883651733 |
28
+ # | 2 | France | 0.8346999883651733 |
29
+ # | 3 | france | 0.8346999883651733 |
30
+ # | 4 | PARIS | 0.7703999876976013 |
31
+ # | 5 | paris | 0.7703999876976013 |
32
+ # | 6 | Paris | 0.7703999876976013 |
33
+ # | 7 | TOULOUSE | 0.6381999850273132 |
34
+ # | 8 | Toulouse | 0.6381999850273132 |
35
+ # | 9 | toulouse | 0.6381999850273132 |
36
+ # | 10 | marseille | 0.6370999813079834 |
37
+ # | 11 | Marseille | 0.6370999813079834 |
38
+ # | 12 | MARSEILLE | 0.6370999813079834 |
39
+ # | 13 | Bordeaux | 0.6096000075340271 |
40
+ # | 14 | BORDEAUX | 0.6096000075340271 |
41
+ # | 15 | bordeaux | 0.6096000075340271 |
42
+ # | 16 | prague | 0.6075000166893005 |
43
+ # | 17 | PRAGUE | 0.6075000166893005 |
44
+ # | 18 | Prague | 0.6075000166893005 |
45
+ # | 19 | SWITZERLAND | 0.6068000197410583 |
46
+ # | 20 | switzerland | 0.6068000197410583 |
47
+ # +------+-------------+--------------------+