ruby-spacy 0.1.4 → 0.1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/CHANGELOG.md +5 -1
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +3 -3
  7. data/README.md +40 -39
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +30 -27
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +30 -27
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +142 -136
  58. data/ruby-spacy.gemspec +15 -17
  59. data/tags +132 -0
  60. metadata +69 -10
data/lib/ruby-spacy.rb CHANGED
@@ -1,17 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "ruby-spacy/version"
4
- require 'enumerator'
5
- require 'strscan'
6
- require 'numpy'
7
- require 'pycall/import'
8
- include PyCall::Import
4
+ require "strscan"
5
+ require "numpy"
6
+ require "pycall/import"
9
7
 
10
8
  # This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
11
9
  module Spacy
12
-
13
10
  extend PyCall::Import
14
- spacy = PyCall.import_module('spacy')
11
+ spacy = PyCall.import_module("spacy")
15
12
 
16
13
  # Python `Language` class
17
14
  PyLanguage = spacy.language.Language
@@ -24,23 +21,22 @@ module Spacy
24
21
 
25
22
  # Python `Token` class object
26
23
  PyToken = spacy.tokens.Token
27
-
24
+
28
25
  # Python `Matcher` class object
29
26
  PyMatcher = spacy.matcher.Matcher
30
27
 
31
28
  # Python `displacy` object
32
29
  PyDisplacy = spacy.displacy
33
30
 
34
- # A utility module method to convert Python's generator object to a Ruby array,
31
+ # A utility module method to convert Python's generator object to a Ruby array,
35
32
  # mainly used on the items inside the array returned from dependency-related methods
36
33
  # such as {Span#rights}, {Span#lefts} and {Span#subtree}.
37
34
  def self.generator_to_array(py_generator)
38
- PyCall::List.(py_generator)
35
+ PyCall::List.call(py_generator)
39
36
  end
40
37
 
41
38
  # See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
42
39
  class Doc
43
-
44
40
  # @return [Object] a Python `Language` instance accessible via `PyCall`
45
41
  attr_reader :py_nlp
46
42
 
@@ -52,23 +48,19 @@ module Spacy
52
48
 
53
49
  include Enumerable
54
50
 
55
- alias_method :length, :count
56
- alias_method :len, :count
57
- alias_method :size, :count
51
+ alias length count
52
+ alias len count
53
+ alias size count
58
54
 
59
- # It is recommended to use {Language#read} method to create a doc. If you need to
60
- # create one using {Doc#initialize}, there are two method signatures:
55
+ # It is recommended to use {Language#read} method to create a doc. If you need to
56
+ # create one using {Doc#initialize}, there are two method signatures:
61
57
  # `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
62
58
  # @param nlp [Language] an instance of {Language} class
63
59
  # @param py_doc [Object] an instance of Python `Doc` class
64
60
  # @param text [String] the text string to be analyzed
65
61
  def initialize(nlp, py_doc: nil, text: nil)
66
62
  @py_nlp = nlp
67
- if py_doc
68
- @py_doc = py_doc
69
- else
70
- @py_doc = nlp.(text)
71
- end
63
+ @py_doc = py_doc || @py_doc = nlp.call(text)
72
64
  @text = @py_doc.text
73
65
  end
74
66
 
@@ -77,25 +69,25 @@ module Spacy
77
69
  # @param end_index [Integer] the end position of the span to be retokenized in the document
78
70
  # @param attributes [Hash] attributes to set on the merged token
79
71
  def retokenize(start_index, end_index, attributes = {})
80
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
81
- retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
72
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
73
+ retokenizer.merge(@py_doc[start_index..end_index], attrs: attributes)
82
74
  end
83
75
  end
84
76
 
85
77
  # Retokenizes the text splitting the specified token.
86
78
  # @param pos_in_doc [Integer] the position of the span to be retokenized in the document
87
- # @param split_array [Array<String>] text strings of the split results
79
+ # @param split_array [Array<String>] text strings of the split results
88
80
  # @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
89
81
  # @param attributes [Hash] the attributes of the split elements
90
82
  def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
91
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
83
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
92
84
  heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
93
85
  retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
94
86
  end
95
87
  end
96
88
 
97
89
  # String representation of the document.
98
- # @return [String]
90
+ # @return [String]
99
91
  def to_s
100
92
  @text
101
93
  end
@@ -104,7 +96,7 @@ module Spacy
104
96
  # @return [Array<Token>]
105
97
  def tokens
106
98
  results = []
107
- PyCall::List.(@py_doc).each do |py_token|
99
+ PyCall::List.call(@py_doc).each do |py_token|
108
100
  results << Token.new(py_token)
109
101
  end
110
102
  results
@@ -112,12 +104,12 @@ module Spacy
112
104
 
113
105
  # Iterates over the elements in the doc yielding a token instance each time.
114
106
  def each
115
- PyCall::List.(@py_doc).each do |py_token|
107
+ PyCall::List.call(@py_doc).each do |py_token|
116
108
  yield Token.new(py_token)
117
109
  end
118
110
  end
119
111
 
120
- # Returns a span of the specified range within the doc.
112
+ # Returns a span of the specified range within the doc.
121
113
  # The method can be used in either of two ways: `Doc#span(range)` or `Doc#span(start_pos, size_of_span)`.
122
114
  # @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
123
115
  # @param optional_size [Integer] an integer representing the size of the span
@@ -125,7 +117,7 @@ module Spacy
125
117
  def span(range_or_start, optional_size = nil)
126
118
  if optional_size
127
119
  start_index = range_or_start
128
- temp = tokens[start_index ... start_index + optional_size]
120
+ temp = tokens[start_index...start_index + optional_size]
129
121
  else
130
122
  start_index = range_or_start.first
131
123
  range = range_or_start
@@ -141,7 +133,7 @@ module Spacy
141
133
  # @return [Array<Span>]
142
134
  def noun_chunks
143
135
  chunk_array = []
144
- py_chunks = PyCall::List.(@py_doc.noun_chunks)
136
+ py_chunks = PyCall::List.call(@py_doc.noun_chunks)
145
137
  py_chunks.each do |py_chunk|
146
138
  chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
147
139
  end
@@ -152,7 +144,7 @@ module Spacy
152
144
  # @return [Array<Span>]
153
145
  def sents
154
146
  sentence_array = []
155
- py_sentences = PyCall::List.(@py_doc.sents)
147
+ py_sentences = PyCall::List.call(@py_doc.sents)
156
148
  py_sentences.each do |py_sent|
157
149
  sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
158
150
  end
@@ -164,9 +156,9 @@ module Spacy
164
156
  def ents
165
157
  # so that ents can be "each"-ed in Ruby
166
158
  ent_array = []
167
- PyCall::List.(@py_doc.ents).each do |ent|
159
+ PyCall::List.call(@py_doc.ents).each do |ent|
168
160
  ent.define_singleton_method :label do
169
- return self.label_
161
+ label_
170
162
  end
171
163
  ent_array << ent
172
164
  end
@@ -178,15 +170,15 @@ module Spacy
178
170
  def [](range)
179
171
  if range.is_a?(Range)
180
172
  py_span = @py_doc[range]
181
- return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
173
+ Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
182
174
  else
183
- return Token.new(@py_doc[range])
175
+ Token.new(@py_doc[range])
184
176
  end
185
177
  end
186
178
 
187
179
  # Returns a semantic similarity estimate.
188
180
  # @param other [Doc] the other doc to which a similarity estimation is made
189
- # @return [Float]
181
+ # @return [Float]
190
182
  def similarity(other)
191
183
  py_doc.similarity(other.py_doc)
192
184
  end
@@ -196,18 +188,21 @@ module Spacy
196
188
  # @param compact [Boolean] only relevant to the `dep' style
197
189
  # @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
198
190
  def displacy(style: "dep", compact: false)
199
- PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
191
+ PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
200
192
  end
201
193
 
202
194
  # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
203
195
  def method_missing(name, *args)
204
196
  @py_doc.send(name, *args)
205
197
  end
198
+
199
+ def respond_to_missing?(sym)
200
+ sym ? true : super
201
+ end
206
202
  end
207
203
 
208
204
  # See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
209
205
  class Language
210
-
211
206
  # @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
212
207
  attr_reader :spacy_nlp_id
213
208
 
@@ -245,7 +240,7 @@ module Spacy
245
240
  # @return [Array<String>] An array of text strings representing pipeline components
246
241
  def pipe_names
247
242
  pipe_array = []
248
- PyCall::List.(@py_nlp.pipe_names).each do |pipe|
243
+ PyCall::List.call(@py_nlp.pipe_names).each do |pipe|
249
244
  pipe_array << pipe
250
245
  end
251
246
  pipe_array
@@ -268,18 +263,25 @@ module Spacy
268
263
  # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
269
264
  # @param vector [Object] A vector representation of a word (whether existing or non-existing)
270
265
  # @return [Array<Hash{:key => Integer, :text => String, :best_row => Float, :score => Float}>] An array of hash objects, each containing the `key`, `text`, `best_row` and similarity `score` of a lexeme
271
- def most_similar(vector, n)
266
+ def most_similar(vector, num)
272
267
  vec_array = Numpy.asarray([vector])
273
- py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
274
- key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
275
- keys = key_texts.map{|kt| kt[0]}
276
- texts = key_texts.map{|kt| kt[1]}
277
- best_rows = PyCall::List.(py_result[1])[0]
278
- scores = PyCall::List.(py_result[2])[0]
268
+ py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: num)
269
+ key_texts = PyCall.eval("[[str(num), #{@spacy_nlp_id}.vocab[num].text] for num in #{py_result[0][0].tolist}]")
270
+ keys = key_texts.map { |kt| kt[0] }
271
+ texts = key_texts.map { |kt| kt[1] }
272
+ best_rows = PyCall::List.call(py_result[1])[0]
273
+ scores = PyCall::List.call(py_result[2])[0]
279
274
 
280
275
  results = []
281
- n.times do |i|
282
- results << {key: keys[i].to_i, text: texts[i], best_row: best_rows[i], score: scores[i]}
276
+ num.times do |i|
277
+ result = { key: keys[i].to_i,
278
+ text: texts[i],
279
+ best_row: best_rows[i],
280
+ score: scores[i] }
281
+ result.each_key do |key|
282
+ result.define_singleton_method(key) { result[key] }
283
+ end
284
+ results << result
283
285
  end
284
286
  results
285
287
  end
@@ -289,9 +291,9 @@ module Spacy
289
291
  # @param disable [Array<String>]
290
292
  # @param batch_size [Integer]
291
293
  # @return [Array<Doc>]
292
- def pipe(texts, disable: [], batch_size: 50)
294
+ def pipe(texts, disable: [], batch_size: 50)
293
295
  docs = []
294
- PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
296
+ PyCall::List.call(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
295
297
  docs << Doc.new(@py_nlp, py_doc: py_doc)
296
298
  end
297
299
  docs
@@ -301,18 +303,21 @@ module Spacy
301
303
  def method_missing(name, *args)
302
304
  @py_nlp.send(name, *args)
303
305
  end
306
+
307
+ def respond_to_missing?(sym)
308
+ sym ? true : super
309
+ end
304
310
  end
305
311
 
306
312
  # See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
307
313
  class Matcher
308
-
309
314
  # @return [Object] a Python `Matcher` instance accessible via `PyCall`
310
315
  attr_reader :py_matcher
311
316
 
312
317
  # Creates a {Matcher} instance
313
318
  # @param nlp [Language] an instance of {Language} class
314
319
  def initialize(nlp)
315
- @py_matcher = PyMatcher.(nlp.vocab)
320
+ @py_matcher = PyMatcher.call(nlp.vocab)
316
321
  end
317
322
 
318
323
  # Adds a label string and a text pattern.
@@ -326,16 +331,17 @@ module Spacy
326
331
  # @param doc [Doc] an {Doc} instance
327
332
  # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
328
333
  def match(doc)
329
- str_results = @py_matcher.(doc.py_doc).to_s
334
+ str_results = @py_matcher.call(doc.py_doc).to_s
330
335
  s = StringScanner.new(str_results[1..-2])
331
336
  results = []
332
337
  while s.scan_until(/(\d+), (\d+), (\d+)/)
333
338
  next unless s.matched
339
+
334
340
  triple = s.matched.split(", ")
335
341
  match_id = triple[0].to_i
336
342
  start_index = triple[1].to_i
337
343
  end_index = triple[2].to_i - 1
338
- results << {match_id: match_id, start_index: start_index, end_index: end_index}
344
+ results << { match_id: match_id, start_index: start_index, end_index: end_index }
339
345
  end
340
346
  results
341
347
  end
@@ -343,7 +349,6 @@ module Spacy
343
349
 
344
350
  # See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
345
351
  class Span
346
-
347
352
  # @return [Object] a Python `Span` instance accessible via `PyCall`
348
353
  attr_reader :py_span
349
354
 
@@ -352,11 +357,11 @@ module Spacy
352
357
 
353
358
  include Enumerable
354
359
 
355
- alias_method :length, :count
356
- alias_method :len, :count
357
- alias_method :size, :count
360
+ alias length count
361
+ alias len count
362
+ alias size count
358
363
 
359
- # It is recommended to use {Doc#span} method to create a span. If you need to
364
+ # It is recommended to use {Doc#span} method to create a span. If you need to
360
365
  # create one using {Span#initialize}, there are two method signatures:
361
366
  # `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
362
367
  # @param doc [Doc] the document to which this span belongs to
@@ -365,18 +370,14 @@ module Spacy
365
370
  # @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
366
371
  def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
367
372
  @doc = doc
368
- if py_span
369
- @py_span = py_span
370
- else
371
- @py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
372
- end
373
+ @py_span = py_span || @py_span = PySpan.call(@doc.py_doc, start_index, end_index + 1, options)
373
374
  end
374
375
 
375
376
  # Returns an array of tokens contained in the span.
376
377
  # @return [Array<Token>]
377
378
  def tokens
378
379
  results = []
379
- PyCall::List.(@py_span).each do |py_token|
380
+ PyCall::List.call(@py_span).each do |py_token|
380
381
  results << Token.new(py_token)
381
382
  end
382
383
  results
@@ -384,7 +385,7 @@ module Spacy
384
385
 
385
386
  # Iterates over the elements in the span yielding a token instance each time.
386
387
  def each
387
- PyCall::List.(@py_span).each do |py_token|
388
+ PyCall::List.call(@py_span).each do |py_token|
388
389
  yield Token.new(py_token)
389
390
  end
390
391
  end
@@ -393,7 +394,7 @@ module Spacy
393
394
  # @return [Array<Span>]
394
395
  def noun_chunks
395
396
  chunk_array = []
396
- py_chunks = PyCall::List.(@py_span.noun_chunks)
397
+ py_chunks = PyCall::List.call(@py_span.noun_chunks)
397
398
  py_chunks.each do |py_span|
398
399
  chunk_array << Span.new(@doc, py_span: py_span)
399
400
  end
@@ -402,7 +403,7 @@ module Spacy
402
403
 
403
404
  # Returns the head token
404
405
  # @return [Token]
405
- def root
406
+ def root
406
407
  Token.new(@py_span.root)
407
408
  end
408
409
 
@@ -410,7 +411,7 @@ module Spacy
410
411
  # @return [Array<Span>]
411
412
  def sents
412
413
  sentence_array = []
413
- py_sentences = PyCall::List.(@py_span.sents)
414
+ py_sentences = PyCall::List.call(@py_span.sents)
414
415
  py_sentences.each do |py_span|
415
416
  sentence_array << Span.new(@doc, py_span: py_span)
416
417
  end
@@ -421,7 +422,7 @@ module Spacy
421
422
  # @return [Array<Span>]
422
423
  def ents
423
424
  ent_array = []
424
- PyCall::List.(@py_span.ents).each do |py_span|
425
+ PyCall::List.call(@py_span.ents).each do |py_span|
425
426
  ent_array << Span.new(@doc, py_span: py_span)
426
427
  end
427
428
  ent_array
@@ -430,8 +431,8 @@ module Spacy
430
431
  # Returns a span that represents the sentence that the given span is part of.
431
432
  # @return [Span]
432
433
  def sent
433
- py_span = @py_span.sent
434
- return Span.new(@doc, py_span: py_span)
434
+ py_span = @py_span.sent
435
+ Span.new(@doc, py_span: py_span)
435
436
  end
436
437
 
437
438
  # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
@@ -439,67 +440,67 @@ module Spacy
439
440
  def [](range)
440
441
  if range.is_a?(Range)
441
442
  py_span = @py_span[range]
442
- return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
443
+ Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
443
444
  else
444
- return Token.new(@py_span[range])
445
+ Token.new(@py_span[range])
445
446
  end
446
447
  end
447
448
 
448
449
  # Returns a semantic similarity estimate.
449
450
  # @param other [Span] the other span to which a similarity estimation is conducted
450
- # @return [Float]
451
+ # @return [Float]
451
452
  def similarity(other)
452
453
  py_span.similarity(other.py_span)
453
454
  end
454
455
 
455
456
  # Creates a document instance from the span
456
- # @return [Doc]
457
+ # @return [Doc]
457
458
  def as_doc
458
- Doc.new(@doc.py_nlp, text: self.text)
459
+ Doc.new(@doc.py_nlp, text: text)
459
460
  end
460
461
 
461
462
  # Returns tokens conjugated to the root of the span.
462
463
  # @return [Array<Token>] an array of tokens
463
464
  def conjuncts
464
465
  conjunct_array = []
465
- PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
466
+ PyCall::List.call(@py_span.conjuncts).each do |py_conjunct|
466
467
  conjunct_array << Token.new(py_conjunct)
467
468
  end
468
469
  conjunct_array
469
470
  end
470
471
 
471
472
  # Returns tokens that are to the left of the span, whose heads are within the span.
472
- # @return [Array<Token>] an array of tokens
473
+ # @return [Array<Token>] an array of tokens
473
474
  def lefts
474
475
  left_array = []
475
- PyCall::List.(@py_span.lefts).each do |py_left|
476
+ PyCall::List.call(@py_span.lefts).each do |py_left|
476
477
  left_array << Token.new(py_left)
477
478
  end
478
479
  left_array
479
480
  end
480
481
 
481
482
  # Returns Tokens that are to the right of the span, whose heads are within the span.
482
- # @return [Array<Token>] an array of Tokens
483
+ # @return [Array<Token>] an array of Tokens
483
484
  def rights
484
485
  right_array = []
485
- PyCall::List.(@py_span.rights).each do |py_right|
486
+ PyCall::List.call(@py_span.rights).each do |py_right|
486
487
  right_array << Token.new(py_right)
487
488
  end
488
489
  right_array
489
490
  end
490
491
 
491
492
  # Returns Tokens that are within the span and tokens that descend from them.
492
- # @return [Array<Token>] an array of tokens
493
+ # @return [Array<Token>] an array of tokens
493
494
  def subtree
494
495
  subtree_array = []
495
- PyCall::List.(@py_span.subtree).each do |py_subtree|
496
+ PyCall::List.call(@py_span.subtree).each do |py_subtree|
496
497
  subtree_array << Token.new(py_subtree)
497
498
  end
498
499
  subtree_array
499
500
  end
500
501
 
501
502
  # Returns the label
502
- # @return [String]
503
+ # @return [String]
503
504
  def label
504
505
  @py_span.label_
505
506
  end
@@ -508,11 +509,14 @@ module Spacy
508
509
  def method_missing(name, *args)
509
510
  @py_span.send(name, *args)
510
511
  end
512
+
513
+ def respond_to_missing?(sym)
514
+ sym ? true : super
515
+ end
511
516
  end
512
517
 
513
518
  # See also spaCy Python API document for [`Token`](https://spacy.io/api/token).
514
519
  class Token
515
-
516
520
  # @return [Object] a Python `Token` instance accessible via `PyCall`
517
521
  attr_reader :py_token
518
522
 
@@ -520,17 +524,16 @@ module Spacy
520
524
  attr_reader :text
521
525
 
522
526
  # It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
523
  # There is no way to generate a token from scratch but relying on a pre-existing Python {Token} object.
527
+ # There is no way to generate a token from scratch but relying on a pre-existing Python `Token` object.
524
528
  # @param py_token [Object] Python `Token` object
525
529
  def initialize(py_token)
526
530
  @py_token = py_token
527
531
  @text = @py_token.text
528
532
  end
529
533
 
530
-
531
534
  # Returns the head token
532
535
  # @return [Token]
533
- def head
536
+ def head
534
537
  Token.new(@py_token.head)
535
538
  end
536
539
 
@@ -538,7 +541,7 @@ module Spacy
538
541
  # @return [Array<Token>] an array of tokens
539
542
  def subtree
540
543
  descendant_array = []
541
- PyCall::List.(@py_token.subtree).each do |descendant|
544
+ PyCall::List.call(@py_token.subtree).each do |descendant|
542
545
  descendant_array << Token.new(descendant)
543
546
  end
544
547
  descendant_array
@@ -548,7 +551,7 @@ module Spacy
548
551
  # @return [Array<Token>] an array of tokens
549
552
  def ancestors
550
553
  ancestor_array = []
551
- PyCall::List.(@py_token.ancestors).each do |ancestor|
554
+ PyCall::List.call(@py_token.ancestors).each do |ancestor|
552
555
  ancestor_array << Token.new(ancestor)
553
556
  end
554
557
  ancestor_array
@@ -558,7 +561,7 @@ module Spacy
558
561
  # @return [Array<Token>] an array of tokens
559
562
  def children
560
563
  child_array = []
561
- PyCall::List.(@py_token.children).each do |child|
564
+ PyCall::List.call(@py_token.children).each do |child|
562
565
  child_array << Token.new(child)
563
566
  end
564
567
  child_array
@@ -568,7 +571,7 @@ module Spacy
568
571
  # @return [Array<Token>] an array of tokens
569
572
  def lefts
570
573
  token_array = []
571
- PyCall::List.(@py_token.lefts).each do |token|
574
+ PyCall::List.call(@py_token.lefts).each do |token|
572
575
  token_array << Token.new(token)
573
576
  end
574
577
  token_array
@@ -578,89 +581,87 @@ module Spacy
578
581
  # @return [Array<Token>] an array of tokens
579
582
  def rights
580
583
  token_array = []
581
- PyCall::List.(@py_token.rights).each do |token|
584
+ PyCall::List.call(@py_token.rights).each do |token|
582
585
  token_array << Token.new(token)
583
586
  end
584
587
  token_array
585
588
  end
586
589
 
587
590
  # String representation of the token.
588
- # @return [String]
591
+ # @return [String]
589
592
  def to_s
590
593
  @text
591
594
  end
592
595
 
593
596
  # Returns a hash or string of morphological information
594
597
  # @param hash [Boolean] if true, a hash will be returned instead of a string
595
- # @return [Hash, String]
596
- def morphology(hash = true)
598
+ # @return [Hash, String]
599
+ def morphology(hash: true)
597
600
  if @py_token.has_morph
598
601
  morph_analysis = @py_token.morph
599
- if hash
600
- return morph_analysis.to_dict
601
- else
602
- return morph_analysis.to_s
603
- end
604
- else
605
602
  if hash
606
- results = {}
603
+ morph_analysis.to_dict
607
604
  else
608
- return ""
605
+ morph_analysis.to_s
609
606
  end
607
+ elsif hash
608
+ {}
609
+ else
610
+ ""
610
611
  end
611
612
  end
612
613
 
613
614
  # Returns the lemma by calling `lemma_' of `@py_token` object
614
- # @return [String]
615
+ # @return [String]
615
616
  def lemma
616
617
  @py_token.lemma_
617
618
  end
618
619
 
619
620
  # Returns the lowercase form by calling `lower_' of `@py_token` object
620
- # @return [String]
621
+ # @return [String]
621
622
  def lower
622
623
  @py_token.lower_
623
624
  end
624
625
 
625
626
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
626
- # @return [String]
627
+ # @return [String]
627
628
  def shape
628
629
  @py_token.shape_
629
630
  end
630
631
 
631
632
  # Returns the pos by calling `pos_' of `@py_token` object
632
- # @return [String]
633
+ # @return [String]
633
634
  def pos
634
635
  @py_token.pos_
635
636
  end
636
637
 
637
638
  # Returns the fine-grained pos by calling `tag_' of `@py_token` object
638
- # @return [String]
639
- def tag
639
+ # @return [String]
640
+ def tag
640
641
  @py_token.tag_
641
642
  end
642
643
 
643
644
  # Returns the dependency relation by calling `dep_' of `@py_token` object
644
- # @return [String]
645
+ # @return [String]
645
646
  def dep
646
647
  @py_token.dep_
647
648
  end
648
-
649
+
649
650
  # Returns the language by calling `lang_' of `@py_token` object
650
- # @return [String]
651
- def lang
651
+ # @return [String]
652
+ def lang
652
653
  @py_token.lang_
653
654
  end
654
655
 
655
656
  # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
656
- # @return [String]
657
- def whitespace
657
+ # @return [String]
658
+ def whitespace
658
659
  @py_token.whitespace_
659
660
  end
660
661
 
661
662
  # Returns the named entity type by calling `ent_type_' of `@py_token` object
662
- # @return [String]
663
- def ent_type
663
+ # @return [String]
664
+ def ent_type
664
665
  @py_token.ent_type_
665
666
  end
666
667
 
@@ -674,11 +675,14 @@ module Spacy
674
675
  def method_missing(name, *args)
675
676
  @py_token.send(name, *args)
676
677
  end
678
+
679
+ def respond_to_missing?(sym)
680
+ sym ? true : super
681
+ end
677
682
  end
678
683
 
679
684
  # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
680
- class Lexeme
681
-
685
+ class Lexeme
682
686
  # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
683
687
  attr_reader :py_lexeme
684
688
 
@@ -694,50 +698,50 @@ module Spacy
694
698
  end
695
699
 
696
700
  # String representation of the token.
697
- # @return [String]
701
+ # @return [String]
698
702
  def to_s
699
703
  @text
700
704
  end
701
705
 
702
706
  # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
703
- # @return [String]
707
+ # @return [String]
704
708
  def lower
705
709
  @py_lexeme.lower_
706
710
  end
707
711
 
708
712
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
709
- # @return [String]
713
+ # @return [String]
710
714
  def shape
711
715
  @py_lexeme.shape_
712
716
  end
713
717
 
714
718
  # Returns the language by calling `lang_' of `@py_lexeme` object
715
- # @return [String]
716
- def lang
719
+ # @return [String]
720
+ def lang
717
721
  @py_lexeme.lang_
718
722
  end
719
723
 
720
724
  # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
721
- # @return [String]
722
- def prefix
725
+ # @return [String]
726
+ def prefix
723
727
  @py_lexeme.prefix_
724
728
  end
725
- #
729
+
726
730
  # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
727
- # @return [String]
731
+ # @return [String]
728
732
  def suffix
729
733
  @py_lexeme.suffix_
730
734
  end
731
735
 
732
736
  # Returns the lexemes's norm, i.e. a normalized form of the lexeme calling `norm_' of `@py_lexeme` object
733
- # @return [String]
737
+ # @return [String]
734
738
  def norm
735
739
  @py_lexeme.norm_
736
740
  end
737
741
 
738
742
  # Returns a semantic similarity estimate.
739
- # @param other [Lexeme] the other doc to which a similarity estimation is made
740
- # @return [Float]
743
+ # @param other [Lexeme] the other lexeme to which a similarity estimation is made
744
+ # @return [Float]
741
745
  def similarity(other)
742
746
  @py_lexeme.similarity(other.py_lexeme)
743
747
  end
@@ -746,7 +750,9 @@ module Spacy
746
750
  def method_missing(name, *args)
747
751
  @py_lexeme.send(name, *args)
748
752
  end
749
- end
750
753
 
754
+ def respond_to_missing?(sym)
755
+ sym ? true : super
756
+ end
757
+ end
751
758
  end
752
-