ruby-spacy 0.1.4.1 → 0.1.5.0

This diff shows the changes between publicly available package versions as they were released to their respective public registries. It is provided for informational purposes only.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/Gemfile +7 -7
  5. data/Gemfile.lock +2 -2
  6. data/README.md +7 -10
  7. data/examples/get_started/lexeme.rb +3 -1
  8. data/examples/get_started/linguistic_annotations.rb +3 -1
  9. data/examples/get_started/morphology.rb +3 -1
  10. data/examples/get_started/most_similar.rb +3 -1
  11. data/examples/get_started/named_entities.rb +4 -2
  12. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  13. data/examples/get_started/similarity.rb +4 -2
  14. data/examples/get_started/tokenization.rb +3 -1
  15. data/examples/get_started/visualizing_dependencies.rb +2 -2
  16. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  17. data/examples/get_started/visualizing_named_entities.rb +4 -2
  18. data/examples/get_started/vocab.rb +3 -1
  19. data/examples/get_started/word_vectors.rb +3 -1
  20. data/examples/japanese/ancestors.rb +6 -4
  21. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  22. data/examples/japanese/information_extraction.rb +6 -6
  23. data/examples/japanese/lemmatization.rb +3 -1
  24. data/examples/japanese/most_similar.rb +3 -1
  25. data/examples/japanese/named_entity_recognition.rb +3 -2
  26. data/examples/japanese/navigating_parse_tree.rb +19 -17
  27. data/examples/japanese/noun_chunks.rb +2 -0
  28. data/examples/japanese/pos_tagging.rb +3 -1
  29. data/examples/japanese/sentence_segmentation.rb +3 -2
  30. data/examples/japanese/similarity.rb +2 -0
  31. data/examples/japanese/tokenization.rb +2 -0
  32. data/examples/japanese/visualizing_dependencies.rb +3 -1
  33. data/examples/japanese/visualizing_named_entities.rb +4 -2
  34. data/examples/linguistic_features/ancestors.rb +7 -5
  35. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  36. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  37. data/examples/linguistic_features/information_extraction.rb +9 -9
  38. data/examples/linguistic_features/iterating_children.rb +6 -8
  39. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  40. data/examples/linguistic_features/lemmatization.rb +3 -1
  41. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  42. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  43. data/examples/linguistic_features/noun_chunks.rb +3 -1
  44. data/examples/linguistic_features/pos_tagging.rb +3 -1
  45. data/examples/linguistic_features/retokenize_1.rb +2 -0
  46. data/examples/linguistic_features/retokenize_2.rb +4 -2
  47. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  48. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  49. data/examples/linguistic_features/similarity.rb +4 -2
  50. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  51. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  52. data/examples/linguistic_features/tokenization.rb +3 -2
  53. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  54. data/examples/rule_based_matching/matcher.rb +4 -2
  55. data/lib/ruby-spacy/version.rb +1 -1
  56. data/lib/ruby-spacy.rb +139 -141
  57. data/ruby-spacy.gemspec +15 -17
  58. data/tags +132 -0
  59. metadata +69 -10
data/lib/ruby-spacy.rb CHANGED
@@ -1,17 +1,14 @@
  # frozen_string_literal: true

  require_relative "ruby-spacy/version"
- require 'enumerator'
- require 'strscan'
- require 'numpy'
- require 'pycall/import'
- include PyCall::Import
+ require "strscan"
+ require "numpy"
+ require "pycall/import"

  # This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
  module Spacy
-
  extend PyCall::Import
- spacy = PyCall.import_module('spacy')
+ spacy = PyCall.import_module("spacy")

  # Python `Language` class
  PyLanguage = spacy.language.Language
@@ -24,23 +21,22 @@ module Spacy

  # Python `Token` class object
  PyToken = spacy.tokens.Token
-
+
  # Python `Matcher` class object
  PyMatcher = spacy.matcher.Matcher

  # Python `displacy` object
  PyDisplacy = spacy.displacy

- # A utility module method to convert Python's generator object to a Ruby array,
+ # A utility module method to convert Python's generator object to a Ruby array,
  # mainly used on the items inside the array returned from dependency-related methods
  # such as {Span#rights}, {Span#lefts} and {Span#subtree}.
  def self.generator_to_array(py_generator)
- PyCall::List.(py_generator)
+ PyCall::List.call(py_generator)
  end

  # See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
  class Doc
-
  # @return [Object] a Python `Language` instance accessible via `PyCall`
  attr_reader :py_nlp

@@ -52,23 +48,19 @@ module Spacy

  include Enumerable

- alias_method :length, :count
- alias_method :len, :count
- alias_method :size, :count
+ alias length count
+ alias len count
+ alias size count

- # It is recommended to use {Language#read} method to create a doc. If you need to
- # create one using {Doc#initialize}, there are two method signatures:
+ # It is recommended to use {Language#read} method to create a doc. If you need to
+ # create one using {Doc#initialize}, there are two method signatures:
  # `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
  # @param nlp [Language] an instance of {Language} class
  # @param py_doc [Object] an instance of Python `Doc` class
  # @param text [String] the text string to be analyzed
  def initialize(nlp, py_doc: nil, text: nil)
  @py_nlp = nlp
- if py_doc
- @py_doc = py_doc
- else
- @py_doc = nlp.(text)
- end
+ @py_doc = py_doc || @py_doc = nlp.call(text)
  @text = @py_doc.text
  end

@@ -77,25 +69,25 @@ module Spacy
  # @param end_index [Integer] the end position of the span to be retokenized in the document
  # @param attributes [Hash] attributes to set on the merged token
  def retokenize(start_index, end_index, attributes = {})
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
- retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
+ retokenizer.merge(@py_doc[start_index..end_index], attrs: attributes)
  end
  end

  # Retokenizes the text splitting the specified token.
  # @param pos_in_doc [Integer] the position of the span to be retokenized in the document
- # @param split_array [Array<String>] text strings of the split results
+ # @param split_array [Array<String>] text strings of the split results
  # @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
  # @param attributes [Hash] the attributes of the split elements
  def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
  heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
  retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
  end
  end

  # String representation of the document.
- # @return [String]
+ # @return [String]
  def to_s
  @text
  end
@@ -104,7 +96,7 @@ module Spacy
  # @return [Array<Token>]
  def tokens
  results = []
- PyCall::List.(@py_doc).each do |py_token|
+ PyCall::List.call(@py_doc).each do |py_token|
  results << Token.new(py_token)
  end
  results
@@ -112,12 +104,12 @@ module Spacy

  # Iterates over the elements in the doc yielding a token instance each time.
  def each
- PyCall::List.(@py_doc).each do |py_token|
+ PyCall::List.call(@py_doc).each do |py_token|
  yield Token.new(py_token)
  end
  end

- # Returns a span of the specified range within the doc.
+ # Returns a span of the specified range within the doc.
  # The method should be used either of the two ways: `Doc#span(range)` or `Doc#span{start_pos, size_of_span}`.
  # @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
  # @param optional_size [Integer] an integer representing the size of the span
@@ -125,7 +117,7 @@ module Spacy
  def span(range_or_start, optional_size = nil)
  if optional_size
  start_index = range_or_start
- temp = tokens[start_index ... start_index + optional_size]
+ temp = tokens[start_index...start_index + optional_size]
  else
  start_index = range_or_start.first
  range = range_or_start
@@ -141,7 +133,7 @@ module Spacy
  # @return [Array<Span>]
  def noun_chunks
  chunk_array = []
- py_chunks = PyCall::List.(@py_doc.noun_chunks)
+ py_chunks = PyCall::List.call(@py_doc.noun_chunks)
  py_chunks.each do |py_chunk|
  chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
  end
@@ -152,7 +144,7 @@ module Spacy
  # @return [Array<Span>]
  def sents
  sentence_array = []
- py_sentences = PyCall::List.(@py_doc.sents)
+ py_sentences = PyCall::List.call(@py_doc.sents)
  py_sentences.each do |py_sent|
  sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
  end
@@ -164,9 +156,9 @@ module Spacy
  def ents
  # so that ents canbe "each"-ed in Ruby
  ent_array = []
- PyCall::List.(@py_doc.ents).each do |ent|
+ PyCall::List.call(@py_doc.ents).each do |ent|
  ent.define_singleton_method :label do
- return self.label_
+ label_
  end
  ent_array << ent
  end
@@ -178,15 +170,15 @@ module Spacy
  def [](range)
  if range.is_a?(Range)
  py_span = @py_doc[range]
- return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
+ Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
  else
- return Token.new(@py_doc[range])
+ Token.new(@py_doc[range])
  end
  end

  # Returns a semantic similarity estimate.
  # @param other [Doc] the other doc to which a similarity estimation is made
- # @return [Float]
+ # @return [Float]
  def similarity(other)
  py_doc.similarity(other.py_doc)
  end
@@ -196,18 +188,21 @@ module Spacy
  # @param compact [Boolean] only relevant to the `dep' style
  # @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
  def displacy(style: "dep", compact: false)
- PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
+ PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
  end

  # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
  def method_missing(name, *args)
  @py_doc.send(name, *args)
  end
+
+ def respond_to_missing?(sym)
+ sym ? true : super
+ end
  end

  # See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
  class Language
-
  # @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
  attr_reader :spacy_nlp_id

@@ -245,7 +240,7 @@ module Spacy
  # @return [Array<String>] An array of text strings representing pipeline components
  def pipe_names
  pipe_array = []
- PyCall::List.(@py_nlp.pipe_names).each do |pipe|
+ PyCall::List.call(@py_nlp.pipe_names).each do |pipe|
  pipe_array << pipe
  end
  pipe_array
@@ -268,24 +263,23 @@ module Spacy
  # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
  # @param vector [Object] A vector representation of a word (whether existing or non-existing)
  # @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
- def most_similar(vector, n)
+ def most_similar(vector, num)
  vec_array = Numpy.asarray([vector])
- py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
- key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
- keys = key_texts.map{|kt| kt[0]}
- texts = key_texts.map{|kt| kt[1]}
- best_rows = PyCall::List.(py_result[1])[0]
- scores = PyCall::List.(py_result[2])[0]
+ py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: num)
+ key_texts = PyCall.eval("[[str(num), #{@spacy_nlp_id}.vocab[num].text] for num in #{py_result[0][0].tolist}]")
+ keys = key_texts.map { |kt| kt[0] }
+ texts = key_texts.map { |kt| kt[1] }
+ best_rows = PyCall::List.call(py_result[1])[0]
+ scores = PyCall::List.call(py_result[2])[0]

  results = []
- n.times do |i|
- result = {key: keys[i].to_i,
- text: texts[i],
- best_row: best_rows[i],
- score: scores[i]
- }
+ num.times do |i|
+ result = { key: keys[i].to_i,
+ text: texts[i],
+ best_row: best_rows[i],
+ score: scores[i] }
  result.each_key do |key|
- result.define_singleton_method(key){ result[key] }
+ result.define_singleton_method(key) { result[key] }
  end
  results << result
  end
@@ -297,9 +291,9 @@ module Spacy
  # @param disable [Array<String>]
  # @param batch_size [Integer]
  # @return [Array<Doc>]
- def pipe(texts, disable: [], batch_size: 50)
+ def pipe(texts, disable: [], batch_size: 50)
  docs = []
- PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
+ PyCall::List.call(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
  docs << Doc.new(@py_nlp, py_doc: py_doc)
  end
  docs
@@ -309,18 +303,21 @@ module Spacy
  def method_missing(name, *args)
  @py_nlp.send(name, *args)
  end
+
+ def respond_to_missing?(sym)
+ sym ? true : super
+ end
  end

  # See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
  class Matcher
-
  # @return [Object] a Python `Matcher` instance accessible via `PyCall`
  attr_reader :py_matcher

  # Creates a {Matcher} instance
  # @param nlp [Language] an instance of {Language} class
  def initialize(nlp)
- @py_matcher = PyMatcher.(nlp.vocab)
+ @py_matcher = PyMatcher.call(nlp.vocab)
  end

  # Adds a label string and a text pattern.
@@ -334,16 +331,17 @@ module Spacy
  # @param doc [Doc] an {Doc} instance
  # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
  def match(doc)
- str_results = @py_matcher.(doc.py_doc).to_s
+ str_results = @py_matcher.call(doc.py_doc).to_s
  s = StringScanner.new(str_results[1..-2])
  results = []
  while s.scan_until(/(\d+), (\d+), (\d+)/)
  next unless s.matched
+
  triple = s.matched.split(", ")
  match_id = triple[0].to_i
  start_index = triple[1].to_i
  end_index = triple[2].to_i - 1
- results << {match_id: match_id, start_index: start_index, end_index: end_index}
+ results << { match_id: match_id, start_index: start_index, end_index: end_index }
  end
  results
  end
@@ -351,7 +349,6 @@ module Spacy

  # See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
  class Span
-
  # @return [Object] a Python `Span` instance accessible via `PyCall`
  attr_reader :py_span

@@ -360,11 +357,11 @@ module Spacy

  include Enumerable

- alias_method :length, :count
- alias_method :len, :count
- alias_method :size, :count
+ alias length count
+ alias len count
+ alias size count

- # It is recommended to use {Doc#span} method to create a span. If you need to
+ # It is recommended to use {Doc#span} method to create a span. If you need to
  # create one using {Span#initialize}, there are two method signatures:
  # `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
  # @param doc [Doc] the document to which this span belongs to
@@ -373,18 +370,14 @@ module Spacy
  # @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
  def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
  @doc = doc
- if py_span
- @py_span = py_span
- else
- @py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
- end
+ @py_span = py_span || @py_span = PySpan.call(@doc.py_doc, start_index, end_index + 1, options)
  end

  # Returns an array of tokens contained in the span.
  # @return [Array<Token>]
  def tokens
  results = []
- PyCall::List.(@py_span).each do |py_token|
+ PyCall::List.call(@py_span).each do |py_token|
  results << Token.new(py_token)
  end
  results
@@ -392,7 +385,7 @@ module Spacy

  # Iterates over the elements in the span yielding a token instance each time.
  def each
- PyCall::List.(@py_span).each do |py_token|
+ PyCall::List.call(@py_span).each do |py_token|
  yield Token.new(py_token)
  end
  end
@@ -401,7 +394,7 @@ module Spacy
  # @return [Array<Span>]
  def noun_chunks
  chunk_array = []
- py_chunks = PyCall::List.(@py_span.noun_chunks)
+ py_chunks = PyCall::List.call(@py_span.noun_chunks)
  py_chunks.each do |py_span|
  chunk_array << Span.new(@doc, py_span: py_span)
  end
@@ -410,7 +403,7 @@ module Spacy

  # Returns the head token
  # @return [Token]
- def root
+ def root
  Token.new(@py_span.root)
  end

@@ -418,7 +411,7 @@ module Spacy
  # @return [Array<Span>]
  def sents
  sentence_array = []
- py_sentences = PyCall::List.(@py_span.sents)
+ py_sentences = PyCall::List.call(@py_span.sents)
  py_sentences.each do |py_span|
  sentence_array << Span.new(@doc, py_span: py_span)
  end
@@ -429,7 +422,7 @@ module Spacy
  # @return [Array<Span>]
  def ents
  ent_array = []
- PyCall::List.(@py_span.ents).each do |py_span|
+ PyCall::List.call(@py_span.ents).each do |py_span|
  ent_array << Span.new(@doc, py_span: py_span)
  end
  ent_array
@@ -438,8 +431,8 @@ module Spacy
  # Returns a span that represents the sentence that the given span is part of.
  # @return [Span]
  def sent
- py_span = @py_span.sent
- return Span.new(@doc, py_span: py_span)
+ py_span = @py_span.sent
+ Span.new(@doc, py_span: py_span)
  end

  # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
@@ -447,67 +440,67 @@ module Spacy
  def [](range)
  if range.is_a?(Range)
  py_span = @py_span[range]
- return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
+ Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
  else
- return Token.new(@py_span[range])
+ Token.new(@py_span[range])
  end
  end

  # Returns a semantic similarity estimate.
  # @param other [Span] the other span to which a similarity estimation is conducted
- # @return [Float]
+ # @return [Float]
  def similarity(other)
  py_span.similarity(other.py_span)
  end

  # Creates a document instance from the span
- # @return [Doc]
+ # @return [Doc]
  def as_doc
- Doc.new(@doc.py_nlp, text: self.text)
+ Doc.new(@doc.py_nlp, text: text)
  end

  # Returns tokens conjugated to the root of the span.
  # @return [Array<Token>] an array of tokens
  def conjuncts
  conjunct_array = []
- PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
+ PyCall::List.call(@py_span.conjuncts).each do |py_conjunct|
  conjunct_array << Token.new(py_conjunct)
  end
  conjunct_array
  end

  # Returns tokens that are to the left of the span, whose heads are within the span.
- # @return [Array<Token>] an array of tokens
+ # @return [Array<Token>] an array of tokens
  def lefts
  left_array = []
- PyCall::List.(@py_span.lefts).each do |py_left|
+ PyCall::List.call(@py_span.lefts).each do |py_left|
  left_array << Token.new(py_left)
  end
  left_array
  end

  # Returns Tokens that are to the right of the span, whose heads are within the span.
- # @return [Array<Token>] an array of Tokens
+ # @return [Array<Token>] an array of Tokens
  def rights
  right_array = []
- PyCall::List.(@py_span.rights).each do |py_right|
+ PyCall::List.call(@py_span.rights).each do |py_right|
  right_array << Token.new(py_right)
  end
  right_array
  end

  # Returns Tokens that are within the span and tokens that descend from them.
- # @return [Array<Token>] an array of tokens
+ # @return [Array<Token>] an array of tokens
  def subtree
  subtree_array = []
- PyCall::List.(@py_span.subtree).each do |py_subtree|
+ PyCall::List.call(@py_span.subtree).each do |py_subtree|
  subtree_array << Token.new(py_subtree)
  end
  subtree_array
  end

  # Returns the label
- # @return [String]
+ # @return [String]
  def label
  @py_span.label_
  end
@@ -516,11 +509,14 @@ module Spacy
  def method_missing(name, *args)
  @py_span.send(name, *args)
  end
+
+ def respond_to_missing?(sym)
+ sym ? true : super
+ end
  end

  # See also spaCy Python API document for [`Token`](https://spacy.io/api/token).
  class Token
-
  # @return [Object] a Python `Token` instance accessible via `PyCall`
  attr_reader :py_token

@@ -528,17 +524,16 @@ module Spacy
  attr_reader :text

  # It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
- # There is no way to generate a token from scratch but relying on a pre-exising Python {Token} object.
+ # There is no way to generate a token from scratch but relying on a pre-exising Python `Token` object.
  # @param py_token [Object] Python `Token` object
  def initialize(py_token)
  @py_token = py_token
  @text = @py_token.text
  end

-
  # Returns the head token
  # @return [Token]
- def head
+ def head
  Token.new(@py_token.head)
  end

@@ -546,7 +541,7 @@ module Spacy
  # @return [Array<Token>] an array of tokens
  def subtree
  descendant_array = []
- PyCall::List.(@py_token.subtree).each do |descendant|
+ PyCall::List.call(@py_token.subtree).each do |descendant|
  descendant_array << Token.new(descendant)
  end
  descendant_array
@@ -556,7 +551,7 @@ module Spacy
  # @return [Array<Token>] an array of tokens
  def ancestors
  ancestor_array = []
- PyCall::List.(@py_token.ancestors).each do |ancestor|
+ PyCall::List.call(@py_token.ancestors).each do |ancestor|
  ancestor_array << Token.new(ancestor)
  end
  ancestor_array
@@ -566,7 +561,7 @@ module Spacy
  # @return [Array<Token>] an array of tokens
  def children
  child_array = []
- PyCall::List.(@py_token.children).each do |child|
+ PyCall::List.call(@py_token.children).each do |child|
  child_array << Token.new(child)
  end
  child_array
@@ -576,7 +571,7 @@ module Spacy
  # @return [Array<Token>] an array of tokens
  def lefts
  token_array = []
- PyCall::List.(@py_token.lefts).each do |token|
+ PyCall::List.call(@py_token.lefts).each do |token|
  token_array << Token.new(token)
  end
  token_array
@@ -586,89 +581,87 @@ module Spacy
  # @return [Array<Token>] an array of tokens
  def rights
  token_array = []
- PyCall::List.(@py_token.rights).each do |token|
+ PyCall::List.call(@py_token.rights).each do |token|
  token_array << Token.new(token)
  end
  token_array
  end

  # String representation of the token.
- # @return [String]
+ # @return [String]
  def to_s
  @text
  end

  # Returns a hash or string of morphological information
  # @param hash [Boolean] if true, a hash will be returned instead of a string
- # @return [Hash, String]
- def morphology(hash = true)
+ # @return [Hash, String]
+ def morphology(hash: true)
  if @py_token.has_morph
  morph_analysis = @py_token.morph
- if hash
- return morph_analysis.to_dict
- else
- return morph_analysis.to_s
- end
- else
  if hash
- results = {}
+ morph_analysis.to_dict
  else
- return ""
+ morph_analysis.to_s
  end
+ elsif hash
+ {}
+ else
+ ""
  end
  end

  # Returns the lemma by calling `lemma_' of `@py_token` object
- # @return [String]
+ # @return [String]
  def lemma
  @py_token.lemma_
  end

  # Returns the lowercase form by calling `lower_' of `@py_token` object
- # @return [String]
+ # @return [String]
  def lower
  @py_token.lower_
  end

  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
- # @return [String]
+ # @return [String]
  def shape
  @py_token.shape_
  end

  # Returns the pos by calling `pos_' of `@py_token` object
- # @return [String]
+ # @return [String]
  def pos
  @py_token.pos_
  end

  # Returns the fine-grained pos by calling `tag_' of `@py_token` object
- # @return [String]
- def tag
+ # @return [String]
+ def tag
  @py_token.tag_
  end

  # Returns the dependency relation by calling `dep_' of `@py_token` object
- # @return [String]
+ # @return [String]
  def dep
  @py_token.dep_
  end
-
+
  # Returns the language by calling `lang_' of `@py_token` object
- # @return [String]
- def lang
+ # @return [String]
+ def lang
  @py_token.lang_
  end

  # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
- # @return [String]
- def whitespace
+ # @return [String]
+ def whitespace
  @py_token.whitespace_
  end

  # Returns the named entity type by calling `ent_type_' of `@py_token` object
- # @return [String]
- def ent_type
+ # @return [String]
+ def ent_type
  @py_token.ent_type_
  end

@@ -682,11 +675,14 @@ module Spacy
  def method_missing(name, *args)
  @py_token.send(name, *args)
  end
+
+ def respond_to_missing?(sym)
+ sym ? true : super
+ end
  end

  # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
- class Lexeme
-
+ class Lexeme
  # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
  attr_reader :py_lexeme

@@ -702,50 +698,50 @@ module Spacy
  end

  # String representation of the token.
- # @return [String]
+ # @return [String]
  def to_s
  @text
  end

  # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
- # @return [String]
+ # @return [String]
  def lower
  @py_lexeme.lower_
  end

  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
- # @return [String]
+ # @return [String]
  def shape
  @py_lexeme.shape_
  end

  # Returns the language by calling `lang_' of `@py_lexeme` object
- # @return [String]
- def lang
+ # @return [String]
+ def lang
  @py_lexeme.lang_
  end

  # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
- # @return [String]
- def prefix
+ # @return [String]
+ def prefix
  @py_lexeme.prefix_
  end
- #
+
  # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
- # @return [String]
+ # @return [String]
  def suffix
  @py_lexeme.suffix_
  end

  # Returns the lexemes's norm, i.e. a normalized form of the lexeme calling `norm_' of `@py_lexeme` object
- # @return [String]
+ # @return [String]
  def norm
  @py_lexeme.norm_
  end

  # Returns a semantic similarity estimate.
- # @param other [Lexeme] the other doc to which a similarity estimation is made
- # @return [Float]
+ # @param other [Lexeme] the other lexeme to which a similarity estimation is made
+ # @return [Float]
  def similarity(other)
  @py_lexeme.similarity(other.py_lexeme)
  end
@@ -754,7 +750,9 @@ module Spacy
  def method_missing(name, *args)
  @py_lexeme.send(name, *args)
  end
- end
-
+ def respond_to_missing?(sym)
+ sym ? true : super
+ end
+ end
  end
-
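
The one caller-visible signature change in this file is Token#morphology, whose flag moved from a positional argument (`morphology(hash = true)`) to a keyword argument (`morphology(hash: true)`); each wrapper class also gains a `respond_to_missing?` hook alongside its `method_missing` delegation. A minimal sketch of the updated calling convention, assuming ruby-spacy 0.1.5.0 with PyCall, spaCy, and an English model such as `en_core_web_sm` installed (the sample sentence and the printed values are illustrative):

  require "ruby-spacy"

  nlp = Spacy::Language.new("en_core_web_sm")
  doc = nlp.read("She was reading the paper.")
  token = doc.tokens[2] # the token "reading"

  # 0.1.4.1 took a positional flag: token.morphology(false)
  # 0.1.5.0 takes a keyword argument instead:
  p token.morphology(hash: true)  # e.g. {"Aspect" => "Prog", "Tense" => "Pres", "VerbForm" => "Part"}
  p token.morphology(hash: false) # e.g. "Aspect=Prog|Tense=Pres|VerbForm=Part"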