ruby-spacy 0.1.4 → 0.1.5.0

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/CHANGELOG.md +5 -1
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +3 -3
  7. data/README.md +40 -39
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +30 -27
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +30 -27
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +142 -136
  58. data/ruby-spacy.gemspec +15 -17
  59. data/tags +132 -0
  60. metadata +69 -10
data/lib/ruby-spacy.rb CHANGED
@@ -1,17 +1,14 @@
 # frozen_string_literal: true

 require_relative "ruby-spacy/version"
-require 'enumerator'
-require 'strscan'
-require 'numpy'
-require 'pycall/import'
-include PyCall::Import
+require "strscan"
+require "numpy"
+require "pycall/import"

 # This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
 module Spacy
-
   extend PyCall::Import
-  spacy = PyCall.import_module('spacy')
+  spacy = PyCall.import_module("spacy")

   # Python `Language` class
   PyLanguage = spacy.language.Language
@@ -24,23 +21,22 @@ module Spacy

   # Python `Token` class object
   PyToken = spacy.tokens.Token
-
+
   # Python `Matcher` class object
   PyMatcher = spacy.matcher.Matcher

   # Python `displacy` object
   PyDisplacy = spacy.displacy

-  # A utility module method to convert Python's generator object to a Ruby array,
+  # A utility module method to convert Python's generator object to a Ruby array,
   # mainly used on the items inside the array returned from dependency-related methods
   # such as {Span#rights}, {Span#lefts} and {Span#subtree}.
   def self.generator_to_array(py_generator)
-    PyCall::List.(py_generator)
+    PyCall::List.call(py_generator)
   end

   # See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
   class Doc
-
     # @return [Object] a Python `Language` instance accessible via `PyCall`
     attr_reader :py_nlp

@@ -52,23 +48,19 @@ module Spacy

     include Enumerable

-    alias_method :length, :count
-    alias_method :len, :count
-    alias_method :size, :count
+    alias length count
+    alias len count
+    alias size count

-    # It is recommended to use {Language#read} method to create a doc. If you need to
-    # create one using {Doc#initialize}, there are two method signatures:
+    # It is recommended to use {Language#read} method to create a doc. If you need to
+    # create one using {Doc#initialize}, there are two method signatures:
     # `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
     # @param nlp [Language] an instance of {Language} class
     # @param py_doc [Object] an instance of Python `Doc` class
     # @param text [String] the text string to be analyzed
     def initialize(nlp, py_doc: nil, text: nil)
       @py_nlp = nlp
-      if py_doc
-        @py_doc = py_doc
-      else
-        @py_doc = nlp.(text)
-      end
+      @py_doc = py_doc || @py_doc = nlp.call(text)
       @text = @py_doc.text
     end

@@ -77,25 +69,25 @@ module Spacy
     # @param end_index [Integer] the end position of the span to be retokenized in the document
     # @param attributes [Hash] attributes to set on the merged token
     def retokenize(start_index, end_index, attributes = {})
-      PyCall.with(@py_doc.retokenize()) do |retokenizer|
-        retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
+      PyCall.with(@py_doc.retokenize) do |retokenizer|
+        retokenizer.merge(@py_doc[start_index..end_index], attrs: attributes)
       end
     end

     # Retokenizes the text splitting the specified token.
     # @param pos_in_doc [Integer] the position of the span to be retokenized in the document
-    # @param split_array [Array<String>] text strings of the split results
+    # @param split_array [Array<String>] text strings of the split results
     # @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
     # @param attributes [Hash] the attributes of the split elements
     def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
-      PyCall.with(@py_doc.retokenize()) do |retokenizer|
+      PyCall.with(@py_doc.retokenize) do |retokenizer|
         heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
         retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
       end
     end

     # String representation of the document.
-    # @return [String]
+    # @return [String]
     def to_s
       @text
     end
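
Note on usage: `Doc#retokenize`, shown above, merges an inclusive token range into a single token. A minimal sketch (hypothetical sentence and indices; assumes the `en_core_web_sm` model is installed):

    require "ruby-spacy"

    nlp = Spacy::Language.new("en_core_web_sm")
    doc = nlp.read("Tokyo Skytree stands in Japan")
    doc.retokenize(0, 1) # merge tokens 0..1 ("Tokyo Skytree") into one token
    puts doc.tokens.map(&:text).join(", ")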
@@ -104,7 +96,7 @@ module Spacy
     # @return [Array<Token>]
     def tokens
       results = []
-      PyCall::List.(@py_doc).each do |py_token|
+      PyCall::List.call(@py_doc).each do |py_token|
         results << Token.new(py_token)
       end
       results
@@ -112,12 +104,12 @@ module Spacy

     # Iterates over the elements in the doc yielding a token instance each time.
     def each
-      PyCall::List.(@py_doc).each do |py_token|
+      PyCall::List.call(@py_doc).each do |py_token|
         yield Token.new(py_token)
       end
     end

-    # Returns a span of the specified range within the doc.
+    # Returns a span of the specified range within the doc.
     # The method should be used either of the two ways: `Doc#span(range)` or `Doc#span{start_pos, size_of_span}`.
     # @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
     # @param optional_size [Integer] an integer representing the size of the span
@@ -125,7 +117,7 @@ module Spacy
     def span(range_or_start, optional_size = nil)
       if optional_size
         start_index = range_or_start
-        temp = tokens[start_index ... start_index + optional_size]
+        temp = tokens[start_index...start_index + optional_size]
       else
         start_index = range_or_start.first
         range = range_or_start
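
Note on usage: as the doc comment above states, `Doc#span` accepts either a range or a start position plus a size. A short sketch of the two call forms (reusing the hypothetical `nlp` from the previous note):

    doc = nlp.read("This is a sentence about ruby-spacy")
    span_a = doc.span(2..4) # tokens 2 through 4
    span_b = doc.span(2, 3) # three tokens starting at position 2
    puts span_a.text == span_b.text # => true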
@@ -141,7 +133,7 @@ module Spacy
     # @return [Array<Span>]
     def noun_chunks
       chunk_array = []
-      py_chunks = PyCall::List.(@py_doc.noun_chunks)
+      py_chunks = PyCall::List.call(@py_doc.noun_chunks)
       py_chunks.each do |py_chunk|
         chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
       end
@@ -152,7 +144,7 @@ module Spacy
     # @return [Array<Span>]
     def sents
       sentence_array = []
-      py_sentences = PyCall::List.(@py_doc.sents)
+      py_sentences = PyCall::List.call(@py_doc.sents)
       py_sentences.each do |py_sent|
         sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
       end
@@ -164,9 +156,9 @@ module Spacy
     def ents
       # so that ents canbe "each"-ed in Ruby
       ent_array = []
-      PyCall::List.(@py_doc.ents).each do |ent|
+      PyCall::List.call(@py_doc.ents).each do |ent|
         ent.define_singleton_method :label do
-          return self.label_
+          label_
         end
         ent_array << ent
       end
@@ -178,15 +170,15 @@ module Spacy
     def [](range)
       if range.is_a?(Range)
         py_span = @py_doc[range]
-        return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
+        Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
       else
-        return Token.new(@py_doc[range])
+        Token.new(@py_doc[range])
       end
     end

     # Returns a semantic similarity estimate.
     # @param other [Doc] the other doc to which a similarity estimation is made
-    # @return [Float]
+    # @return [Float]
     def similarity(other)
       py_doc.similarity(other.py_doc)
     end
@@ -196,18 +188,21 @@ module Spacy
     # @param compact [Boolean] only relevant to the `dep' style
     # @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
     def displacy(style: "dep", compact: false)
-      PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
+      PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
     end

     # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
     def method_missing(name, *args)
       @py_doc.send(name, *args)
     end
+
+    def respond_to_missing?(sym)
+      sym ? true : super
+    end
   end

   # See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
   class Language
-
     # @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
     attr_reader :spacy_nlp_id
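
Note on usage: `Doc#displacy`, changed in the hunk above, returns an SVG string for `style: "dep"` and an HTML string for `style: "ent"`. A minimal sketch that saves the dependency visualization (file name is arbitrary):

    doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
    dep_svg = doc.displacy(style: "dep", compact: true)
    File.open("sample.svg", "w") { |file| file.write(dep_svg) }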

@@ -245,7 +240,7 @@ module Spacy
     # @return [Array<String>] An array of text strings representing pipeline components
     def pipe_names
       pipe_array = []
-      PyCall::List.(@py_nlp.pipe_names).each do |pipe|
+      PyCall::List.call(@py_nlp.pipe_names).each do |pipe|
         pipe_array << pipe
       end
       pipe_array
@@ -268,18 +263,25 @@ module Spacy
     # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
     # @param vector [Object] A vector representation of a word (whether existing or non-existing)
     # @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
-    def most_similar(vector, n)
+    def most_similar(vector, num)
       vec_array = Numpy.asarray([vector])
-      py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
-      key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
-      keys = key_texts.map{|kt| kt[0]}
-      texts = key_texts.map{|kt| kt[1]}
-      best_rows = PyCall::List.(py_result[1])[0]
-      scores = PyCall::List.(py_result[2])[0]
+      py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: num)
+      key_texts = PyCall.eval("[[str(num), #{@spacy_nlp_id}.vocab[num].text] for num in #{py_result[0][0].tolist}]")
+      keys = key_texts.map { |kt| kt[0] }
+      texts = key_texts.map { |kt| kt[1] }
+      best_rows = PyCall::List.call(py_result[1])[0]
+      scores = PyCall::List.call(py_result[2])[0]

       results = []
-      n.times do |i|
-        results << {key: keys[i].to_i, text: texts[i], best_row: best_rows[i], score: scores[i]}
+      num.times do |i|
+        result = { key: keys[i].to_i,
+                   text: texts[i],
+                   best_row: best_rows[i],
+                   score: scores[i] }
+        result.each_key do |key|
+          result.define_singleton_method(key) { result[key] }
+        end
+        results << result
       end
       results
     end
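
Note on usage: each hash returned by `Language#most_similar` now also answers to `key`, `text`, `best_row` and `score` as singleton methods, so both access styles below should work. A sketch (assumes a model with word vectors, such as `en_core_web_lg`, and the `get_lexeme` helper used in the gem's bundled examples):

    nlp = Spacy::Language.new("en_core_web_lg")
    tokyo = nlp.get_lexeme("Tokyo")
    results = nlp.most_similar(tokyo.vector, 10)
    results.each do |lexeme|
      puts "#{lexeme[:text]}: #{lexeme[:score]}" # hash-style access
      puts "#{lexeme.text}: #{lexeme.score}"     # method-style access
    end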
@@ -289,9 +291,9 @@ module Spacy
     # @param disable [Array<String>]
     # @param batch_size [Integer]
     # @return [Array<Doc>]
-    def pipe(texts, disable: [], batch_size: 50)
+    def pipe(texts, disable: [], batch_size: 50)
       docs = []
-      PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
+      PyCall::List.call(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
         docs << Doc.new(@py_nlp, py_doc: py_doc)
       end
       docs
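
Note on usage: `Language#pipe` batches multiple texts through the pipeline and returns an array of `Doc` objects; components named in `disable` are skipped. A short sketch:

    texts = ["This is the first sentence.", "And here is another one."]
    docs = nlp.pipe(texts, disable: ["ner"], batch_size: 2)
    docs.each { |doc| puts doc.tokens.map(&:text).join(" ") }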
@@ -301,18 +303,21 @@ module Spacy
     def method_missing(name, *args)
       @py_nlp.send(name, *args)
     end
+
+    def respond_to_missing?(sym)
+      sym ? true : super
+    end
   end

   # See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
   class Matcher
-
     # @return [Object] a Python `Matcher` instance accessible via `PyCall`
     attr_reader :py_matcher

     # Creates a {Matcher} instance
     # @param nlp [Language] an instance of {Language} class
     def initialize(nlp)
-      @py_matcher = PyMatcher.(nlp.vocab)
+      @py_matcher = PyMatcher.call(nlp.vocab)
     end

     # Adds a label string and a text pattern.
@@ -326,16 +331,17 @@ module Spacy
     # @param doc [Doc] an {Doc} instance
     # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
     def match(doc)
-      str_results = @py_matcher.(doc.py_doc).to_s
+      str_results = @py_matcher.call(doc.py_doc).to_s
       s = StringScanner.new(str_results[1..-2])
       results = []
       while s.scan_until(/(\d+), (\d+), (\d+)/)
         next unless s.matched
+
         triple = s.matched.split(", ")
         match_id = triple[0].to_i
         start_index = triple[1].to_i
         end_index = triple[2].to_i - 1
-        results << {match_id: match_id, start_index: start_index, end_index: end_index}
+        results << { match_id: match_id, start_index: start_index, end_index: end_index }
       end
       results
     end
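
Note on usage: `Matcher#match` returns hashes with `:match_id`, `:start_index` and an inclusive `:end_index`, which map directly onto `Span.new`. A sketch (pattern and sentence are illustrative):

    nlp = Spacy::Language.new("en_core_web_sm")
    matcher = Spacy::Matcher.new(nlp)
    matcher.add("HelloWorld", [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]])
    doc = nlp.read("Hello, world! Hello world!")
    matcher.match(doc).each do |m|
      span = Spacy::Span.new(doc, start_index: m[:start_index], end_index: m[:end_index])
      puts "#{m[:match_id]}: #{span.text}"
    end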
@@ -343,7 +349,6 @@ module Spacy

   # See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
   class Span
-
     # @return [Object] a Python `Span` instance accessible via `PyCall`
     attr_reader :py_span

@@ -352,11 +357,11 @@ module Spacy

     include Enumerable

-    alias_method :length, :count
-    alias_method :len, :count
-    alias_method :size, :count
+    alias length count
+    alias len count
+    alias size count

-    # It is recommended to use {Doc#span} method to create a span. If you need to
+    # It is recommended to use {Doc#span} method to create a span. If you need to
     # create one using {Span#initialize}, there are two method signatures:
     # `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
     # @param doc [Doc] the document to which this span belongs to
@@ -365,18 +370,14 @@ module Spacy
     # @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
     def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
       @doc = doc
-      if py_span
-        @py_span = py_span
-      else
-        @py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
-      end
+      @py_span = py_span || @py_span = PySpan.call(@doc.py_doc, start_index, end_index + 1, options)
     end

     # Returns an array of tokens contained in the span.
     # @return [Array<Token>]
     def tokens
       results = []
-      PyCall::List.(@py_span).each do |py_token|
+      PyCall::List.call(@py_span).each do |py_token|
         results << Token.new(py_token)
       end
       results
@@ -384,7 +385,7 @@ module Spacy

     # Iterates over the elements in the span yielding a token instance each time.
     def each
-      PyCall::List.(@py_span).each do |py_token|
+      PyCall::List.call(@py_span).each do |py_token|
         yield Token.new(py_token)
       end
     end
@@ -393,7 +394,7 @@ module Spacy
     # @return [Array<Span>]
     def noun_chunks
       chunk_array = []
-      py_chunks = PyCall::List.(@py_span.noun_chunks)
+      py_chunks = PyCall::List.call(@py_span.noun_chunks)
       py_chunks.each do |py_span|
         chunk_array << Span.new(@doc, py_span: py_span)
       end
@@ -402,7 +403,7 @@ module Spacy

     # Returns the head token
     # @return [Token]
-    def root
+    def root
       Token.new(@py_span.root)
     end

@@ -410,7 +411,7 @@ module Spacy
     # @return [Array<Span>]
     def sents
       sentence_array = []
-      py_sentences = PyCall::List.(@py_span.sents)
+      py_sentences = PyCall::List.call(@py_span.sents)
       py_sentences.each do |py_span|
         sentence_array << Span.new(@doc, py_span: py_span)
       end
@@ -421,7 +422,7 @@ module Spacy
     # @return [Array<Span>]
     def ents
       ent_array = []
-      PyCall::List.(@py_span.ents).each do |py_span|
+      PyCall::List.call(@py_span.ents).each do |py_span|
         ent_array << Span.new(@doc, py_span: py_span)
       end
       ent_array
@@ -430,8 +431,8 @@ module Spacy
     # Returns a span that represents the sentence that the given span is part of.
     # @return [Span]
     def sent
-      py_span = @py_span.sent
-      return Span.new(@doc, py_span: py_span)
+      py_span = @py_span.sent
+      Span.new(@doc, py_span: py_span)
     end

     # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
@@ -439,67 +440,67 @@ module Spacy
     def [](range)
       if range.is_a?(Range)
         py_span = @py_span[range]
-        return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
+        Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
       else
-        return Token.new(@py_span[range])
+        Token.new(@py_span[range])
       end
     end

     # Returns a semantic similarity estimate.
     # @param other [Span] the other span to which a similarity estimation is conducted
-    # @return [Float]
+    # @return [Float]
     def similarity(other)
       py_span.similarity(other.py_span)
     end

     # Creates a document instance from the span
-    # @return [Doc]
+    # @return [Doc]
     def as_doc
-      Doc.new(@doc.py_nlp, text: self.text)
+      Doc.new(@doc.py_nlp, text: text)
     end

     # Returns tokens conjugated to the root of the span.
     # @return [Array<Token>] an array of tokens
     def conjuncts
       conjunct_array = []
-      PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
+      PyCall::List.call(@py_span.conjuncts).each do |py_conjunct|
         conjunct_array << Token.new(py_conjunct)
       end
       conjunct_array
     end

     # Returns tokens that are to the left of the span, whose heads are within the span.
-    # @return [Array<Token>] an array of tokens
+    # @return [Array<Token>] an array of tokens
     def lefts
       left_array = []
-      PyCall::List.(@py_span.lefts).each do |py_left|
+      PyCall::List.call(@py_span.lefts).each do |py_left|
         left_array << Token.new(py_left)
       end
       left_array
     end

     # Returns Tokens that are to the right of the span, whose heads are within the span.
-    # @return [Array<Token>] an array of Tokens
+    # @return [Array<Token>] an array of Tokens
     def rights
       right_array = []
-      PyCall::List.(@py_span.rights).each do |py_right|
+      PyCall::List.call(@py_span.rights).each do |py_right|
         right_array << Token.new(py_right)
       end
       right_array
     end

     # Returns Tokens that are within the span and tokens that descend from them.
-    # @return [Array<Token>] an array of tokens
+    # @return [Array<Token>] an array of tokens
     def subtree
       subtree_array = []
-      PyCall::List.(@py_span.subtree).each do |py_subtree|
+      PyCall::List.call(@py_span.subtree).each do |py_subtree|
         subtree_array << Token.new(py_subtree)
       end
       subtree_array
     end

     # Returns the label
-    # @return [String]
+    # @return [String]
     def label
       @py_span.label_
     end
@@ -508,11 +509,14 @@ module Spacy
     def method_missing(name, *args)
       @py_span.send(name, *args)
     end
+
+    def respond_to_missing?(sym)
+      sym ? true : super
+    end
   end

   # See also spaCy Python API document for [`Token`](https://spacy.io/api/token).
   class Token
-
     # @return [Object] a Python `Token` instance accessible via `PyCall`
     attr_reader :py_token

@@ -520,17 +524,16 @@ module Spacy
     attr_reader :text

     # It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
-    # There is no way to generate a token from scratch but relying on a pre-exising Python {Token} object.
+    # There is no way to generate a token from scratch but relying on a pre-exising Python `Token` object.
     # @param py_token [Object] Python `Token` object
     def initialize(py_token)
       @py_token = py_token
       @text = @py_token.text
     end

-
     # Returns the head token
     # @return [Token]
-    def head
+    def head
       Token.new(@py_token.head)
     end

@@ -538,7 +541,7 @@ module Spacy
     # @return [Array<Token>] an array of tokens
     def subtree
       descendant_array = []
-      PyCall::List.(@py_token.subtree).each do |descendant|
+      PyCall::List.call(@py_token.subtree).each do |descendant|
         descendant_array << Token.new(descendant)
       end
       descendant_array
@@ -548,7 +551,7 @@ module Spacy
     # @return [Array<Token>] an array of tokens
     def ancestors
       ancestor_array = []
-      PyCall::List.(@py_token.ancestors).each do |ancestor|
+      PyCall::List.call(@py_token.ancestors).each do |ancestor|
         ancestor_array << Token.new(ancestor)
       end
       ancestor_array
@@ -558,7 +561,7 @@ module Spacy
     # @return [Array<Token>] an array of tokens
     def children
       child_array = []
-      PyCall::List.(@py_token.children).each do |child|
+      PyCall::List.call(@py_token.children).each do |child|
         child_array << Token.new(child)
       end
       child_array
@@ -568,7 +571,7 @@ module Spacy
     # @return [Array<Token>] an array of tokens
     def lefts
       token_array = []
-      PyCall::List.(@py_token.lefts).each do |token|
+      PyCall::List.call(@py_token.lefts).each do |token|
         token_array << Token.new(token)
       end
       token_array
@@ -578,89 +581,87 @@ module Spacy
     # @return [Array<Token>] an array of tokens
     def rights
       token_array = []
-      PyCall::List.(@py_token.rights).each do |token|
+      PyCall::List.call(@py_token.rights).each do |token|
         token_array << Token.new(token)
       end
       token_array
     end

     # String representation of the token.
-    # @return [String]
+    # @return [String]
     def to_s
       @text
     end

     # Returns a hash or string of morphological information
     # @param hash [Boolean] if true, a hash will be returned instead of a string
-    # @return [Hash, String]
-    def morphology(hash = true)
+    # @return [Hash, String]
+    def morphology(hash: true)
       if @py_token.has_morph
         morph_analysis = @py_token.morph
-        if hash
-          return morph_analysis.to_dict
-        else
-          return morph_analysis.to_s
-        end
-      else
         if hash
-          results = {}
+          morph_analysis.to_dict
         else
-          return ""
+          morph_analysis.to_s
         end
+      elsif hash
+        {}
+      else
+        ""
       end
     end

     # Returns the lemma by calling `lemma_' of `@py_token` object
-    # @return [String]
+    # @return [String]
     def lemma
       @py_token.lemma_
     end

     # Returns the lowercase form by calling `lower_' of `@py_token` object
-    # @return [String]
+    # @return [String]
     def lower
       @py_token.lower_
     end

     # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
-    # @return [String]
+    # @return [String]
     def shape
       @py_token.shape_
     end

     # Returns the pos by calling `pos_' of `@py_token` object
-    # @return [String]
+    # @return [String]
     def pos
       @py_token.pos_
     end

     # Returns the fine-grained pos by calling `tag_' of `@py_token` object
-    # @return [String]
-    def tag
+    # @return [String]
+    def tag
       @py_token.tag_
     end

     # Returns the dependency relation by calling `dep_' of `@py_token` object
-    # @return [String]
+    # @return [String]
     def dep
       @py_token.dep_
     end
-
+
     # Returns the language by calling `lang_' of `@py_token` object
-    # @return [String]
-    def lang
+    # @return [String]
+    def lang
       @py_token.lang_
     end

     # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
-    # @return [String]
-    def whitespace
+    # @return [String]
+    def whitespace
       @py_token.whitespace_
     end

     # Returns the named entity type by calling `ent_type_' of `@py_token` object
-    # @return [String]
-    def ent_type
+    # @return [String]
+    def ent_type
       @py_token.ent_type_
     end
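
Note on usage: `Token#morphology` now takes a keyword argument, so call sites change from `token.morphology(false)` to `token.morphology(hash: false)`; when a token has no morphological features it returns `{}` or `""`. A short sketch (hypothetical sentence; the exact features depend on the model):

    doc = nlp.read("She was reading a book")
    token = doc.tokens[2] # "reading"
    p token.morphology              # e.g. {"Aspect" => "Prog", "Tense" => "Pres", "VerbForm" => "Part"}
    p token.morphology(hash: false) # e.g. "Aspect=Prog|Tense=Pres|VerbForm=Part"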

@@ -674,11 +675,14 @@ module Spacy
     def method_missing(name, *args)
       @py_token.send(name, *args)
     end
+
+    def respond_to_missing?(sym)
+      sym ? true : super
+    end
   end

   # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
-  class Lexeme
-
+  class Lexeme
     # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
     attr_reader :py_lexeme

@@ -694,50 +698,50 @@ module Spacy
     end

     # String representation of the token.
-    # @return [String]
+    # @return [String]
     def to_s
       @text
     end

     # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
-    # @return [String]
+    # @return [String]
     def lower
       @py_lexeme.lower_
     end

     # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
-    # @return [String]
+    # @return [String]
     def shape
       @py_lexeme.shape_
     end

     # Returns the language by calling `lang_' of `@py_lexeme` object
-    # @return [String]
-    def lang
+    # @return [String]
+    def lang
       @py_lexeme.lang_
     end

     # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
-    # @return [String]
-    def prefix
+    # @return [String]
+    def prefix
       @py_lexeme.prefix_
     end
-    #
+
     # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
-    # @return [String]
+    # @return [String]
     def suffix
       @py_lexeme.suffix_
     end

     # Returns the lexemes's norm, i.e. a normalized form of the lexeme calling `norm_' of `@py_lexeme` object
-    # @return [String]
+    # @return [String]
     def norm
       @py_lexeme.norm_
     end

     # Returns a semantic similarity estimate.
-    # @param other [Lexeme] the other doc to which a similarity estimation is made
-    # @return [Float]
+    # @param other [Lexeme] the other lexeme to which a similarity estimation is made
+    # @return [Float]
     def similarity(other)
       @py_lexeme.similarity(other.py_lexeme)
     end
@@ -746,7 +750,9 @@ module Spacy
     def method_missing(name, *args)
       @py_lexeme.send(name, *args)
     end
-  end

+    def respond_to_missing?(sym)
+      sym ? true : super
+    end
+  end
 end
-