ruby-spacy 0.1.4.1 → 0.1.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +48 -0
  4. data/.solargraph.yml +22 -0
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +88 -9
  7. data/README.md +7 -10
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +3 -1
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +3 -1
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +147 -142
  58. data/ruby-spacy.gemspec +15 -17
  59. metadata +68 -10
data/lib/ruby-spacy.rb CHANGED
@@ -1,17 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "ruby-spacy/version"
4
- require 'enumerator'
5
- require 'strscan'
6
- require 'numpy'
7
- require 'pycall/import'
8
- include PyCall::Import
4
+ require "strscan"
5
+ require "numpy"
6
+ require "pycall"
9
7
 
10
8
  # This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
11
9
  module Spacy
10
+ MAX_RETRIAL = 20
12
11
 
13
- extend PyCall::Import
14
- spacy = PyCall.import_module('spacy')
12
+ spacy = PyCall.import_module("spacy")
15
13
 
16
14
  # Python `Language` class
17
15
  PyLanguage = spacy.language.Language
@@ -24,23 +22,22 @@ module Spacy
24
22
 
25
23
  # Python `Token` class object
26
24
  PyToken = spacy.tokens.Token
27
-
25
+
28
26
  # Python `Matcher` class object
29
27
  PyMatcher = spacy.matcher.Matcher
30
28
 
31
29
  # Python `displacy` object
32
30
  PyDisplacy = spacy.displacy
33
31
 
34
- # A utility module method to convert Python's generator object to a Ruby array,
32
+ # A utility module method to convert Python's generator object to a Ruby array,
35
33
  # mainly used on the items inside the array returned from dependency-related methods
36
34
  # such as {Span#rights}, {Span#lefts} and {Span#subtree}.
37
35
  def self.generator_to_array(py_generator)
38
- PyCall::List.(py_generator)
36
+ PyCall::List.call(py_generator)
39
37
  end
40
38
 
41
39
  # See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
42
40
  class Doc
43
-
44
41
  # @return [Object] a Python `Language` instance accessible via `PyCall`
45
42
  attr_reader :py_nlp
46
43
 
@@ -52,23 +49,19 @@ module Spacy
52
49
 
53
50
  include Enumerable
54
51
 
55
- alias_method :length, :count
56
- alias_method :len, :count
57
- alias_method :size, :count
52
+ alias length count
53
+ alias len count
54
+ alias size count
58
55
 
59
- # It is recommended to use {Language#read} method to create a doc. If you need to
60
- # create one using {Doc#initialize}, there are two method signatures:
56
+ # It is recommended to use {Language#read} method to create a doc. If you need to
57
+ # create one using {Doc#initialize}, there are two method signatures:
61
58
  # `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
62
59
  # @param nlp [Language] an instance of {Language} class
63
60
  # @param py_doc [Object] an instance of Python `Doc` class
64
61
  # @param text [String] the text string to be analyzed
65
62
  def initialize(nlp, py_doc: nil, text: nil)
66
63
  @py_nlp = nlp
67
- if py_doc
68
- @py_doc = py_doc
69
- else
70
- @py_doc = nlp.(text)
71
- end
64
+ @py_doc = py_doc || @py_doc = nlp.call(text)
72
65
  @text = @py_doc.text
73
66
  end
74
67
 
@@ -77,25 +70,25 @@ module Spacy
77
70
  # @param end_index [Integer] the end position of the span to be retokenized in the document
78
71
  # @param attributes [Hash] attributes to set on the merged token
79
72
  def retokenize(start_index, end_index, attributes = {})
80
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
81
- retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
73
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
74
+ retokenizer.merge(@py_doc[start_index..end_index], attrs: attributes)
82
75
  end
83
76
  end
84
77
 
85
78
  # Retokenizes the text splitting the specified token.
86
79
  # @param pos_in_doc [Integer] the position of the span to be retokenized in the document
87
- # @param split_array [Array<String>] text strings of the split results
80
+ # @param split_array [Array<String>] text strings of the split results
88
81
  # @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
89
82
  # @param attributes [Hash] the attributes of the split elements
90
83
  def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
91
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
84
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
92
85
  heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
93
86
  retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
94
87
  end
95
88
  end
96
89
 
97
90
  # String representation of the document.
98
- # @return [String]
91
+ # @return [String]
99
92
  def to_s
100
93
  @text
101
94
  end
@@ -104,7 +97,7 @@ module Spacy
104
97
  # @return [Array<Token>]
105
98
  def tokens
106
99
  results = []
107
- PyCall::List.(@py_doc).each do |py_token|
100
+ PyCall::List.call(@py_doc).each do |py_token|
108
101
  results << Token.new(py_token)
109
102
  end
110
103
  results
@@ -112,12 +105,12 @@ module Spacy
112
105
 
113
106
  # Iterates over the elements in the doc yielding a token instance each time.
114
107
  def each
115
- PyCall::List.(@py_doc).each do |py_token|
108
+ PyCall::List.call(@py_doc).each do |py_token|
116
109
  yield Token.new(py_token)
117
110
  end
118
111
  end
119
112
 
120
- # Returns a span of the specified range within the doc.
113
+ # Returns a span of the specified range within the doc.
121
114
  # The method should be used either of the two ways: `Doc#span(range)` or `Doc#span{start_pos, size_of_span}`.
122
115
  # @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
123
116
  # @param optional_size [Integer] an integer representing the size of the span
@@ -125,7 +118,7 @@ module Spacy
125
118
  def span(range_or_start, optional_size = nil)
126
119
  if optional_size
127
120
  start_index = range_or_start
128
- temp = tokens[start_index ... start_index + optional_size]
121
+ temp = tokens[start_index...start_index + optional_size]
129
122
  else
130
123
  start_index = range_or_start.first
131
124
  range = range_or_start
@@ -141,7 +134,7 @@ module Spacy
141
134
  # @return [Array<Span>]
142
135
  def noun_chunks
143
136
  chunk_array = []
144
- py_chunks = PyCall::List.(@py_doc.noun_chunks)
137
+ py_chunks = PyCall::List.call(@py_doc.noun_chunks)
145
138
  py_chunks.each do |py_chunk|
146
139
  chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
147
140
  end
@@ -152,7 +145,7 @@ module Spacy
152
145
  # @return [Array<Span>]
153
146
  def sents
154
147
  sentence_array = []
155
- py_sentences = PyCall::List.(@py_doc.sents)
148
+ py_sentences = PyCall::List.call(@py_doc.sents)
156
149
  py_sentences.each do |py_sent|
157
150
  sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
158
151
  end
@@ -164,9 +157,9 @@ module Spacy
164
157
  def ents
165
158
  # so that ents can be "each"-ed in Ruby
166
159
  ent_array = []
167
- PyCall::List.(@py_doc.ents).each do |ent|
160
+ PyCall::List.call(@py_doc.ents).each do |ent|
168
161
  ent.define_singleton_method :label do
169
- return self.label_
162
+ label_
170
163
  end
171
164
  ent_array << ent
172
165
  end
@@ -178,15 +171,15 @@ module Spacy
178
171
  def [](range)
179
172
  if range.is_a?(Range)
180
173
  py_span = @py_doc[range]
181
- return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
174
+ Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
182
175
  else
183
- return Token.new(@py_doc[range])
176
+ Token.new(@py_doc[range])
184
177
  end
185
178
  end
186
179
 
187
180
  # Returns a semantic similarity estimate.
188
181
  # @param other [Doc] the other doc to which a similarity estimation is made
189
- # @return [Float]
182
+ # @return [Float]
190
183
  def similarity(other)
191
184
  py_doc.similarity(other.py_doc)
192
185
  end
@@ -196,18 +189,21 @@ module Spacy
196
189
  # @param compact [Boolean] only relevant to the `dep' style
197
190
  # @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
198
191
  def displacy(style: "dep", compact: false)
199
- PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
192
+ PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
200
193
  end
201
194
 
202
195
  # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
203
196
  def method_missing(name, *args)
204
197
  @py_doc.send(name, *args)
205
198
  end
199
+
200
+ def respond_to_missing?(sym)
201
+ sym ? true : super
202
+ end
206
203
  end
207
204
 
208
205
  # See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
209
206
  class Language
210
-
211
207
  # @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
212
208
  attr_reader :spacy_nlp_id
213
209
 
@@ -216,10 +212,16 @@ module Spacy
216
212
 
217
213
  # Creates a language model instance, which is conventionally referred to by a variable named `nlp`.
218
214
  # @param model [String] A language model installed in the system
219
- def initialize(model = "en_core_web_sm")
215
+ def initialize(model = "en_core_web_sm", max_retrial = MAX_RETRIAL, retrial = 0)
220
216
  @spacy_nlp_id = "nlp_#{model.object_id}"
221
217
  PyCall.exec("import spacy; #{@spacy_nlp_id} = spacy.load('#{model}')")
222
218
  @py_nlp = PyCall.eval(@spacy_nlp_id)
219
+ rescue StandardError
220
+ retrial += 1
221
+ raise "Error: Pycall failed to load Spacy" unless retrial <= max_retrial
222
+
223
+ sleep 0.5
224
+ initialize(model, max_retrial, retrial)
223
225
  end
224
226
 
225
227
  # Reads and analyzes the given text.
@@ -245,7 +247,7 @@ module Spacy
245
247
  # @return [Array<String>] An array of text strings representing pipeline components
246
248
  def pipe_names
247
249
  pipe_array = []
248
- PyCall::List.(@py_nlp.pipe_names).each do |pipe|
250
+ PyCall::List.call(@py_nlp.pipe_names).each do |pipe|
249
251
  pipe_array << pipe
250
252
  end
251
253
  pipe_array
@@ -268,24 +270,23 @@ module Spacy
268
270
  # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
269
271
  # @param vector [Object] A vector representation of a word (whether existing or non-existing)
270
272
  # @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
271
- def most_similar(vector, n)
273
+ def most_similar(vector, num)
272
274
  vec_array = Numpy.asarray([vector])
273
- py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
274
- key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
275
- keys = key_texts.map{|kt| kt[0]}
276
- texts = key_texts.map{|kt| kt[1]}
277
- best_rows = PyCall::List.(py_result[1])[0]
278
- scores = PyCall::List.(py_result[2])[0]
275
+ py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: num)
276
+ key_texts = PyCall.eval("[[str(num), #{@spacy_nlp_id}.vocab[num].text] for num in #{py_result[0][0].tolist}]")
277
+ keys = key_texts.map { |kt| kt[0] }
278
+ texts = key_texts.map { |kt| kt[1] }
279
+ best_rows = PyCall::List.call(py_result[1])[0]
280
+ scores = PyCall::List.call(py_result[2])[0]
279
281
 
280
282
  results = []
281
- n.times do |i|
282
- result = {key: keys[i].to_i,
283
- text: texts[i],
284
- best_row: best_rows[i],
285
- score: scores[i]
286
- }
283
+ num.times do |i|
284
+ result = { key: keys[i].to_i,
285
+ text: texts[i],
286
+ best_row: best_rows[i],
287
+ score: scores[i] }
287
288
  result.each_key do |key|
288
- result.define_singleton_method(key){ result[key] }
289
+ result.define_singleton_method(key) { result[key] }
289
290
  end
290
291
  results << result
291
292
  end
@@ -297,9 +298,9 @@ module Spacy
297
298
  # @param disable [Array<String>]
298
299
  # @param batch_size [Integer]
299
300
  # @return [Array<Doc>]
300
- def pipe(texts, disable: [], batch_size: 50)
301
+ def pipe(texts, disable: [], batch_size: 50)
301
302
  docs = []
302
- PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
303
+ PyCall::List.call(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
303
304
  docs << Doc.new(@py_nlp, py_doc: py_doc)
304
305
  end
305
306
  docs
@@ -309,18 +310,21 @@ module Spacy
309
310
  def method_missing(name, *args)
310
311
  @py_nlp.send(name, *args)
311
312
  end
313
+
314
+ def respond_to_missing?(sym)
315
+ sym ? true : super
316
+ end
312
317
  end
313
318
 
314
319
  # See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
315
320
  class Matcher
316
-
317
321
  # @return [Object] a Python `Matcher` instance accessible via `PyCall`
318
322
  attr_reader :py_matcher
319
323
 
320
324
  # Creates a {Matcher} instance
321
325
  # @param nlp [Language] an instance of {Language} class
322
326
  def initialize(nlp)
323
- @py_matcher = PyMatcher.(nlp.vocab)
327
+ @py_matcher = PyMatcher.call(nlp.vocab)
324
328
  end
325
329
 
326
330
  # Adds a label string and a text pattern.
@@ -334,16 +338,17 @@ module Spacy
334
338
  # @param doc [Doc] an {Doc} instance
335
339
  # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
336
340
  def match(doc)
337
- str_results = @py_matcher.(doc.py_doc).to_s
341
+ str_results = @py_matcher.call(doc.py_doc).to_s
338
342
  s = StringScanner.new(str_results[1..-2])
339
343
  results = []
340
344
  while s.scan_until(/(\d+), (\d+), (\d+)/)
341
345
  next unless s.matched
346
+
342
347
  triple = s.matched.split(", ")
343
348
  match_id = triple[0].to_i
344
349
  start_index = triple[1].to_i
345
350
  end_index = triple[2].to_i - 1
346
- results << {match_id: match_id, start_index: start_index, end_index: end_index}
351
+ results << { match_id: match_id, start_index: start_index, end_index: end_index }
347
352
  end
348
353
  results
349
354
  end
@@ -351,7 +356,6 @@ module Spacy
351
356
 
352
357
  # See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
353
358
  class Span
354
-
355
359
  # @return [Object] a Python `Span` instance accessible via `PyCall`
356
360
  attr_reader :py_span
357
361
 
@@ -360,11 +364,11 @@ module Spacy
360
364
 
361
365
  include Enumerable
362
366
 
363
- alias_method :length, :count
364
- alias_method :len, :count
365
- alias_method :size, :count
367
+ alias length count
368
+ alias len count
369
+ alias size count
366
370
 
367
- # It is recommended to use {Doc#span} method to create a span. If you need to
371
+ # It is recommended to use {Doc#span} method to create a span. If you need to
368
372
  # create one using {Span#initialize}, there are two method signatures:
369
373
  # `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
370
374
  # @param doc [Doc] the document to which this span belongs to
@@ -373,18 +377,14 @@ module Spacy
373
377
  # @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
374
378
  def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
375
379
  @doc = doc
376
- if py_span
377
- @py_span = py_span
378
- else
379
- @py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
380
- end
380
+ @py_span = py_span || @py_span = PySpan.call(@doc.py_doc, start_index, end_index + 1, options)
381
381
  end
382
382
 
383
383
  # Returns an array of tokens contained in the span.
384
384
  # @return [Array<Token>]
385
385
  def tokens
386
386
  results = []
387
- PyCall::List.(@py_span).each do |py_token|
387
+ PyCall::List.call(@py_span).each do |py_token|
388
388
  results << Token.new(py_token)
389
389
  end
390
390
  results
@@ -392,7 +392,7 @@ module Spacy
392
392
 
393
393
  # Iterates over the elements in the span yielding a token instance each time.
394
394
  def each
395
- PyCall::List.(@py_span).each do |py_token|
395
+ PyCall::List.call(@py_span).each do |py_token|
396
396
  yield Token.new(py_token)
397
397
  end
398
398
  end
@@ -401,7 +401,7 @@ module Spacy
401
401
  # @return [Array<Span>]
402
402
  def noun_chunks
403
403
  chunk_array = []
404
- py_chunks = PyCall::List.(@py_span.noun_chunks)
404
+ py_chunks = PyCall::List.call(@py_span.noun_chunks)
405
405
  py_chunks.each do |py_span|
406
406
  chunk_array << Span.new(@doc, py_span: py_span)
407
407
  end
@@ -410,7 +410,7 @@ module Spacy
410
410
 
411
411
  # Returns the head token
412
412
  # @return [Token]
413
- def root
413
+ def root
414
414
  Token.new(@py_span.root)
415
415
  end
416
416
 
@@ -418,7 +418,7 @@ module Spacy
418
418
  # @return [Array<Span>]
419
419
  def sents
420
420
  sentence_array = []
421
- py_sentences = PyCall::List.(@py_span.sents)
421
+ py_sentences = PyCall::List.call(@py_span.sents)
422
422
  py_sentences.each do |py_span|
423
423
  sentence_array << Span.new(@doc, py_span: py_span)
424
424
  end
@@ -429,7 +429,7 @@ module Spacy
429
429
  # @return [Array<Span>]
430
430
  def ents
431
431
  ent_array = []
432
- PyCall::List.(@py_span.ents).each do |py_span|
432
+ PyCall::List.call(@py_span.ents).each do |py_span|
433
433
  ent_array << Span.new(@doc, py_span: py_span)
434
434
  end
435
435
  ent_array
@@ -438,8 +438,8 @@ module Spacy
438
438
  # Returns a span that represents the sentence that the given span is part of.
439
439
  # @return [Span]
440
440
  def sent
441
- py_span = @py_span.sent
442
- return Span.new(@doc, py_span: py_span)
441
+ py_span = @py_span.sent
442
+ Span.new(@doc, py_span: py_span)
443
443
  end
444
444
 
445
445
  # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
@@ -447,67 +447,67 @@ module Spacy
447
447
  def [](range)
448
448
  if range.is_a?(Range)
449
449
  py_span = @py_span[range]
450
- return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
450
+ Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
451
451
  else
452
- return Token.new(@py_span[range])
452
+ Token.new(@py_span[range])
453
453
  end
454
454
  end
455
455
 
456
456
  # Returns a semantic similarity estimate.
457
457
  # @param other [Span] the other span to which a similarity estimation is conducted
458
- # @return [Float]
458
+ # @return [Float]
459
459
  def similarity(other)
460
460
  py_span.similarity(other.py_span)
461
461
  end
462
462
 
463
463
  # Creates a document instance from the span
464
- # @return [Doc]
464
+ # @return [Doc]
465
465
  def as_doc
466
- Doc.new(@doc.py_nlp, text: self.text)
466
+ Doc.new(@doc.py_nlp, text: text)
467
467
  end
468
468
 
469
469
  # Returns tokens conjugated to the root of the span.
470
470
  # @return [Array<Token>] an array of tokens
471
471
  def conjuncts
472
472
  conjunct_array = []
473
- PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
473
+ PyCall::List.call(@py_span.conjuncts).each do |py_conjunct|
474
474
  conjunct_array << Token.new(py_conjunct)
475
475
  end
476
476
  conjunct_array
477
477
  end
478
478
 
479
479
  # Returns tokens that are to the left of the span, whose heads are within the span.
480
- # @return [Array<Token>] an array of tokens
480
+ # @return [Array<Token>] an array of tokens
481
481
  def lefts
482
482
  left_array = []
483
- PyCall::List.(@py_span.lefts).each do |py_left|
483
+ PyCall::List.call(@py_span.lefts).each do |py_left|
484
484
  left_array << Token.new(py_left)
485
485
  end
486
486
  left_array
487
487
  end
488
488
 
489
489
  # Returns Tokens that are to the right of the span, whose heads are within the span.
490
- # @return [Array<Token>] an array of Tokens
490
+ # @return [Array<Token>] an array of Tokens
491
491
  def rights
492
492
  right_array = []
493
- PyCall::List.(@py_span.rights).each do |py_right|
493
+ PyCall::List.call(@py_span.rights).each do |py_right|
494
494
  right_array << Token.new(py_right)
495
495
  end
496
496
  right_array
497
497
  end
498
498
 
499
499
  # Returns Tokens that are within the span and tokens that descend from them.
500
- # @return [Array<Token>] an array of tokens
500
+ # @return [Array<Token>] an array of tokens
501
501
  def subtree
502
502
  subtree_array = []
503
- PyCall::List.(@py_span.subtree).each do |py_subtree|
503
+ PyCall::List.call(@py_span.subtree).each do |py_subtree|
504
504
  subtree_array << Token.new(py_subtree)
505
505
  end
506
506
  subtree_array
507
507
  end
508
508
 
509
509
  # Returns the label
510
- # @return [String]
510
+ # @return [String]
511
511
  def label
512
512
  @py_span.label_
513
513
  end
@@ -516,11 +516,14 @@ module Spacy
516
516
  def method_missing(name, *args)
517
517
  @py_span.send(name, *args)
518
518
  end
519
+
520
+ def respond_to_missing?(sym)
521
+ sym ? true : super
522
+ end
519
523
  end
520
524
 
521
525
  # See also spaCy Python API document for [`Token`](https://spacy.io/api/token).
522
526
  class Token
523
-
524
527
  # @return [Object] a Python `Token` instance accessible via `PyCall`
525
528
  attr_reader :py_token
526
529
 
@@ -528,17 +531,16 @@ module Spacy
528
531
  attr_reader :text
529
532
 
530
533
  # It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
531
- # There is no way to generate a token from scratch but relying on a pre-exising Python {Token} object.
534
+ # There is no way to generate a token from scratch but relying on a pre-existing Python `Token` object.
532
535
  # @param py_token [Object] Python `Token` object
533
536
  def initialize(py_token)
534
537
  @py_token = py_token
535
538
  @text = @py_token.text
536
539
  end
537
540
 
538
-
539
541
  # Returns the head token
540
542
  # @return [Token]
541
- def head
543
+ def head
542
544
  Token.new(@py_token.head)
543
545
  end
544
546
 
@@ -546,7 +548,7 @@ module Spacy
546
548
  # @return [Array<Token>] an array of tokens
547
549
  def subtree
548
550
  descendant_array = []
549
- PyCall::List.(@py_token.subtree).each do |descendant|
551
+ PyCall::List.call(@py_token.subtree).each do |descendant|
550
552
  descendant_array << Token.new(descendant)
551
553
  end
552
554
  descendant_array
@@ -556,7 +558,7 @@ module Spacy
556
558
  # @return [Array<Token>] an array of tokens
557
559
  def ancestors
558
560
  ancestor_array = []
559
- PyCall::List.(@py_token.ancestors).each do |ancestor|
561
+ PyCall::List.call(@py_token.ancestors).each do |ancestor|
560
562
  ancestor_array << Token.new(ancestor)
561
563
  end
562
564
  ancestor_array
@@ -566,7 +568,7 @@ module Spacy
566
568
  # @return [Array<Token>] an array of tokens
567
569
  def children
568
570
  child_array = []
569
- PyCall::List.(@py_token.children).each do |child|
571
+ PyCall::List.call(@py_token.children).each do |child|
570
572
  child_array << Token.new(child)
571
573
  end
572
574
  child_array
@@ -576,7 +578,7 @@ module Spacy
576
578
  # @return [Array<Token>] an array of tokens
577
579
  def lefts
578
580
  token_array = []
579
- PyCall::List.(@py_token.lefts).each do |token|
581
+ PyCall::List.call(@py_token.lefts).each do |token|
580
582
  token_array << Token.new(token)
581
583
  end
582
584
  token_array
@@ -586,89 +588,87 @@ module Spacy
586
588
  # @return [Array<Token>] an array of tokens
587
589
  def rights
588
590
  token_array = []
589
- PyCall::List.(@py_token.rights).each do |token|
591
+ PyCall::List.call(@py_token.rights).each do |token|
590
592
  token_array << Token.new(token)
591
593
  end
592
594
  token_array
593
595
  end
594
596
 
595
597
  # String representation of the token.
596
- # @return [String]
598
+ # @return [String]
597
599
  def to_s
598
600
  @text
599
601
  end
600
602
 
601
603
  # Returns a hash or string of morphological information
602
604
  # @param hash [Boolean] if true, a hash will be returned instead of a string
603
- # @return [Hash, String]
604
- def morphology(hash = true)
605
+ # @return [Hash, String]
606
+ def morphology(hash: true)
605
607
  if @py_token.has_morph
606
608
  morph_analysis = @py_token.morph
607
- if hash
608
- return morph_analysis.to_dict
609
- else
610
- return morph_analysis.to_s
611
- end
612
- else
613
609
  if hash
614
- results = {}
610
+ morph_analysis.to_dict
615
611
  else
616
- return ""
612
+ morph_analysis.to_s
617
613
  end
614
+ elsif hash
615
+ {}
616
+ else
617
+ ""
618
618
  end
619
619
  end
620
620
 
621
621
  # Returns the lemma by calling `lemma_' of `@py_token` object
622
- # @return [String]
622
+ # @return [String]
623
623
  def lemma
624
624
  @py_token.lemma_
625
625
  end
626
626
 
627
627
  # Returns the lowercase form by calling `lower_' of `@py_token` object
628
- # @return [String]
628
+ # @return [String]
629
629
  def lower
630
630
  @py_token.lower_
631
631
  end
632
632
 
633
633
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
634
- # @return [String]
634
+ # @return [String]
635
635
  def shape
636
636
  @py_token.shape_
637
637
  end
638
638
 
639
639
  # Returns the pos by calling `pos_' of `@py_token` object
640
- # @return [String]
640
+ # @return [String]
641
641
  def pos
642
642
  @py_token.pos_
643
643
  end
644
644
 
645
645
  # Returns the fine-grained pos by calling `tag_' of `@py_token` object
646
- # @return [String]
647
- def tag
646
+ # @return [String]
647
+ def tag
648
648
  @py_token.tag_
649
649
  end
650
650
 
651
651
  # Returns the dependency relation by calling `dep_' of `@py_token` object
652
- # @return [String]
652
+ # @return [String]
653
653
  def dep
654
654
  @py_token.dep_
655
655
  end
656
-
656
+
657
657
  # Returns the language by calling `lang_' of `@py_token` object
658
- # @return [String]
659
- def lang
658
+ # @return [String]
659
+ def lang
660
660
  @py_token.lang_
661
661
  end
662
662
 
663
663
  # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
664
- # @return [String]
665
- def whitespace
664
+ # @return [String]
665
+ def whitespace
666
666
  @py_token.whitespace_
667
667
  end
668
668
 
669
669
  # Returns the named entity type by calling `ent_type_' of `@py_token` object
670
- # @return [String]
671
- def ent_type
670
+ # @return [String]
671
+ def ent_type
672
672
  @py_token.ent_type_
673
673
  end
674
674
 
@@ -682,11 +682,14 @@ module Spacy
682
682
  def method_missing(name, *args)
683
683
  @py_token.send(name, *args)
684
684
  end
685
+
686
+ def respond_to_missing?(sym)
687
+ sym ? true : super
688
+ end
685
689
  end
686
690
 
687
691
  # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
688
- class Lexeme
689
-
692
+ class Lexeme
690
693
  # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
691
694
  attr_reader :py_lexeme
692
695
 
@@ -702,50 +705,50 @@ module Spacy
702
705
  end
703
706
 
704
707
  # String representation of the token.
705
- # @return [String]
708
+ # @return [String]
706
709
  def to_s
707
710
  @text
708
711
  end
709
712
 
710
713
  # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
711
- # @return [String]
714
+ # @return [String]
712
715
  def lower
713
716
  @py_lexeme.lower_
714
717
  end
715
718
 
716
719
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
717
- # @return [String]
720
+ # @return [String]
718
721
  def shape
719
722
  @py_lexeme.shape_
720
723
  end
721
724
 
722
725
  # Returns the language by calling `lang_' of `@py_lexeme` object
723
- # @return [String]
724
- def lang
726
+ # @return [String]
727
+ def lang
725
728
  @py_lexeme.lang_
726
729
  end
727
730
 
728
731
  # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
729
- # @return [String]
730
- def prefix
732
+ # @return [String]
733
+ def prefix
731
734
  @py_lexeme.prefix_
732
735
  end
733
- #
736
+
734
737
  # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
735
- # @return [String]
738
+ # @return [String]
736
739
  def suffix
737
740
  @py_lexeme.suffix_
738
741
  end
739
742
 
740
743
  # Returns the lexemes's norm, i.e. a normalized form of the lexeme calling `norm_' of `@py_lexeme` object
741
- # @return [String]
744
+ # @return [String]
742
745
  def norm
743
746
  @py_lexeme.norm_
744
747
  end
745
748
 
746
749
  # Returns a semantic similarity estimate.
747
- # @param other [Lexeme] the other doc to which a similarity estimation is made
748
- # @return [Float]
750
+ # @param other [Lexeme] the other lexeme to which a similarity estimation is made
751
+ # @return [Float]
749
752
  def similarity(other)
750
753
  @py_lexeme.similarity(other.py_lexeme)
751
754
  end
@@ -754,7 +757,9 @@ module Spacy
754
757
  def method_missing(name, *args)
755
758
  @py_lexeme.send(name, *args)
756
759
  end
757
- end
758
760
 
761
+ def respond_to_missing?(sym)
762
+ sym ? true : super
763
+ end
764
+ end
759
765
  end
760
-