ruby-spacy 0.1.4.1 → 0.1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +48 -0
  4. data/.solargraph.yml +22 -0
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +88 -9
  7. data/README.md +7 -10
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +3 -1
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +3 -1
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +147 -142
  58. data/ruby-spacy.gemspec +15 -17
  59. metadata +68 -10
data/lib/ruby-spacy.rb CHANGED
@@ -1,17 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "ruby-spacy/version"
4
- require 'enumerator'
5
- require 'strscan'
6
- require 'numpy'
7
- require 'pycall/import'
8
- include PyCall::Import
4
+ require "strscan"
5
+ require "numpy"
6
+ require "pycall"
9
7
 
10
8
  # This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
11
9
  module Spacy
10
+ MAX_RETRIAL = 20
12
11
 
13
- extend PyCall::Import
14
- spacy = PyCall.import_module('spacy')
12
+ spacy = PyCall.import_module("spacy")
15
13
 
16
14
  # Python `Language` class
17
15
  PyLanguage = spacy.language.Language
@@ -24,23 +22,22 @@ module Spacy
24
22
 
25
23
  # Python `Token` class object
26
24
  PyToken = spacy.tokens.Token
27
-
25
+
28
26
  # Python `Matcher` class object
29
27
  PyMatcher = spacy.matcher.Matcher
30
28
 
31
29
  # Python `displacy` object
32
30
  PyDisplacy = spacy.displacy
33
31
 
34
- # A utility module method to convert Python's generator object to a Ruby array,
32
+ # A utility module method to convert Python's generator object to a Ruby array,
35
33
  # mainly used on the items inside the array returned from dependency-related methods
36
34
  # such as {Span#rights}, {Span#lefts} and {Span#subtree}.
37
35
  def self.generator_to_array(py_generator)
38
- PyCall::List.(py_generator)
36
+ PyCall::List.call(py_generator)
39
37
  end
40
38
 
41
39
  # See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
42
40
  class Doc
43
-
44
41
  # @return [Object] a Python `Language` instance accessible via `PyCall`
45
42
  attr_reader :py_nlp
46
43
 
@@ -52,23 +49,19 @@ module Spacy
52
49
 
53
50
  include Enumerable
54
51
 
55
- alias_method :length, :count
56
- alias_method :len, :count
57
- alias_method :size, :count
52
+ alias length count
53
+ alias len count
54
+ alias size count
58
55
 
59
- # It is recommended to use {Language#read} method to create a doc. If you need to
60
- # create one using {Doc#initialize}, there are two method signatures:
56
+ # It is recommended to use {Language#read} method to create a doc. If you need to
57
+ # create one using {Doc#initialize}, there are two method signatures:
61
58
  # `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
62
59
  # @param nlp [Language] an instance of {Language} class
63
60
  # @param py_doc [Object] an instance of Python `Doc` class
64
61
  # @param text [String] the text string to be analyzed
65
62
  def initialize(nlp, py_doc: nil, text: nil)
66
63
  @py_nlp = nlp
67
- if py_doc
68
- @py_doc = py_doc
69
- else
70
- @py_doc = nlp.(text)
71
- end
64
+ @py_doc = py_doc || @py_doc = nlp.call(text)
72
65
  @text = @py_doc.text
73
66
  end
74
67
 
@@ -77,25 +70,25 @@ module Spacy
77
70
  # @param end_index [Integer] the end position of the span to be retokenized in the document
78
71
  # @param attributes [Hash] attributes to set on the merged token
79
72
  def retokenize(start_index, end_index, attributes = {})
80
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
81
- retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
73
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
74
+ retokenizer.merge(@py_doc[start_index..end_index], attrs: attributes)
82
75
  end
83
76
  end
84
77
 
85
78
  # Retokenizes the text splitting the specified token.
86
79
  # @param pos_in_doc [Integer] the position of the span to be retokenized in the document
87
- # @param split_array [Array<String>] text strings of the split results
80
+ # @param split_array [Array<String>] text strings of the split results
88
81
  # @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
89
82
  # @param attributes [Hash] the attributes of the split elements
90
83
  def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
91
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
84
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
92
85
  heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
93
86
  retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
94
87
  end
95
88
  end
96
89
 
97
90
  # String representation of the document.
98
- # @return [String]
91
+ # @return [String]
99
92
  def to_s
100
93
  @text
101
94
  end
@@ -104,7 +97,7 @@ module Spacy
104
97
  # @return [Array<Token>]
105
98
  def tokens
106
99
  results = []
107
- PyCall::List.(@py_doc).each do |py_token|
100
+ PyCall::List.call(@py_doc).each do |py_token|
108
101
  results << Token.new(py_token)
109
102
  end
110
103
  results
@@ -112,12 +105,12 @@ module Spacy
112
105
 
113
106
  # Iterates over the elements in the doc yielding a token instance each time.
114
107
  def each
115
- PyCall::List.(@py_doc).each do |py_token|
108
+ PyCall::List.call(@py_doc).each do |py_token|
116
109
  yield Token.new(py_token)
117
110
  end
118
111
  end
119
112
 
120
- # Returns a span of the specified range within the doc.
113
+ # Returns a span of the specified range within the doc.
121
114
  # The method should be used in either of two ways: `Doc#span(range)` or `Doc#span(start_pos, size_of_span)`.
122
115
  # @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
123
116
  # @param optional_size [Integer] an integer representing the size of the span
@@ -125,7 +118,7 @@ module Spacy
125
118
  def span(range_or_start, optional_size = nil)
126
119
  if optional_size
127
120
  start_index = range_or_start
128
- temp = tokens[start_index ... start_index + optional_size]
121
+ temp = tokens[start_index...start_index + optional_size]
129
122
  else
130
123
  start_index = range_or_start.first
131
124
  range = range_or_start
@@ -141,7 +134,7 @@ module Spacy
141
134
  # @return [Array<Span>]
142
135
  def noun_chunks
143
136
  chunk_array = []
144
- py_chunks = PyCall::List.(@py_doc.noun_chunks)
137
+ py_chunks = PyCall::List.call(@py_doc.noun_chunks)
145
138
  py_chunks.each do |py_chunk|
146
139
  chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
147
140
  end
@@ -152,7 +145,7 @@ module Spacy
152
145
  # @return [Array<Span>]
153
146
  def sents
154
147
  sentence_array = []
155
- py_sentences = PyCall::List.(@py_doc.sents)
148
+ py_sentences = PyCall::List.call(@py_doc.sents)
156
149
  py_sentences.each do |py_sent|
157
150
  sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
158
151
  end
@@ -164,9 +157,9 @@ module Spacy
164
157
  def ents
165
158
  # so that ents can be "each"-ed in Ruby
166
159
  ent_array = []
167
- PyCall::List.(@py_doc.ents).each do |ent|
160
+ PyCall::List.call(@py_doc.ents).each do |ent|
168
161
  ent.define_singleton_method :label do
169
- return self.label_
162
+ label_
170
163
  end
171
164
  ent_array << ent
172
165
  end
@@ -178,15 +171,15 @@ module Spacy
178
171
  def [](range)
179
172
  if range.is_a?(Range)
180
173
  py_span = @py_doc[range]
181
- return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
174
+ Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
182
175
  else
183
- return Token.new(@py_doc[range])
176
+ Token.new(@py_doc[range])
184
177
  end
185
178
  end
186
179
 
187
180
  # Returns a semantic similarity estimate.
188
181
  # @param other [Doc] the other doc to which a similarity estimation is made
189
- # @return [Float]
182
+ # @return [Float]
190
183
  def similarity(other)
191
184
  py_doc.similarity(other.py_doc)
192
185
  end
@@ -196,18 +189,21 @@ module Spacy
196
189
  # @param compact [Boolean] only relevant to the `dep` style
197
190
  # @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
198
191
  def displacy(style: "dep", compact: false)
199
- PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
192
+ PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
200
193
  end
201
194
 
202
195
  # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
203
196
  def method_missing(name, *args)
204
197
  @py_doc.send(name, *args)
205
198
  end
199
+
200
+ def respond_to_missing?(sym)
201
+ sym ? true : super
202
+ end
206
203
  end
207
204
 
208
205
  # See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
209
206
  class Language
210
-
211
207
  # @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
212
208
  attr_reader :spacy_nlp_id
213
209
 
@@ -216,10 +212,16 @@ module Spacy
216
212
 
217
213
  # Creates a language model instance, which is conventionally referred to by a variable named `nlp`.
218
214
  # @param model [String] A language model installed in the system
219
- def initialize(model = "en_core_web_sm")
215
+ def initialize(model = "en_core_web_sm", max_retrial = MAX_RETRIAL, retrial = 0)
220
216
  @spacy_nlp_id = "nlp_#{model.object_id}"
221
217
  PyCall.exec("import spacy; #{@spacy_nlp_id} = spacy.load('#{model}')")
222
218
  @py_nlp = PyCall.eval(@spacy_nlp_id)
219
+ rescue StandardError
220
+ retrial += 1
221
+ raise "Error: Pycall failed to load Spacy" unless retrial <= max_retrial
222
+
223
+ sleep 0.5
224
+ initialize(model, max_retrial, retrial)
223
225
  end
224
226
 
225
227
  # Reads and analyzes the given text.
@@ -245,7 +247,7 @@ module Spacy
245
247
  # @return [Array<String>] An array of text strings representing pipeline components
246
248
  def pipe_names
247
249
  pipe_array = []
248
- PyCall::List.(@py_nlp.pipe_names).each do |pipe|
250
+ PyCall::List.call(@py_nlp.pipe_names).each do |pipe|
249
251
  pipe_array << pipe
250
252
  end
251
253
  pipe_array
@@ -268,24 +270,23 @@ module Spacy
268
270
  # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
269
271
  # @param vector [Object] A vector representation of a word (whether existing or non-existing)
270
272
  # @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
271
- def most_similar(vector, n)
273
+ def most_similar(vector, num)
272
274
  vec_array = Numpy.asarray([vector])
273
- py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
274
- key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
275
- keys = key_texts.map{|kt| kt[0]}
276
- texts = key_texts.map{|kt| kt[1]}
277
- best_rows = PyCall::List.(py_result[1])[0]
278
- scores = PyCall::List.(py_result[2])[0]
275
+ py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: num)
276
+ key_texts = PyCall.eval("[[str(num), #{@spacy_nlp_id}.vocab[num].text] for num in #{py_result[0][0].tolist}]")
277
+ keys = key_texts.map { |kt| kt[0] }
278
+ texts = key_texts.map { |kt| kt[1] }
279
+ best_rows = PyCall::List.call(py_result[1])[0]
280
+ scores = PyCall::List.call(py_result[2])[0]
279
281
 
280
282
  results = []
281
- n.times do |i|
282
- result = {key: keys[i].to_i,
283
- text: texts[i],
284
- best_row: best_rows[i],
285
- score: scores[i]
286
- }
283
+ num.times do |i|
284
+ result = { key: keys[i].to_i,
285
+ text: texts[i],
286
+ best_row: best_rows[i],
287
+ score: scores[i] }
287
288
  result.each_key do |key|
288
- result.define_singleton_method(key){ result[key] }
289
+ result.define_singleton_method(key) { result[key] }
289
290
  end
290
291
  results << result
291
292
  end
@@ -297,9 +298,9 @@ module Spacy
297
298
  # @param disable [Array<String>]
298
299
  # @param batch_size [Integer]
299
300
  # @return [Array<Doc>]
300
- def pipe(texts, disable: [], batch_size: 50)
301
+ def pipe(texts, disable: [], batch_size: 50)
301
302
  docs = []
302
- PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
303
+ PyCall::List.call(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
303
304
  docs << Doc.new(@py_nlp, py_doc: py_doc)
304
305
  end
305
306
  docs
@@ -309,18 +310,21 @@ module Spacy
309
310
  def method_missing(name, *args)
310
311
  @py_nlp.send(name, *args)
311
312
  end
313
+
314
+ def respond_to_missing?(sym)
315
+ sym ? true : super
316
+ end
312
317
  end
313
318
 
314
319
  # See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
315
320
  class Matcher
316
-
317
321
  # @return [Object] a Python `Matcher` instance accessible via `PyCall`
318
322
  attr_reader :py_matcher
319
323
 
320
324
  # Creates a {Matcher} instance
321
325
  # @param nlp [Language] an instance of {Language} class
322
326
  def initialize(nlp)
323
- @py_matcher = PyMatcher.(nlp.vocab)
327
+ @py_matcher = PyMatcher.call(nlp.vocab)
324
328
  end
325
329
 
326
330
  # Adds a label string and a text pattern.
@@ -334,16 +338,17 @@ module Spacy
334
338
  # @param doc [Doc] an {Doc} instance
335
339
  # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
336
340
  def match(doc)
337
- str_results = @py_matcher.(doc.py_doc).to_s
341
+ str_results = @py_matcher.call(doc.py_doc).to_s
338
342
  s = StringScanner.new(str_results[1..-2])
339
343
  results = []
340
344
  while s.scan_until(/(\d+), (\d+), (\d+)/)
341
345
  next unless s.matched
346
+
342
347
  triple = s.matched.split(", ")
343
348
  match_id = triple[0].to_i
344
349
  start_index = triple[1].to_i
345
350
  end_index = triple[2].to_i - 1
346
- results << {match_id: match_id, start_index: start_index, end_index: end_index}
351
+ results << { match_id: match_id, start_index: start_index, end_index: end_index }
347
352
  end
348
353
  results
349
354
  end
@@ -351,7 +356,6 @@ module Spacy
351
356
 
352
357
  # See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
353
358
  class Span
354
-
355
359
  # @return [Object] a Python `Span` instance accessible via `PyCall`
356
360
  attr_reader :py_span
357
361
 
@@ -360,11 +364,11 @@ module Spacy
360
364
 
361
365
  include Enumerable
362
366
 
363
- alias_method :length, :count
364
- alias_method :len, :count
365
- alias_method :size, :count
367
+ alias length count
368
+ alias len count
369
+ alias size count
366
370
 
367
- # It is recommended to use {Doc#span} method to create a span. If you need to
371
+ # It is recommended to use {Doc#span} method to create a span. If you need to
368
372
  # create one using {Span#initialize}, there are two method signatures:
369
373
  # `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
370
374
  # @param doc [Doc] the document to which this span belongs
@@ -373,18 +377,14 @@ module Spacy
373
377
  # @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
374
378
  def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
375
379
  @doc = doc
376
- if py_span
377
- @py_span = py_span
378
- else
379
- @py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
380
- end
380
+ @py_span = py_span || @py_span = PySpan.call(@doc.py_doc, start_index, end_index + 1, options)
381
381
  end
382
382
 
383
383
  # Returns an array of tokens contained in the span.
384
384
  # @return [Array<Token>]
385
385
  def tokens
386
386
  results = []
387
- PyCall::List.(@py_span).each do |py_token|
387
+ PyCall::List.call(@py_span).each do |py_token|
388
388
  results << Token.new(py_token)
389
389
  end
390
390
  results
@@ -392,7 +392,7 @@ module Spacy
392
392
 
393
393
  # Iterates over the elements in the span yielding a token instance each time.
394
394
  def each
395
- PyCall::List.(@py_span).each do |py_token|
395
+ PyCall::List.call(@py_span).each do |py_token|
396
396
  yield Token.new(py_token)
397
397
  end
398
398
  end
@@ -401,7 +401,7 @@ module Spacy
401
401
  # @return [Array<Span>]
402
402
  def noun_chunks
403
403
  chunk_array = []
404
- py_chunks = PyCall::List.(@py_span.noun_chunks)
404
+ py_chunks = PyCall::List.call(@py_span.noun_chunks)
405
405
  py_chunks.each do |py_span|
406
406
  chunk_array << Span.new(@doc, py_span: py_span)
407
407
  end
@@ -410,7 +410,7 @@ module Spacy
410
410
 
411
411
  # Returns the head token
412
412
  # @return [Token]
413
- def root
413
+ def root
414
414
  Token.new(@py_span.root)
415
415
  end
416
416
 
@@ -418,7 +418,7 @@ module Spacy
418
418
  # @return [Array<Span>]
419
419
  def sents
420
420
  sentence_array = []
421
- py_sentences = PyCall::List.(@py_span.sents)
421
+ py_sentences = PyCall::List.call(@py_span.sents)
422
422
  py_sentences.each do |py_span|
423
423
  sentence_array << Span.new(@doc, py_span: py_span)
424
424
  end
@@ -429,7 +429,7 @@ module Spacy
429
429
  # @return [Array<Span>]
430
430
  def ents
431
431
  ent_array = []
432
- PyCall::List.(@py_span.ents).each do |py_span|
432
+ PyCall::List.call(@py_span.ents).each do |py_span|
433
433
  ent_array << Span.new(@doc, py_span: py_span)
434
434
  end
435
435
  ent_array
@@ -438,8 +438,8 @@ module Spacy
438
438
  # Returns a span that represents the sentence that the given span is part of.
439
439
  # @return [Span]
440
440
  def sent
441
- py_span = @py_span.sent
442
- return Span.new(@doc, py_span: py_span)
441
+ py_span = @py_span.sent
442
+ Span.new(@doc, py_span: py_span)
443
443
  end
444
444
 
445
445
  # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
@@ -447,67 +447,67 @@ module Spacy
447
447
  def [](range)
448
448
  if range.is_a?(Range)
449
449
  py_span = @py_span[range]
450
- return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
450
+ Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
451
451
  else
452
- return Token.new(@py_span[range])
452
+ Token.new(@py_span[range])
453
453
  end
454
454
  end
455
455
 
456
456
  # Returns a semantic similarity estimate.
457
457
  # @param other [Span] the other span to which a similarity estimation is conducted
458
- # @return [Float]
458
+ # @return [Float]
459
459
  def similarity(other)
460
460
  py_span.similarity(other.py_span)
461
461
  end
462
462
 
463
463
  # Creates a document instance from the span
464
- # @return [Doc]
464
+ # @return [Doc]
465
465
  def as_doc
466
- Doc.new(@doc.py_nlp, text: self.text)
466
+ Doc.new(@doc.py_nlp, text: text)
467
467
  end
468
468
 
469
469
  # Returns tokens coordinated with the root of the span.
470
470
  # @return [Array<Token>] an array of tokens
471
471
  def conjuncts
472
472
  conjunct_array = []
473
- PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
473
+ PyCall::List.call(@py_span.conjuncts).each do |py_conjunct|
474
474
  conjunct_array << Token.new(py_conjunct)
475
475
  end
476
476
  conjunct_array
477
477
  end
478
478
 
479
479
  # Returns tokens that are to the left of the span, whose heads are within the span.
480
- # @return [Array<Token>] an array of tokens
480
+ # @return [Array<Token>] an array of tokens
481
481
  def lefts
482
482
  left_array = []
483
- PyCall::List.(@py_span.lefts).each do |py_left|
483
+ PyCall::List.call(@py_span.lefts).each do |py_left|
484
484
  left_array << Token.new(py_left)
485
485
  end
486
486
  left_array
487
487
  end
488
488
 
489
489
  # Returns Tokens that are to the right of the span, whose heads are within the span.
490
- # @return [Array<Token>] an array of Tokens
490
+ # @return [Array<Token>] an array of Tokens
491
491
  def rights
492
492
  right_array = []
493
- PyCall::List.(@py_span.rights).each do |py_right|
493
+ PyCall::List.call(@py_span.rights).each do |py_right|
494
494
  right_array << Token.new(py_right)
495
495
  end
496
496
  right_array
497
497
  end
498
498
 
499
499
  # Returns Tokens that are within the span and tokens that descend from them.
500
- # @return [Array<Token>] an array of tokens
500
+ # @return [Array<Token>] an array of tokens
501
501
  def subtree
502
502
  subtree_array = []
503
- PyCall::List.(@py_span.subtree).each do |py_subtree|
503
+ PyCall::List.call(@py_span.subtree).each do |py_subtree|
504
504
  subtree_array << Token.new(py_subtree)
505
505
  end
506
506
  subtree_array
507
507
  end
508
508
 
509
509
  # Returns the label
510
- # @return [String]
510
+ # @return [String]
511
511
  def label
512
512
  @py_span.label_
513
513
  end
@@ -516,11 +516,14 @@ module Spacy
516
516
  def method_missing(name, *args)
517
517
  @py_span.send(name, *args)
518
518
  end
519
+
520
+ def respond_to_missing?(sym)
521
+ sym ? true : super
522
+ end
519
523
  end
520
524
 
521
525
  # See also spaCy Python API document for [`Token`](https://spacy.io/api/token).
522
526
  class Token
523
-
524
527
  # @return [Object] a Python `Token` instance accessible via `PyCall`
525
528
  attr_reader :py_token
526
529
 
@@ -528,17 +531,16 @@ module Spacy
528
531
  attr_reader :text
529
532
 
530
533
  # It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
531
- # There is no way to generate a token from scratch but relying on a pre-exising Python {Token} object.
534
+ # There is no way to generate a token from scratch but relying on a pre-exising Python `Token` object.
532
535
  # @param py_token [Object] Python `Token` object
533
536
  def initialize(py_token)
534
537
  @py_token = py_token
535
538
  @text = @py_token.text
536
539
  end
537
540
 
538
-
539
541
  # Returns the head token
540
542
  # @return [Token]
541
- def head
543
+ def head
542
544
  Token.new(@py_token.head)
543
545
  end
544
546
 
@@ -546,7 +548,7 @@ module Spacy
546
548
  # @return [Array<Token>] an array of tokens
547
549
  def subtree
548
550
  descendant_array = []
549
- PyCall::List.(@py_token.subtree).each do |descendant|
551
+ PyCall::List.call(@py_token.subtree).each do |descendant|
550
552
  descendant_array << Token.new(descendant)
551
553
  end
552
554
  descendant_array
@@ -556,7 +558,7 @@ module Spacy
556
558
  # @return [Array<Token>] an array of tokens
557
559
  def ancestors
558
560
  ancestor_array = []
559
- PyCall::List.(@py_token.ancestors).each do |ancestor|
561
+ PyCall::List.call(@py_token.ancestors).each do |ancestor|
560
562
  ancestor_array << Token.new(ancestor)
561
563
  end
562
564
  ancestor_array
@@ -566,7 +568,7 @@ module Spacy
566
568
  # @return [Array<Token>] an array of tokens
567
569
  def children
568
570
  child_array = []
569
- PyCall::List.(@py_token.children).each do |child|
571
+ PyCall::List.call(@py_token.children).each do |child|
570
572
  child_array << Token.new(child)
571
573
  end
572
574
  child_array
@@ -576,7 +578,7 @@ module Spacy
576
578
  # @return [Array<Token>] an array of tokens
577
579
  def lefts
578
580
  token_array = []
579
- PyCall::List.(@py_token.lefts).each do |token|
581
+ PyCall::List.call(@py_token.lefts).each do |token|
580
582
  token_array << Token.new(token)
581
583
  end
582
584
  token_array
@@ -586,89 +588,87 @@ module Spacy
586
588
  # @return [Array<Token>] an array of tokens
587
589
  def rights
588
590
  token_array = []
589
- PyCall::List.(@py_token.rights).each do |token|
591
+ PyCall::List.call(@py_token.rights).each do |token|
590
592
  token_array << Token.new(token)
591
593
  end
592
594
  token_array
593
595
  end
594
596
 
595
597
  # String representation of the token.
596
- # @return [String]
598
+ # @return [String]
597
599
  def to_s
598
600
  @text
599
601
  end
600
602
 
601
603
  # Returns a hash or string of morphological information
602
604
  # @param hash [Boolean] if true, a hash will be returned instead of a string
603
- # @return [Hash, String]
604
- def morphology(hash = true)
605
+ # @return [Hash, String]
606
+ def morphology(hash: true)
605
607
  if @py_token.has_morph
606
608
  morph_analysis = @py_token.morph
607
- if hash
608
- return morph_analysis.to_dict
609
- else
610
- return morph_analysis.to_s
611
- end
612
- else
613
609
  if hash
614
- results = {}
610
+ morph_analysis.to_dict
615
611
  else
616
- return ""
612
+ morph_analysis.to_s
617
613
  end
614
+ elsif hash
615
+ {}
616
+ else
617
+ ""
618
618
  end
619
619
  end
620
620
 
621
621
  # Returns the lemma by calling `lemma_' of `@py_token` object
622
- # @return [String]
622
+ # @return [String]
623
623
  def lemma
624
624
  @py_token.lemma_
625
625
  end
626
626
 
627
627
  # Returns the lowercase form by calling `lower_' of `@py_token` object
628
- # @return [String]
628
+ # @return [String]
629
629
  def lower
630
630
  @py_token.lower_
631
631
  end
632
632
 
633
633
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
634
- # @return [String]
634
+ # @return [String]
635
635
  def shape
636
636
  @py_token.shape_
637
637
  end
638
638
 
639
639
  # Returns the pos by calling `pos_' of `@py_token` object
640
- # @return [String]
640
+ # @return [String]
641
641
  def pos
642
642
  @py_token.pos_
643
643
  end
644
644
 
645
645
  # Returns the fine-grained pos by calling `tag_' of `@py_token` object
646
- # @return [String]
647
- def tag
646
+ # @return [String]
647
+ def tag
648
648
  @py_token.tag_
649
649
  end
650
650
 
651
651
  # Returns the dependency relation by calling `dep_' of `@py_token` object
652
- # @return [String]
652
+ # @return [String]
653
653
  def dep
654
654
  @py_token.dep_
655
655
  end
656
-
656
+
657
657
  # Returns the language by calling `lang_' of `@py_token` object
658
- # @return [String]
659
- def lang
658
+ # @return [String]
659
+ def lang
660
660
  @py_token.lang_
661
661
  end
662
662
 
663
663
  # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
664
- # @return [String]
665
- def whitespace
664
+ # @return [String]
665
+ def whitespace
666
666
  @py_token.whitespace_
667
667
  end
668
668
 
669
669
  # Returns the named entity type by calling `ent_type_' of `@py_token` object
670
- # @return [String]
671
- def ent_type
670
+ # @return [String]
671
+ def ent_type
672
672
  @py_token.ent_type_
673
673
  end
674
674
 
@@ -682,11 +682,14 @@ module Spacy
682
682
  def method_missing(name, *args)
683
683
  @py_token.send(name, *args)
684
684
  end
685
+
686
+ def respond_to_missing?(sym)
687
+ sym ? true : super
688
+ end
685
689
  end
686
690
 
687
691
  # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
688
- class Lexeme
689
-
692
+ class Lexeme
690
693
  # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
691
694
  attr_reader :py_lexeme
692
695
 
@@ -702,50 +705,50 @@ module Spacy
702
705
  end
703
706
 
704
707
  # String representation of the lexeme.
705
- # @return [String]
708
+ # @return [String]
706
709
  def to_s
707
710
  @text
708
711
  end
709
712
 
710
713
  # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
711
- # @return [String]
714
+ # @return [String]
712
715
  def lower
713
716
  @py_lexeme.lower_
714
717
  end
715
718
 
716
719
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
717
- # @return [String]
720
+ # @return [String]
718
721
  def shape
719
722
  @py_lexeme.shape_
720
723
  end
721
724
 
722
725
  # Returns the language by calling `lang_' of `@py_lexeme` object
723
- # @return [String]
724
- def lang
726
+ # @return [String]
727
+ def lang
725
728
  @py_lexeme.lang_
726
729
  end
727
730
 
728
731
  # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
729
- # @return [String]
730
- def prefix
732
+ # @return [String]
733
+ def prefix
731
734
  @py_lexeme.prefix_
732
735
  end
733
- #
736
+
734
737
  # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
735
- # @return [String]
738
+ # @return [String]
736
739
  def suffix
737
740
  @py_lexeme.suffix_
738
741
  end
739
742
 
740
743
  # Returns the lexeme's norm, i.e. a normalized form of the lexeme, by calling `norm_' of `@py_lexeme` object
741
- # @return [String]
744
+ # @return [String]
742
745
  def norm
743
746
  @py_lexeme.norm_
744
747
  end
745
748
 
746
749
  # Returns a semantic similarity estimate.
747
- # @param other [Lexeme] the other doc to which a similarity estimation is made
748
- # @return [Float]
750
+ # @param other [Lexeme] the other lexeme to which a similarity estimation is made
751
+ # @return [Float]
749
752
  def similarity(other)
750
753
  @py_lexeme.similarity(other.py_lexeme)
751
754
  end
@@ -754,7 +757,9 @@ module Spacy
754
757
  def method_missing(name, *args)
755
758
  @py_lexeme.send(name, *args)
756
759
  end
757
- end
758
760
 
761
+ def respond_to_missing?(sym)
762
+ sym ? true : super
763
+ end
764
+ end
759
765
  end
760
-