ruby-spacy 0.1.4.1 → 0.1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/Gemfile +7 -7
  5. data/Gemfile.lock +2 -2
  6. data/README.md +7 -10
  7. data/examples/get_started/lexeme.rb +3 -1
  8. data/examples/get_started/linguistic_annotations.rb +3 -1
  9. data/examples/get_started/morphology.rb +3 -1
  10. data/examples/get_started/most_similar.rb +3 -1
  11. data/examples/get_started/named_entities.rb +4 -2
  12. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  13. data/examples/get_started/similarity.rb +4 -2
  14. data/examples/get_started/tokenization.rb +3 -1
  15. data/examples/get_started/visualizing_dependencies.rb +2 -2
  16. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  17. data/examples/get_started/visualizing_named_entities.rb +4 -2
  18. data/examples/get_started/vocab.rb +3 -1
  19. data/examples/get_started/word_vectors.rb +3 -1
  20. data/examples/japanese/ancestors.rb +6 -4
  21. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  22. data/examples/japanese/information_extraction.rb +6 -6
  23. data/examples/japanese/lemmatization.rb +3 -1
  24. data/examples/japanese/most_similar.rb +3 -1
  25. data/examples/japanese/named_entity_recognition.rb +3 -2
  26. data/examples/japanese/navigating_parse_tree.rb +19 -17
  27. data/examples/japanese/noun_chunks.rb +2 -0
  28. data/examples/japanese/pos_tagging.rb +3 -1
  29. data/examples/japanese/sentence_segmentation.rb +3 -2
  30. data/examples/japanese/similarity.rb +2 -0
  31. data/examples/japanese/tokenization.rb +2 -0
  32. data/examples/japanese/visualizing_dependencies.rb +3 -1
  33. data/examples/japanese/visualizing_named_entities.rb +4 -2
  34. data/examples/linguistic_features/ancestors.rb +7 -5
  35. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  36. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  37. data/examples/linguistic_features/information_extraction.rb +9 -9
  38. data/examples/linguistic_features/iterating_children.rb +6 -8
  39. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  40. data/examples/linguistic_features/lemmatization.rb +3 -1
  41. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  42. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  43. data/examples/linguistic_features/noun_chunks.rb +3 -1
  44. data/examples/linguistic_features/pos_tagging.rb +3 -1
  45. data/examples/linguistic_features/retokenize_1.rb +2 -0
  46. data/examples/linguistic_features/retokenize_2.rb +4 -2
  47. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  48. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  49. data/examples/linguistic_features/similarity.rb +4 -2
  50. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  51. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  52. data/examples/linguistic_features/tokenization.rb +3 -2
  53. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  54. data/examples/rule_based_matching/matcher.rb +4 -2
  55. data/lib/ruby-spacy/version.rb +1 -1
  56. data/lib/ruby-spacy.rb +139 -141
  57. data/ruby-spacy.gemspec +15 -17
  58. data/tags +132 -0
  59. metadata +69 -10
data/lib/ruby-spacy.rb CHANGED
@@ -1,17 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "ruby-spacy/version"
4
- require 'enumerator'
5
- require 'strscan'
6
- require 'numpy'
7
- require 'pycall/import'
8
- include PyCall::Import
4
+ require "strscan"
5
+ require "numpy"
6
+ require "pycall/import"
9
7
 
10
8
  # This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
11
9
  module Spacy
12
-
13
10
  extend PyCall::Import
14
- spacy = PyCall.import_module('spacy')
11
+ spacy = PyCall.import_module("spacy")
15
12
 
16
13
  # Python `Language` class
17
14
  PyLanguage = spacy.language.Language
@@ -24,23 +21,22 @@ module Spacy
24
21
 
25
22
  # Python `Token` class object
26
23
  PyToken = spacy.tokens.Token
27
-
24
+
28
25
  # Python `Matcher` class object
29
26
  PyMatcher = spacy.matcher.Matcher
30
27
 
31
28
  # Python `displacy` object
32
29
  PyDisplacy = spacy.displacy
33
30
 
34
- # A utility module method to convert Python's generator object to a Ruby array,
31
+ # A utility module method to convert Python's generator object to a Ruby array,
35
32
  # mainly used on the items inside the array returned from dependency-related methods
36
33
  # such as {Span#rights}, {Span#lefts} and {Span#subtree}.
37
34
  def self.generator_to_array(py_generator)
38
- PyCall::List.(py_generator)
35
+ PyCall::List.call(py_generator)
39
36
  end
40
37
 
41
38
  # See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
42
39
  class Doc
43
-
44
40
  # @return [Object] a Python `Language` instance accessible via `PyCall`
45
41
  attr_reader :py_nlp
46
42
 
@@ -52,23 +48,19 @@ module Spacy
52
48
 
53
49
  include Enumerable
54
50
 
55
- alias_method :length, :count
56
- alias_method :len, :count
57
- alias_method :size, :count
51
+ alias length count
52
+ alias len count
53
+ alias size count
58
54
 
59
- # It is recommended to use {Language#read} method to create a doc. If you need to
60
- # create one using {Doc#initialize}, there are two method signatures:
55
+ # It is recommended to use {Language#read} method to create a doc. If you need to
56
+ # create one using {Doc#initialize}, there are two method signatures:
61
57
  # `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
62
58
  # @param nlp [Language] an instance of {Language} class
63
59
  # @param py_doc [Object] an instance of Python `Doc` class
64
60
  # @param text [String] the text string to be analyzed
65
61
  def initialize(nlp, py_doc: nil, text: nil)
66
62
  @py_nlp = nlp
67
- if py_doc
68
- @py_doc = py_doc
69
- else
70
- @py_doc = nlp.(text)
71
- end
63
+ @py_doc = py_doc || @py_doc = nlp.call(text)
72
64
  @text = @py_doc.text
73
65
  end
74
66
 
@@ -77,25 +69,25 @@ module Spacy
77
69
  # @param end_index [Integer] the end position of the span to be retokenized in the document
78
70
  # @param attributes [Hash] attributes to set on the merged token
79
71
  def retokenize(start_index, end_index, attributes = {})
80
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
81
- retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
72
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
73
+ retokenizer.merge(@py_doc[start_index..end_index], attrs: attributes)
82
74
  end
83
75
  end
84
76
 
85
77
  # Retokenizes the text splitting the specified token.
86
78
  # @param pos_in_doc [Integer] the position of the span to be retokenized in the document
87
- # @param split_array [Array<String>] text strings of the split results
79
+ # @param split_array [Array<String>] text strings of the split results
88
80
  # @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
89
81
  # @param attributes [Hash] the attributes of the split elements
90
82
  def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
91
- PyCall.with(@py_doc.retokenize()) do |retokenizer|
83
+ PyCall.with(@py_doc.retokenize) do |retokenizer|
92
84
  heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
93
85
  retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
94
86
  end
95
87
  end
96
88
 
97
89
  # String representation of the document.
98
- # @return [String]
90
+ # @return [String]
99
91
  def to_s
100
92
  @text
101
93
  end
@@ -104,7 +96,7 @@ module Spacy
104
96
  # @return [Array<Token>]
105
97
  def tokens
106
98
  results = []
107
- PyCall::List.(@py_doc).each do |py_token|
99
+ PyCall::List.call(@py_doc).each do |py_token|
108
100
  results << Token.new(py_token)
109
101
  end
110
102
  results
@@ -112,12 +104,12 @@ module Spacy
112
104
 
113
105
  # Iterates over the elements in the doc yielding a token instance each time.
114
106
  def each
115
- PyCall::List.(@py_doc).each do |py_token|
107
+ PyCall::List.call(@py_doc).each do |py_token|
116
108
  yield Token.new(py_token)
117
109
  end
118
110
  end
119
111
 
120
- # Returns a span of the specified range within the doc.
112
+ # Returns a span of the specified range within the doc.
121
113
  # The method should be used in either of two ways: `Doc#span(range)` or `Doc#span(start_pos, size_of_span)`.
122
114
  # @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
123
115
  # @param optional_size [Integer] an integer representing the size of the span
@@ -125,7 +117,7 @@ module Spacy
125
117
  def span(range_or_start, optional_size = nil)
126
118
  if optional_size
127
119
  start_index = range_or_start
128
- temp = tokens[start_index ... start_index + optional_size]
120
+ temp = tokens[start_index...start_index + optional_size]
129
121
  else
130
122
  start_index = range_or_start.first
131
123
  range = range_or_start
@@ -141,7 +133,7 @@ module Spacy
141
133
  # @return [Array<Span>]
142
134
  def noun_chunks
143
135
  chunk_array = []
144
- py_chunks = PyCall::List.(@py_doc.noun_chunks)
136
+ py_chunks = PyCall::List.call(@py_doc.noun_chunks)
145
137
  py_chunks.each do |py_chunk|
146
138
  chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
147
139
  end
@@ -152,7 +144,7 @@ module Spacy
152
144
  # @return [Array<Span>]
153
145
  def sents
154
146
  sentence_array = []
155
- py_sentences = PyCall::List.(@py_doc.sents)
147
+ py_sentences = PyCall::List.call(@py_doc.sents)
156
148
  py_sentences.each do |py_sent|
157
149
  sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
158
150
  end
@@ -164,9 +156,9 @@ module Spacy
164
156
  def ents
165
157
  # so that ents can be "each"-ed in Ruby
166
158
  ent_array = []
167
- PyCall::List.(@py_doc.ents).each do |ent|
159
+ PyCall::List.call(@py_doc.ents).each do |ent|
168
160
  ent.define_singleton_method :label do
169
- return self.label_
161
+ label_
170
162
  end
171
163
  ent_array << ent
172
164
  end
@@ -178,15 +170,15 @@ module Spacy
178
170
  def [](range)
179
171
  if range.is_a?(Range)
180
172
  py_span = @py_doc[range]
181
- return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
173
+ Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
182
174
  else
183
- return Token.new(@py_doc[range])
175
+ Token.new(@py_doc[range])
184
176
  end
185
177
  end
186
178
 
187
179
  # Returns a semantic similarity estimate.
188
180
  # @param other [Doc] the other doc to which a similarity estimation is made
189
- # @return [Float]
181
+ # @return [Float]
190
182
  def similarity(other)
191
183
  py_doc.similarity(other.py_doc)
192
184
  end
@@ -196,18 +188,21 @@ module Spacy
196
188
  # @param compact [Boolean] only relevant to the `dep' style
197
189
  # @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
198
190
  def displacy(style: "dep", compact: false)
199
- PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
191
+ PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
200
192
  end
201
193
 
202
194
  # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
203
195
  def method_missing(name, *args)
204
196
  @py_doc.send(name, *args)
205
197
  end
198
+
199
+ def respond_to_missing?(sym)
200
+ sym ? true : super
201
+ end
206
202
  end
207
203
 
208
204
  # See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
209
205
  class Language
210
-
211
206
  # @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
212
207
  attr_reader :spacy_nlp_id
213
208
 
@@ -245,7 +240,7 @@ module Spacy
245
240
  # @return [Array<String>] An array of text strings representing pipeline components
246
241
  def pipe_names
247
242
  pipe_array = []
248
- PyCall::List.(@py_nlp.pipe_names).each do |pipe|
243
+ PyCall::List.call(@py_nlp.pipe_names).each do |pipe|
249
244
  pipe_array << pipe
250
245
  end
251
246
  pipe_array
@@ -268,24 +263,23 @@ module Spacy
268
263
  # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
269
264
  # @param vector [Object] A vector representation of a word (whether existing or non-existing)
270
265
  # @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
271
- def most_similar(vector, n)
266
+ def most_similar(vector, num)
272
267
  vec_array = Numpy.asarray([vector])
273
- py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
274
- key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
275
- keys = key_texts.map{|kt| kt[0]}
276
- texts = key_texts.map{|kt| kt[1]}
277
- best_rows = PyCall::List.(py_result[1])[0]
278
- scores = PyCall::List.(py_result[2])[0]
268
+ py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: num)
269
+ key_texts = PyCall.eval("[[str(num), #{@spacy_nlp_id}.vocab[num].text] for num in #{py_result[0][0].tolist}]")
270
+ keys = key_texts.map { |kt| kt[0] }
271
+ texts = key_texts.map { |kt| kt[1] }
272
+ best_rows = PyCall::List.call(py_result[1])[0]
273
+ scores = PyCall::List.call(py_result[2])[0]
279
274
 
280
275
  results = []
281
- n.times do |i|
282
- result = {key: keys[i].to_i,
283
- text: texts[i],
284
- best_row: best_rows[i],
285
- score: scores[i]
286
- }
276
+ num.times do |i|
277
+ result = { key: keys[i].to_i,
278
+ text: texts[i],
279
+ best_row: best_rows[i],
280
+ score: scores[i] }
287
281
  result.each_key do |key|
288
- result.define_singleton_method(key){ result[key] }
282
+ result.define_singleton_method(key) { result[key] }
289
283
  end
290
284
  results << result
291
285
  end
@@ -297,9 +291,9 @@ module Spacy
297
291
  # @param disable [Array<String>]
298
292
  # @param batch_size [Integer]
299
293
  # @return [Array<Doc>]
300
- def pipe(texts, disable: [], batch_size: 50)
294
+ def pipe(texts, disable: [], batch_size: 50)
301
295
  docs = []
302
- PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
296
+ PyCall::List.call(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
303
297
  docs << Doc.new(@py_nlp, py_doc: py_doc)
304
298
  end
305
299
  docs
@@ -309,18 +303,21 @@ module Spacy
309
303
  def method_missing(name, *args)
310
304
  @py_nlp.send(name, *args)
311
305
  end
306
+
307
+ def respond_to_missing?(sym)
308
+ sym ? true : super
309
+ end
312
310
  end
313
311
 
314
312
  # See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
315
313
  class Matcher
316
-
317
314
  # @return [Object] a Python `Matcher` instance accessible via `PyCall`
318
315
  attr_reader :py_matcher
319
316
 
320
317
  # Creates a {Matcher} instance
321
318
  # @param nlp [Language] an instance of {Language} class
322
319
  def initialize(nlp)
323
- @py_matcher = PyMatcher.(nlp.vocab)
320
+ @py_matcher = PyMatcher.call(nlp.vocab)
324
321
  end
325
322
 
326
323
  # Adds a label string and a text pattern.
@@ -334,16 +331,17 @@ module Spacy
334
331
  # @param doc [Doc] an {Doc} instance
335
332
  # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
336
333
  def match(doc)
337
- str_results = @py_matcher.(doc.py_doc).to_s
334
+ str_results = @py_matcher.call(doc.py_doc).to_s
338
335
  s = StringScanner.new(str_results[1..-2])
339
336
  results = []
340
337
  while s.scan_until(/(\d+), (\d+), (\d+)/)
341
338
  next unless s.matched
339
+
342
340
  triple = s.matched.split(", ")
343
341
  match_id = triple[0].to_i
344
342
  start_index = triple[1].to_i
345
343
  end_index = triple[2].to_i - 1
346
- results << {match_id: match_id, start_index: start_index, end_index: end_index}
344
+ results << { match_id: match_id, start_index: start_index, end_index: end_index }
347
345
  end
348
346
  results
349
347
  end
@@ -351,7 +349,6 @@ module Spacy
351
349
 
352
350
  # See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
353
351
  class Span
354
-
355
352
  # @return [Object] a Python `Span` instance accessible via `PyCall`
356
353
  attr_reader :py_span
357
354
 
@@ -360,11 +357,11 @@ module Spacy
360
357
 
361
358
  include Enumerable
362
359
 
363
- alias_method :length, :count
364
- alias_method :len, :count
365
- alias_method :size, :count
360
+ alias length count
361
+ alias len count
362
+ alias size count
366
363
 
367
- # It is recommended to use {Doc#span} method to create a span. If you need to
364
+ # It is recommended to use {Doc#span} method to create a span. If you need to
368
365
  # create one using {Span#initialize}, there are two method signatures:
369
366
  # `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
370
367
  # @param doc [Doc] the document to which this span belongs to
@@ -373,18 +370,14 @@ module Spacy
373
370
  # @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
374
371
  def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
375
372
  @doc = doc
376
- if py_span
377
- @py_span = py_span
378
- else
379
- @py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
380
- end
373
+ @py_span = py_span || @py_span = PySpan.call(@doc.py_doc, start_index, end_index + 1, options)
381
374
  end
382
375
 
383
376
  # Returns an array of tokens contained in the span.
384
377
  # @return [Array<Token>]
385
378
  def tokens
386
379
  results = []
387
- PyCall::List.(@py_span).each do |py_token|
380
+ PyCall::List.call(@py_span).each do |py_token|
388
381
  results << Token.new(py_token)
389
382
  end
390
383
  results
@@ -392,7 +385,7 @@ module Spacy
392
385
 
393
386
  # Iterates over the elements in the span yielding a token instance each time.
394
387
  def each
395
- PyCall::List.(@py_span).each do |py_token|
388
+ PyCall::List.call(@py_span).each do |py_token|
396
389
  yield Token.new(py_token)
397
390
  end
398
391
  end
@@ -401,7 +394,7 @@ module Spacy
401
394
  # @return [Array<Span>]
402
395
  def noun_chunks
403
396
  chunk_array = []
404
- py_chunks = PyCall::List.(@py_span.noun_chunks)
397
+ py_chunks = PyCall::List.call(@py_span.noun_chunks)
405
398
  py_chunks.each do |py_span|
406
399
  chunk_array << Span.new(@doc, py_span: py_span)
407
400
  end
@@ -410,7 +403,7 @@ module Spacy
410
403
 
411
404
  # Returns the head token
412
405
  # @return [Token]
413
- def root
406
+ def root
414
407
  Token.new(@py_span.root)
415
408
  end
416
409
 
@@ -418,7 +411,7 @@ module Spacy
418
411
  # @return [Array<Span>]
419
412
  def sents
420
413
  sentence_array = []
421
- py_sentences = PyCall::List.(@py_span.sents)
414
+ py_sentences = PyCall::List.call(@py_span.sents)
422
415
  py_sentences.each do |py_span|
423
416
  sentence_array << Span.new(@doc, py_span: py_span)
424
417
  end
@@ -429,7 +422,7 @@ module Spacy
429
422
  # @return [Array<Span>]
430
423
  def ents
431
424
  ent_array = []
432
- PyCall::List.(@py_span.ents).each do |py_span|
425
+ PyCall::List.call(@py_span.ents).each do |py_span|
433
426
  ent_array << Span.new(@doc, py_span: py_span)
434
427
  end
435
428
  ent_array
@@ -438,8 +431,8 @@ module Spacy
438
431
  # Returns a span that represents the sentence that the given span is part of.
439
432
  # @return [Span]
440
433
  def sent
441
- py_span = @py_span.sent
442
- return Span.new(@doc, py_span: py_span)
434
+ py_span = @py_span.sent
435
+ Span.new(@doc, py_span: py_span)
443
436
  end
444
437
 
445
438
  # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
@@ -447,67 +440,67 @@ module Spacy
447
440
  def [](range)
448
441
  if range.is_a?(Range)
449
442
  py_span = @py_span[range]
450
- return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
443
+ Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
451
444
  else
452
- return Token.new(@py_span[range])
445
+ Token.new(@py_span[range])
453
446
  end
454
447
  end
455
448
 
456
449
  # Returns a semantic similarity estimate.
457
450
  # @param other [Span] the other span to which a similarity estimation is conducted
458
- # @return [Float]
451
+ # @return [Float]
459
452
  def similarity(other)
460
453
  py_span.similarity(other.py_span)
461
454
  end
462
455
 
463
456
  # Creates a document instance from the span
464
- # @return [Doc]
457
+ # @return [Doc]
465
458
  def as_doc
466
- Doc.new(@doc.py_nlp, text: self.text)
459
+ Doc.new(@doc.py_nlp, text: text)
467
460
  end
468
461
 
469
462
  # Returns tokens conjugated to the root of the span.
470
463
  # @return [Array<Token>] an array of tokens
471
464
  def conjuncts
472
465
  conjunct_array = []
473
- PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
466
+ PyCall::List.call(@py_span.conjuncts).each do |py_conjunct|
474
467
  conjunct_array << Token.new(py_conjunct)
475
468
  end
476
469
  conjunct_array
477
470
  end
478
471
 
479
472
  # Returns tokens that are to the left of the span, whose heads are within the span.
480
- # @return [Array<Token>] an array of tokens
473
+ # @return [Array<Token>] an array of tokens
481
474
  def lefts
482
475
  left_array = []
483
- PyCall::List.(@py_span.lefts).each do |py_left|
476
+ PyCall::List.call(@py_span.lefts).each do |py_left|
484
477
  left_array << Token.new(py_left)
485
478
  end
486
479
  left_array
487
480
  end
488
481
 
489
482
  # Returns Tokens that are to the right of the span, whose heads are within the span.
490
- # @return [Array<Token>] an array of Tokens
483
+ # @return [Array<Token>] an array of Tokens
491
484
  def rights
492
485
  right_array = []
493
- PyCall::List.(@py_span.rights).each do |py_right|
486
+ PyCall::List.call(@py_span.rights).each do |py_right|
494
487
  right_array << Token.new(py_right)
495
488
  end
496
489
  right_array
497
490
  end
498
491
 
499
492
  # Returns Tokens that are within the span and tokens that descend from them.
500
- # @return [Array<Token>] an array of tokens
493
+ # @return [Array<Token>] an array of tokens
501
494
  def subtree
502
495
  subtree_array = []
503
- PyCall::List.(@py_span.subtree).each do |py_subtree|
496
+ PyCall::List.call(@py_span.subtree).each do |py_subtree|
504
497
  subtree_array << Token.new(py_subtree)
505
498
  end
506
499
  subtree_array
507
500
  end
508
501
 
509
502
  # Returns the label
510
- # @return [String]
503
+ # @return [String]
511
504
  def label
512
505
  @py_span.label_
513
506
  end
@@ -516,11 +509,14 @@ module Spacy
516
509
  def method_missing(name, *args)
517
510
  @py_span.send(name, *args)
518
511
  end
512
+
513
+ def respond_to_missing?(sym)
514
+ sym ? true : super
515
+ end
519
516
  end
520
517
 
521
518
  # See also spaCy Python API document for [`Token`](https://spacy.io/api/token).
522
519
  class Token
523
-
524
520
  # @return [Object] a Python `Token` instance accessible via `PyCall`
525
521
  attr_reader :py_token
526
522
 
@@ -528,17 +524,16 @@ module Spacy
528
524
  attr_reader :text
529
525
 
530
526
  # It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
531
- # There is no way to generate a token from scratch but relying on a pre-exising Python {Token} object.
527
+ # There is no way to generate a token from scratch but relying on a pre-existing Python `Token` object.
532
528
  # @param py_token [Object] Python `Token` object
533
529
  def initialize(py_token)
534
530
  @py_token = py_token
535
531
  @text = @py_token.text
536
532
  end
537
533
 
538
-
539
534
  # Returns the head token
540
535
  # @return [Token]
541
- def head
536
+ def head
542
537
  Token.new(@py_token.head)
543
538
  end
544
539
 
@@ -546,7 +541,7 @@ module Spacy
546
541
  # @return [Array<Token>] an array of tokens
547
542
  def subtree
548
543
  descendant_array = []
549
- PyCall::List.(@py_token.subtree).each do |descendant|
544
+ PyCall::List.call(@py_token.subtree).each do |descendant|
550
545
  descendant_array << Token.new(descendant)
551
546
  end
552
547
  descendant_array
@@ -556,7 +551,7 @@ module Spacy
556
551
  # @return [Array<Token>] an array of tokens
557
552
  def ancestors
558
553
  ancestor_array = []
559
- PyCall::List.(@py_token.ancestors).each do |ancestor|
554
+ PyCall::List.call(@py_token.ancestors).each do |ancestor|
560
555
  ancestor_array << Token.new(ancestor)
561
556
  end
562
557
  ancestor_array
@@ -566,7 +561,7 @@ module Spacy
566
561
  # @return [Array<Token>] an array of tokens
567
562
  def children
568
563
  child_array = []
569
- PyCall::List.(@py_token.children).each do |child|
564
+ PyCall::List.call(@py_token.children).each do |child|
570
565
  child_array << Token.new(child)
571
566
  end
572
567
  child_array
@@ -576,7 +571,7 @@ module Spacy
576
571
  # @return [Array<Token>] an array of tokens
577
572
  def lefts
578
573
  token_array = []
579
- PyCall::List.(@py_token.lefts).each do |token|
574
+ PyCall::List.call(@py_token.lefts).each do |token|
580
575
  token_array << Token.new(token)
581
576
  end
582
577
  token_array
@@ -586,89 +581,87 @@ module Spacy
586
581
  # @return [Array<Token>] an array of tokens
587
582
  def rights
588
583
  token_array = []
589
- PyCall::List.(@py_token.rights).each do |token|
584
+ PyCall::List.call(@py_token.rights).each do |token|
590
585
  token_array << Token.new(token)
591
586
  end
592
587
  token_array
593
588
  end
594
589
 
595
590
  # String representation of the token.
596
- # @return [String]
591
+ # @return [String]
597
592
  def to_s
598
593
  @text
599
594
  end
600
595
 
601
596
  # Returns a hash or string of morphological information
602
597
  # @param hash [Boolean] if true, a hash will be returned instead of a string
603
- # @return [Hash, String]
604
- def morphology(hash = true)
598
+ # @return [Hash, String]
599
+ def morphology(hash: true)
605
600
  if @py_token.has_morph
606
601
  morph_analysis = @py_token.morph
607
- if hash
608
- return morph_analysis.to_dict
609
- else
610
- return morph_analysis.to_s
611
- end
612
- else
613
602
  if hash
614
- results = {}
603
+ morph_analysis.to_dict
615
604
  else
616
- return ""
605
+ morph_analysis.to_s
617
606
  end
607
+ elsif hash
608
+ {}
609
+ else
610
+ ""
618
611
  end
619
612
  end
620
613
 
621
614
  # Returns the lemma by calling `lemma_' of `@py_token` object
622
- # @return [String]
615
+ # @return [String]
623
616
  def lemma
624
617
  @py_token.lemma_
625
618
  end
626
619
 
627
620
  # Returns the lowercase form by calling `lower_' of `@py_token` object
628
- # @return [String]
621
+ # @return [String]
629
622
  def lower
630
623
  @py_token.lower_
631
624
  end
632
625
 
633
626
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
634
- # @return [String]
627
+ # @return [String]
635
628
  def shape
636
629
  @py_token.shape_
637
630
  end
638
631
 
639
632
  # Returns the pos by calling `pos_' of `@py_token` object
640
- # @return [String]
633
+ # @return [String]
641
634
  def pos
642
635
  @py_token.pos_
643
636
  end
644
637
 
645
638
  # Returns the fine-grained pos by calling `tag_' of `@py_token` object
646
- # @return [String]
647
- def tag
639
+ # @return [String]
640
+ def tag
648
641
  @py_token.tag_
649
642
  end
650
643
 
651
644
  # Returns the dependency relation by calling `dep_' of `@py_token` object
652
- # @return [String]
645
+ # @return [String]
653
646
  def dep
654
647
  @py_token.dep_
655
648
  end
656
-
649
+
657
650
  # Returns the language by calling `lang_' of `@py_token` object
658
- # @return [String]
659
- def lang
651
+ # @return [String]
652
+ def lang
660
653
  @py_token.lang_
661
654
  end
662
655
 
663
656
  # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
664
- # @return [String]
665
- def whitespace
657
+ # @return [String]
658
+ def whitespace
666
659
  @py_token.whitespace_
667
660
  end
668
661
 
669
662
  # Returns the named entity type by calling `ent_type_' of `@py_token` object
670
- # @return [String]
671
- def ent_type
663
+ # @return [String]
664
+ def ent_type
672
665
  @py_token.ent_type_
673
666
  end
674
667
 
@@ -682,11 +675,14 @@ module Spacy
682
675
  def method_missing(name, *args)
683
676
  @py_token.send(name, *args)
684
677
  end
678
+
679
+ def respond_to_missing?(sym)
680
+ sym ? true : super
681
+ end
685
682
  end
686
683
 
687
684
  # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
688
- class Lexeme
689
-
685
+ class Lexeme
690
686
  # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
691
687
  attr_reader :py_lexeme
692
688
 
@@ -702,50 +698,50 @@ module Spacy
702
698
  end
703
699
 
704
700
  # String representation of the token.
705
- # @return [String]
701
+ # @return [String]
706
702
  def to_s
707
703
  @text
708
704
  end
709
705
 
710
706
  # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
711
- # @return [String]
707
+ # @return [String]
712
708
  def lower
713
709
  @py_lexeme.lower_
714
710
  end
715
711
 
716
712
  # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
717
- # @return [String]
713
+ # @return [String]
718
714
  def shape
719
715
  @py_lexeme.shape_
720
716
  end
721
717
 
722
718
  # Returns the language by calling `lang_' of `@py_lexeme` object
723
- # @return [String]
724
- def lang
719
+ # @return [String]
720
+ def lang
725
721
  @py_lexeme.lang_
726
722
  end
727
723
 
728
724
  # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
729
- # @return [String]
730
- def prefix
725
+ # @return [String]
726
+ def prefix
731
727
  @py_lexeme.prefix_
732
728
  end
733
- #
729
+
734
730
  # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
735
- # @return [String]
731
+ # @return [String]
736
732
  def suffix
737
733
  @py_lexeme.suffix_
738
734
  end
739
735
 
740
736
  # Returns the lexemes's norm, i.e. a normalized form of the lexeme calling `norm_' of `@py_lexeme` object
741
- # @return [String]
737
+ # @return [String]
742
738
  def norm
743
739
  @py_lexeme.norm_
744
740
  end
745
741
 
746
742
  # Returns a semantic similarity estimate.
747
- # @param other [Lexeme] the other doc to which a similarity estimation is made
748
- # @return [Float]
743
+ # @param other [Lexeme] the other lexeme to which a similarity estimation is made
744
+ # @return [Float]
749
745
  def similarity(other)
750
746
  @py_lexeme.similarity(other.py_lexeme)
751
747
  end
@@ -754,7 +750,9 @@ module Spacy
754
750
  def method_missing(name, *args)
755
751
  @py_lexeme.send(name, *args)
756
752
  end
757
- end
758
753
 
754
+ def respond_to_missing?(sym)
755
+ sym ? true : super
756
+ end
757
+ end
759
758
  end
760
-