ruby-spacy 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +1 -1
- data/examples/get_started/pos_tags_and_dependencies.rb +1 -1
- data/examples/get_started/similarity.rb +2 -2
- data/examples/japanese/visualizing_dependencies.rb +2 -2
- data/examples/japanese/visualizing_named_entities.rb +1 -1
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +1 -1
- data/examples/linguistic_features/similarity.rb +2 -2
- data/examples/linguistic_features/similarity_between_spans.rb +2 -2
- data/lib/ruby-spacy.rb +331 -325
- data/lib/ruby-spacy/version.rb +1 -1
- metadata +4 -4
- data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2571b8bd9a0e170c5462d73b57ab75d535936f591c6b7954d0ca6b2a9d74aeb
|
4
|
+
data.tar.gz: 55bffce60937e8ae1f4135a076185b64f2e989c9431e67914d926ee60b2cc537
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b3de6a6179eef74f224db186075fe084c93fb2fc802c4831b4b23c069594ac8c6aada546336d16065b9eeef116ba3a686afc6c441e68a66ba0fbddd7ffa321a6
|
7
|
+
data.tar.gz: 05b174052487979bf1c81a54469264068dac76d17d17b6f870b7666472625ca596f022abf6f6d734f5b02d4d623b11351c57e1d1d90deb192aaaa36d59e7255c
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
@@ -2,7 +2,7 @@ require "ruby-spacy"
|
|
2
2
|
require "terminal-table"
|
3
3
|
|
4
4
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
-
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
|
6
6
|
|
7
7
|
headings = ["text", "lemma", "pos", "tag", "dep"]
|
8
8
|
rows = []
|
@@ -4,8 +4,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
4
4
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
5
5
|
doc2 = nlp.read("Fast food tastes very good.")
|
6
6
|
|
7
|
-
puts "Doc 1: " + doc1
|
8
|
-
puts "Doc 2: " + doc2
|
7
|
+
puts "Doc 1: " + doc1.text
|
8
|
+
puts "Doc 2: " + doc2.text
|
9
9
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
10
10
|
|
11
11
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -6,8 +6,8 @@ nlp = Spacy::Language.new("ja_core_news_sm")
|
|
6
6
|
sentence = "自動運転車は保険責任を製造者に転嫁する。"
|
7
7
|
doc = nlp.read(sentence)
|
8
8
|
|
9
|
-
dep_svg = doc.displacy('dep', false)
|
9
|
+
dep_svg = doc.displacy(style: 'dep', compact: false)
|
10
10
|
|
11
|
-
File.open(File.join(File.dirname(__FILE__), "
|
11
|
+
File.open(File.join(File.dirname(__FILE__), "test_dep.svg"), "w") do |file|
|
12
12
|
file.write(dep_svg)
|
13
13
|
end
|
@@ -5,7 +5,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
5
|
|
6
6
|
doc = nlp.read("bright red apples on the tree")
|
7
7
|
|
8
|
-
puts "Text: " + doc
|
8
|
+
puts "Text: " + doc.text
|
9
9
|
|
10
10
|
puts "Words to the left of 'apple': " + Spacy.generator_to_array(doc[2].lefts).to_s
|
11
11
|
puts "Words to the right of 'apple': " + Spacy.generator_to_array(doc[2].rights).to_s
|
@@ -5,8 +5,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
5
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
6
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
7
|
|
8
|
-
puts "Doc 1: " + doc1
|
9
|
-
puts "Doc 2: " + doc2
|
8
|
+
puts "Doc 1: " + doc1.text
|
9
|
+
puts "Doc 2: " + doc2.text
|
10
10
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
11
|
|
12
12
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -5,8 +5,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
5
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
6
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
7
|
|
8
|
-
puts "Doc 1: " + doc1
|
9
|
-
puts "Doc 2: " + doc2
|
8
|
+
puts "Doc 1: " + doc1.text
|
9
|
+
puts "Doc 2: " + doc2.text
|
10
10
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
11
|
|
12
12
|
span1 = doc1.span(2, 2) # salty fries
|
data/lib/ruby-spacy.rb
CHANGED
@@ -3,12 +3,34 @@
|
|
3
3
|
require_relative "ruby-spacy/version"
|
4
4
|
require 'enumerator'
|
5
5
|
require 'strscan'
|
6
|
-
require 'pycall/import'
|
7
6
|
require 'numpy'
|
7
|
+
require 'pycall/import'
|
8
8
|
include PyCall::Import
|
9
9
|
|
10
10
|
# This module covers the areas of spaCy functionality for _using_ many varieties of its language models, not for _building_ ones.
|
11
11
|
module Spacy
|
12
|
+
|
13
|
+
extend PyCall::Import
|
14
|
+
spacy = PyCall.import_module('spacy')
|
15
|
+
|
16
|
+
# Python `Language` class
|
17
|
+
PyLanguage = spacy.language.Language
|
18
|
+
|
19
|
+
# Python `Doc` class object
|
20
|
+
PyDoc = spacy.tokens.Doc
|
21
|
+
|
22
|
+
# Python `Span` class object
|
23
|
+
PySpan = spacy.tokens.Span
|
24
|
+
|
25
|
+
# Python `Token` class object
|
26
|
+
PyToken = spacy.tokens.Token
|
27
|
+
|
28
|
+
# Python `Matcher` class object
|
29
|
+
PyMatcher = spacy.matcher.Matcher
|
30
|
+
|
31
|
+
# Python `displacy` object
|
32
|
+
PyDisplacy = spacy.displacy
|
33
|
+
|
12
34
|
# A utility module method to convert Python's generator object to a Ruby array,
|
13
35
|
# mainly used on the items inside the array returned from dependency-related methods
|
14
36
|
# such as {Span#rights}, {Span#lefts} and {Span#subtree}.
|
@@ -16,12 +38,303 @@ module Spacy
|
|
16
38
|
PyCall::List.(py_generator)
|
17
39
|
end
|
18
40
|
|
41
|
+
# See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
|
42
|
+
class Doc
|
43
|
+
|
44
|
+
# @return [Object] a Python `Language` instance accessible via `PyCall`
|
45
|
+
attr_reader :py_nlp
|
46
|
+
|
47
|
+
# @return [Object] a Python `Doc` instance accessible via `PyCall`
|
48
|
+
attr_reader :py_doc
|
49
|
+
|
50
|
+
# @return [String] a text string of the document
|
51
|
+
attr_reader :text
|
52
|
+
|
53
|
+
include Enumerable
|
54
|
+
|
55
|
+
alias_method :length, :count
|
56
|
+
alias_method :len, :count
|
57
|
+
alias_method :size, :count
|
58
|
+
|
59
|
+
# It is recommended to use {Language#read} method to create a doc. If you need to
|
60
|
+
# create one using {Doc#initialize}, there are two method signatures:
|
61
|
+
# `Spacy::Doc.new(nlp_id, py_doc: Object)` and `Spacy::Doc.new(nlp_id, text: String)`.
|
62
|
+
# @param nlp [Language] an instance of {Language} class
|
63
|
+
# @param py_doc [Object] an instance of Python `Doc` class
|
64
|
+
# @param text [String] the text string to be analyzed
|
65
|
+
def initialize(nlp, py_doc: nil, text: nil)
|
66
|
+
@py_nlp = nlp
|
67
|
+
if py_doc
|
68
|
+
@py_doc = py_doc
|
69
|
+
else
|
70
|
+
@py_doc = nlp.(text)
|
71
|
+
end
|
72
|
+
@text = @py_doc.text
|
73
|
+
end
|
74
|
+
|
75
|
+
# Retokenizes the text merging a span into a single token.
|
76
|
+
# @param start_index [Integer] the start position of the span to be retokenized in the document
|
77
|
+
# @param end_index [Integer] the end position of the span to be retokenized in the document
|
78
|
+
# @param attributes [Hash] attributes to set on the merged token
|
79
|
+
def retokenize(start_index, end_index, attributes = {})
|
80
|
+
PyCall.with(@py_doc.retokenize()) do |retokenizer|
|
81
|
+
retokenizer.merge(@py_doc[start_index .. end_index], attrs: attributes)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Retokenizes the text splitting the specified token.
|
86
|
+
# @param pos_in_doc [Integer] the position of the span to be retokenized in the document
|
87
|
+
# @param split_array [Array<String>] text strings of the split results
|
88
|
+
# @param ancestor_pos [Integer] the position of the immediate ancestor element of the split elements in the document
|
89
|
+
# @param attributes [Hash] the attributes of the split elements
|
90
|
+
def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
|
91
|
+
PyCall.with(@py_doc.retokenize()) do |retokenizer|
|
92
|
+
heads = [[@py_doc[pos_in_doc], head_pos_in_split], @py_doc[ancestor_pos]]
|
93
|
+
retokenizer.split(@py_doc[pos_in_doc], split_array, heads: heads, attrs: attributes)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# String representation of the document.
|
98
|
+
# @return [String]
|
99
|
+
def to_s
|
100
|
+
@text
|
101
|
+
end
|
102
|
+
|
103
|
+
# Returns an array of tokens contained in the doc.
|
104
|
+
# @return [Array<Token>]
|
105
|
+
def tokens
|
106
|
+
results = []
|
107
|
+
PyCall::List.(@py_doc).each do |py_token|
|
108
|
+
results << Token.new(py_token)
|
109
|
+
end
|
110
|
+
results
|
111
|
+
end
|
112
|
+
|
113
|
+
# Iterates over the elements in the doc yielding a token instance each time.
|
114
|
+
def each
|
115
|
+
PyCall::List.(@py_doc).each do |py_token|
|
116
|
+
yield Token.new(py_token)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
# Returns a span of the specified range within the doc.
|
121
|
+
# The method should be used either of the two ways: `Doc#span(range)` or `Doc#span{start_pos, size_of_span}`.
|
122
|
+
# @param range_or_start [Range, Integer] a range object, or, alternatively, an integer that represents the start position of the span
|
123
|
+
# @param optional_size [Integer] an integer representing the size of the span
|
124
|
+
# @return [Span]
|
125
|
+
def span(range_or_start, optional_size = nil)
|
126
|
+
if optional_size
|
127
|
+
start_index = range_or_start
|
128
|
+
temp = tokens[start_index ... start_index + optional_size]
|
129
|
+
else
|
130
|
+
start_index = range_or_start.first
|
131
|
+
range = range_or_start
|
132
|
+
temp = tokens[range]
|
133
|
+
end
|
134
|
+
|
135
|
+
end_index = start_index + temp.size - 1
|
136
|
+
|
137
|
+
Span.new(self, start_index: start_index, end_index: end_index)
|
138
|
+
end
|
139
|
+
|
140
|
+
# Returns an array of spans representing noun chunks.
|
141
|
+
# @return [Array<Span>]
|
142
|
+
def noun_chunks
|
143
|
+
chunk_array = []
|
144
|
+
py_chunks = PyCall::List.(@py_doc.noun_chunks)
|
145
|
+
py_chunks.each do |py_chunk|
|
146
|
+
chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
|
147
|
+
end
|
148
|
+
chunk_array
|
149
|
+
end
|
150
|
+
|
151
|
+
# Returns an array of spans each representing a sentence.
|
152
|
+
# @return [Array<Span>]
|
153
|
+
def sents
|
154
|
+
sentence_array = []
|
155
|
+
py_sentences = PyCall::List.(@py_doc.sents)
|
156
|
+
py_sentences.each do |py_sent|
|
157
|
+
sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
|
158
|
+
end
|
159
|
+
sentence_array
|
160
|
+
end
|
161
|
+
|
162
|
+
# Returns an array of spans each representing a named entity.
|
163
|
+
# @return [Array<Span>]
|
164
|
+
def ents
|
165
|
+
# so that ents canbe "each"-ed in Ruby
|
166
|
+
ent_array = []
|
167
|
+
PyCall::List.(@py_doc.ents).each do |ent|
|
168
|
+
ent_array << ent
|
169
|
+
end
|
170
|
+
ent_array
|
171
|
+
end
|
172
|
+
|
173
|
+
# Returns a span if given a range object; or returns a token if given an integer representing a position in the doc.
|
174
|
+
# @param range [Range] an ordinary Ruby's range object such as `0..3`, `1...4`, or `3 .. -1`
|
175
|
+
def [](range)
|
176
|
+
if range.is_a?(Range)
|
177
|
+
py_span = @py_doc[range]
|
178
|
+
return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
|
179
|
+
else
|
180
|
+
return Token.new(@py_doc[range])
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
# Returns a semantic similarity estimate.
|
185
|
+
# @param other [Doc] the other doc to which a similarity estimation is made
|
186
|
+
# @return [Float]
|
187
|
+
def similarity(other)
|
188
|
+
py_doc.similarity(other.py_doc)
|
189
|
+
end
|
190
|
+
|
191
|
+
# Visualize the document in one of two styles: "dep" (dependencies) or "ent" (named entities).
|
192
|
+
# @param style [String] either `dep` or `ent`
|
193
|
+
# @param compact [Boolean] only relevant to the `dep' style
|
194
|
+
# @return [String] in the case of `dep`, the output text will be an SVG, whereas in the `ent` style, the output text will be an HTML.
|
195
|
+
def displacy(style: "dep", compact: false)
|
196
|
+
PyDisplacy.render(py_doc, style: style, options: {compact: compact}, jupyter: false)
|
197
|
+
end
|
198
|
+
|
199
|
+
# Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
|
200
|
+
def method_missing(name, *args)
|
201
|
+
@py_doc.send(name, *args)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
# See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
|
206
|
+
class Language
|
207
|
+
|
208
|
+
# @return [String] an identifier string that can be used to refer to the Python `Language` object inside `PyCall::exec` or `PyCall::eval`
|
209
|
+
attr_reader :spacy_nlp_id
|
210
|
+
|
211
|
+
# @return [Object] a Python `Language` instance accessible via `PyCall`
|
212
|
+
attr_reader :py_nlp
|
213
|
+
|
214
|
+
# Creates a language model instance, which is conventionally referred to by a variable named `nlp`.
|
215
|
+
# @param model [String] A language model installed in the system
|
216
|
+
def initialize(model = "en_core_web_sm")
|
217
|
+
@spacy_nlp_id = "nlp_#{model.object_id}"
|
218
|
+
PyCall.exec("import spacy; #{@spacy_nlp_id} = spacy.load('#{model}')")
|
219
|
+
@py_nlp = PyCall.eval(@spacy_nlp_id)
|
220
|
+
end
|
221
|
+
|
222
|
+
# Reads and analyze the given text.
|
223
|
+
# @param text [String] a text to be read and analyzed
|
224
|
+
def read(text)
|
225
|
+
Doc.new(py_nlp, text: text)
|
226
|
+
end
|
227
|
+
|
228
|
+
# Generates a matcher for the current language model.
|
229
|
+
# @return [Matcher]
|
230
|
+
def matcher
|
231
|
+
Matcher.new(@py_nlp)
|
232
|
+
end
|
233
|
+
|
234
|
+
# A utility method to lookup a vocabulary item of the given id.
|
235
|
+
# @param id [Integer] a vocabulary id
|
236
|
+
# @return [Object] a Python `Lexeme` object (https://spacy.io/api/lexeme)
|
237
|
+
def vocab_string_lookup(id)
|
238
|
+
PyCall.eval("#{@spacy_nlp_id}.vocab.strings[#{id}]")
|
239
|
+
end
|
240
|
+
|
241
|
+
# A utility method to list pipeline components.
|
242
|
+
# @return [Array<String>] An array of text strings representing pipeline components
|
243
|
+
def pipe_names
|
244
|
+
pipe_array = []
|
245
|
+
PyCall::List.(@py_nlp.pipe_names).each do |pipe|
|
246
|
+
pipe_array << pipe
|
247
|
+
end
|
248
|
+
pipe_array
|
249
|
+
end
|
250
|
+
|
251
|
+
# A utility method to get a Python `Lexeme` object.
|
252
|
+
# @param text [String] A text string representing a lexeme
|
253
|
+
# @return [Object] Python `Lexeme` object (https://spacy.io/api/lexeme)
|
254
|
+
def get_lexeme(text)
|
255
|
+
text = text.gsub("'", "\'")
|
256
|
+
@py_nlp.vocab[text]
|
257
|
+
end
|
258
|
+
|
259
|
+
# Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
|
260
|
+
# @param vector [Object] A vector representation of a word (whether existing or non-existing)
|
261
|
+
# @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
|
262
|
+
def most_similar(vector, n)
|
263
|
+
vec_array = Numpy.asarray([vector])
|
264
|
+
py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
|
265
|
+
key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist}]")
|
266
|
+
keys = key_texts.map{|kt| kt[0]}
|
267
|
+
texts = key_texts.map{|kt| kt[1]}
|
268
|
+
best_rows = PyCall::List.(py_result[1])[0]
|
269
|
+
scores = PyCall::List.(py_result[2])[0]
|
270
|
+
|
271
|
+
results = []
|
272
|
+
n.times do |i|
|
273
|
+
results << {key: keys[i].to_i, text: texts[i], best_row: best_rows[i], score: scores[i]}
|
274
|
+
end
|
275
|
+
results
|
276
|
+
end
|
277
|
+
|
278
|
+
# Utility function to batch process many texts
|
279
|
+
# @param texts [String]
|
280
|
+
# @param disable [Array<String>]
|
281
|
+
# @param batch_size [Integer]
|
282
|
+
# @return [Array<Doc>]
|
283
|
+
def pipe(texts, disable: [], batch_size: 50)
|
284
|
+
docs = []
|
285
|
+
PyCall::List.(@py_nlp.pipe(texts, disable: disable, batch_size: batch_size)).each do |py_doc|
|
286
|
+
docs << Doc.new(@py_nlp, py_doc: py_doc)
|
287
|
+
end
|
288
|
+
docs
|
289
|
+
end
|
290
|
+
|
291
|
+
# Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism....
|
292
|
+
def method_missing(name, *args)
|
293
|
+
@py_nlp.send(name, *args)
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
# See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
|
298
|
+
class Matcher
|
299
|
+
|
300
|
+
# @return [Object] a Python `Matcher` instance accessible via `PyCall`
|
301
|
+
attr_reader :py_matcher
|
302
|
+
|
303
|
+
# Creates a {Matcher} instance
|
304
|
+
# @param nlp [Language] an instance of {Language} class
|
305
|
+
def initialize(nlp)
|
306
|
+
@py_matcher = PyMatcher.(nlp.vocab)
|
307
|
+
end
|
308
|
+
|
309
|
+
# Adds a label string and a text pattern.
|
310
|
+
# @param text [String] a label string given to the pattern
|
311
|
+
# @param pattern [Array<Array<Hash>>] sequences of text patterns that are alternative to each other
|
312
|
+
def add(text, pattern)
|
313
|
+
@py_matcher.add(text, pattern)
|
314
|
+
end
|
315
|
+
|
316
|
+
# Execute the match.
|
317
|
+
# @param doc [Doc] an {Doc} instance
|
318
|
+
# @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of the matched pattern, the starting position, and the end position
|
319
|
+
def match(doc)
|
320
|
+
str_results = @py_matcher.(doc.py_doc).to_s
|
321
|
+
s = StringScanner.new(str_results[1..-2])
|
322
|
+
results = []
|
323
|
+
while s.scan_until(/(\d+), (\d+), (\d+)/)
|
324
|
+
next unless s.matched
|
325
|
+
triple = s.matched.split(", ")
|
326
|
+
match_id = triple[0].to_i
|
327
|
+
start_index = triple[1].to_i
|
328
|
+
end_index = triple[2].to_i - 1
|
329
|
+
results << {match_id: match_id, start_index: start_index, end_index: end_index}
|
330
|
+
end
|
331
|
+
results
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
19
335
|
# See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
|
20
336
|
class Span
|
21
337
|
|
22
|
-
# @return [String] an identifier string that can be used when referring to the Python object inside `PyCall::exec` or `PyCall::eval`
|
23
|
-
attr_reader :spacy_span_id
|
24
|
-
|
25
338
|
# @return [Object] a Python `Span` instance accessible via `PyCall`
|
26
339
|
attr_reader :py_span
|
27
340
|
|
@@ -35,21 +348,18 @@ module Spacy
|
|
35
348
|
alias_method :size, :count
|
36
349
|
|
37
350
|
# It is recommended to use {Doc#span} method to create a span. If you need to
|
38
|
-
# create one using {Span#initialize},
|
351
|
+
# create one using {Span#initialize}, there are two method signatures:
|
352
|
+
# `Span.new(doc, py_span: Object)` or `Span.new(doc, start_index: Integer, end_index: Integer, options: Hash)`.
|
39
353
|
# @param doc [Doc] the document to which this span belongs to
|
40
354
|
# @param start_index [Integer] the index of the item starting the span inside a doc
|
41
355
|
# @param end_index [Integer] the index of the item ending the span inside a doc
|
42
356
|
# @param options [Hash] options (`:label`, `:kb_id`, `:vector`)
|
43
357
|
def initialize(doc, py_span: nil, start_index: nil, end_index: nil, options: {})
|
44
358
|
@doc = doc
|
45
|
-
@spacy_span_id = "doc_#{doc.object_id}_span_#{start_index}_#{end_index}"
|
46
359
|
if py_span
|
47
360
|
@py_span = py_span
|
48
361
|
else
|
49
|
-
|
50
|
-
PyCall.exec("#{@spacy_span_id}_opts = #{options}")
|
51
|
-
PyCall.exec("#{@spacy_span_id} = Span(#{@doc.spacy_doc_id}, #{start_index}, #{end_index + 1}, **#{@spacy_span_id}_opts)")
|
52
|
-
@py_span = PyCall.eval(@spacy_span_id)
|
362
|
+
@py_span = PySpan.(@doc.py_doc, start_index, end_index + 1, options)
|
53
363
|
end
|
54
364
|
end
|
55
365
|
|
@@ -63,7 +373,7 @@ module Spacy
|
|
63
373
|
results
|
64
374
|
end
|
65
375
|
|
66
|
-
# Iterates over the elements in the span yielding a token instance.
|
376
|
+
# Iterates over the elements in the span yielding a token instance each time.
|
67
377
|
def each
|
68
378
|
PyCall::List.(@py_span).each do |py_token|
|
69
379
|
yield Token.new(py_token)
|
@@ -97,7 +407,6 @@ module Spacy
|
|
97
407
|
def ents
|
98
408
|
ent_array = []
|
99
409
|
PyCall::List.(@py_span.ents).each do |py_span|
|
100
|
-
# ent_array << ent
|
101
410
|
ent_array << Spacy::Span.new(@doc, py_span: py_span)
|
102
411
|
end
|
103
412
|
ent_array
|
@@ -106,11 +415,11 @@ module Spacy
|
|
106
415
|
# Returns a span that represents the sentence that the given span is part of.
|
107
416
|
# @return [Span]
|
108
417
|
def sent
|
109
|
-
py_span
|
418
|
+
py_span = @py_span.sent
|
110
419
|
return Spacy::Span.new(@doc, py_span: py_span)
|
111
420
|
end
|
112
421
|
|
113
|
-
# Returns a span if a range object is given
|
422
|
+
# Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
|
114
423
|
# @param range [Range] an ordinary Ruby's range object such as `0..3`, `1...4`, or `3 .. -1`
|
115
424
|
def [](range)
|
116
425
|
if range.is_a?(Range)
|
@@ -125,16 +434,16 @@ module Spacy
|
|
125
434
|
# @param other [Span] the other span to which a similarity estimation is conducted
|
126
435
|
# @return [Float]
|
127
436
|
def similarity(other)
|
128
|
-
|
437
|
+
py_span.similarity(other.py_span)
|
129
438
|
end
|
130
439
|
|
131
|
-
# Creates a document instance
|
440
|
+
# Creates a document instance from the span
|
132
441
|
# @return [Doc]
|
133
442
|
def as_doc
|
134
|
-
Spacy::Doc.new(@doc.
|
443
|
+
Spacy::Doc.new(@doc.py_nlp, text: self.text)
|
135
444
|
end
|
136
445
|
|
137
|
-
# Returns
|
446
|
+
# Returns tokens conjugated to the root of the span.
|
138
447
|
# @return [Array<Token>] an array of tokens
|
139
448
|
def conjuncts
|
140
449
|
conjunct_array = []
|
@@ -144,7 +453,7 @@ module Spacy
|
|
144
453
|
conjunct_array
|
145
454
|
end
|
146
455
|
|
147
|
-
# Returns
|
456
|
+
# Returns tokens that are to the left of the span, whose heads are within the span.
|
148
457
|
# @return [Array<Token>] an array of tokens
|
149
458
|
def lefts
|
150
459
|
left_array = []
|
@@ -189,7 +498,8 @@ module Spacy
|
|
189
498
|
# @return [String] a string representing the token
|
190
499
|
attr_reader :text
|
191
500
|
|
192
|
-
# It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
|
501
|
+
# It is recommended to use {Doc#tokens} or {Span#tokens} methods to create tokens.
|
502
|
+
# There is no way to generate a token from scratch but relying on a pre-exising Python {Token} object.
|
193
503
|
# @param py_token [Object] Python `Token` object
|
194
504
|
def initialize(py_token)
|
195
505
|
@py_token = py_token
|
@@ -253,7 +563,7 @@ module Spacy
|
|
253
563
|
end
|
254
564
|
|
255
565
|
# Returns a hash or string of morphological information
|
256
|
-
# @param
|
566
|
+
# @param hash [Boolean] if true, a hash will be returned instead of a string
|
257
567
|
# @return [Hash, String]
|
258
568
|
def morphology(hash = true)
|
259
569
|
if @py_token.has_morph
|
@@ -278,310 +588,6 @@ module Spacy
|
|
278
588
|
end
|
279
589
|
end
|
280
590
|
|
281
|
-
# See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
|
282
|
-
class Doc
|
283
|
-
|
284
|
-
# @return [String] an identifier string that can be used when referring to the Python object inside `PyCall::exec` or `PyCall::eval`
|
285
|
-
attr_reader :spacy_nlp_id
|
286
|
-
|
287
|
-
# @return [String] an identifier string that can be used when referring to the Python object inside `PyCall::exec` or `PyCall::eval`
|
288
|
-
attr_reader :spacy_doc_id
|
289
|
-
|
290
|
-
# @return [Object] a Python `Doc` instance accessible via `PyCall`
|
291
|
-
attr_reader :py_doc
|
292
|
-
|
293
|
-
# @return [String] a text string of the document
|
294
|
-
attr_reader :text
|
295
|
-
|
296
|
-
include Enumerable
|
297
|
-
|
298
|
-
alias_method :length, :count
|
299
|
-
alias_method :len, :count
|
300
|
-
alias_method :size, :count
|
301
|
-
|
302
|
-
# Creates a new instance of {Doc}.
|
303
|
-
# @param nlp_id [String] The id string of the `nlp`, an instance of {Language} class
|
304
|
-
# @param text [String] The text string to be analyzed
|
305
|
-
def initialize(nlp_id, text)
|
306
|
-
@text = text
|
307
|
-
@spacy_nlp_id = nlp_id
|
308
|
-
@spacy_doc_id = "doc_#{text.object_id}"
|
309
|
-
quoted = text.gsub('"', '\"')
|
310
|
-
PyCall.exec(%Q[text_#{text.object_id} = """#{quoted}"""])
|
311
|
-
PyCall.exec("#{@spacy_doc_id} = #{nlp_id}(text_#{text.object_id})")
|
312
|
-
@py_doc = PyCall.eval(@spacy_doc_id)
|
313
|
-
end
|
314
|
-
|
315
|
-
|
316
|
-
# Retokenizes the text merging a span into a single token.
|
317
|
-
# @param start_index [Integer] The start position of the span to be retokenized in the document
|
318
|
-
# @param end_index [Integer] The end position of the span to be retokenized in the document
|
319
|
-
# @param attributes [Hash] Attributes to set on the merged token
|
320
|
-
def retokenize(start_index, end_index, attributes = {})
|
321
|
-
py_attrs = PyCall::Dict.(attributes)
|
322
|
-
PyCall.exec(<<PY)
|
323
|
-
with #{@spacy_doc_id}.retokenize() as retokenizer:
|
324
|
-
retokenizer.merge(#{@spacy_doc_id}[#{start_index} : #{end_index + 1}], attrs=#{py_attrs})
|
325
|
-
PY
|
326
|
-
@py_doc = PyCall.eval(@spacy_doc_id)
|
327
|
-
end
|
328
|
-
|
329
|
-
# Retokenizes the text splitting the specified token.
|
330
|
-
# @param pos_in_doc [Integer] The position of the span to be retokenized in the document
|
331
|
-
# @param split_array [Array<String>] text strings of the split results
|
332
|
-
# @param ancestor_pos [Integer] The position of the immediate ancestor element of the split elements in the document
|
333
|
-
# @param attributes [Hash] The attributes of the split elements
|
334
|
-
def retokenize_split(pos_in_doc, split_array, head_pos_in_split, ancestor_pos, attributes = {})
|
335
|
-
py_attrs = PyCall::Dict.(attributes)
|
336
|
-
py_split_array = PyCall::List.(split_array)
|
337
|
-
PyCall.exec(<<PY)
|
338
|
-
with #{@spacy_doc_id}.retokenize() as retokenizer:
|
339
|
-
heads = [(#{@spacy_doc_id}[#{pos_in_doc}], #{head_pos_in_split}), #{@spacy_doc_id}[#{ancestor_pos}]]
|
340
|
-
attrs = #{py_attrs}
|
341
|
-
split_array = #{py_split_array}
|
342
|
-
retokenizer.split(#{@spacy_doc_id}[#{pos_in_doc}], split_array, heads=heads, attrs=attrs)
|
343
|
-
PY
|
344
|
-
@py_doc = PyCall.eval(@spacy_doc_id)
|
345
|
-
end
|
346
|
-
|
347
|
-
# String representation of the token.
|
348
|
-
# @return [String]
|
349
|
-
def to_s
|
350
|
-
@text
|
351
|
-
end
|
352
|
-
|
353
|
-
# Returns an array of tokens contained in the doc.
|
354
|
-
# @return [Array<Token>]
|
355
|
-
def tokens
|
356
|
-
results = []
|
357
|
-
PyCall::List.(@py_doc).each do |py_token|
|
358
|
-
results << Token.new(py_token)
|
359
|
-
end
|
360
|
-
results
|
361
|
-
end
|
362
|
-
|
363
|
-
# Iterates over the elements in the doc yielding a token instance.
|
364
|
-
def each
|
365
|
-
PyCall::List.(@py_doc).each do |py_token|
|
366
|
-
yield Token.new(py_token)
|
367
|
-
end
|
368
|
-
end
|
369
|
-
|
370
|
-
# Returns a span of the specified range within the doc.
|
371
|
-
# The method should be used either of the two ways: `Doc#span(range)` or `Doc#span{start_pos, size_of_span}`.
|
372
|
-
# @param range_or_start [Range, Integer] A range object, or, alternatively, an integer that represents the start position of the span
|
373
|
-
# @param optional_size [Integer] An integer representing the size of the span
|
374
|
-
# @return [Span]
|
375
|
-
def span(range_or_start, optional_size = nil)
|
376
|
-
if optional_size
|
377
|
-
start_index = range_or_start
|
378
|
-
temp = tokens[start_index ... start_index + optional_size]
|
379
|
-
else
|
380
|
-
start_index = range_or_start.first
|
381
|
-
range = range_or_start
|
382
|
-
temp = tokens[range]
|
383
|
-
end
|
384
|
-
|
385
|
-
end_index = start_index + temp.size - 1
|
386
|
-
|
387
|
-
Span.new(self, start_index: start_index, end_index: end_index)
|
388
|
-
end
|
389
|
-
|
390
|
-
# Returns an array of spans representing noun chunks.
|
391
|
-
# @return [Array<Span>]
|
392
|
-
def noun_chunks
|
393
|
-
chunk_array = []
|
394
|
-
py_chunks = PyCall::List.(@py_doc.noun_chunks)
|
395
|
-
py_chunks.each do |py_chunk|
|
396
|
-
chunk_array << Span.new(self, start_index: py_chunk.start, end_index: py_chunk.end - 1)
|
397
|
-
end
|
398
|
-
chunk_array
|
399
|
-
end
|
400
|
-
|
401
|
-
# Returns an array of spans representing sentences.
|
402
|
-
# @return [Array<Span>]
|
403
|
-
def sents
|
404
|
-
sentence_array = []
|
405
|
-
py_sentences = PyCall::List.(@py_doc.sents)
|
406
|
-
py_sentences.each do |py_sent|
|
407
|
-
sentence_array << Span.new(self, start_index: py_sent.start, end_index: py_sent.end - 1)
|
408
|
-
end
|
409
|
-
sentence_array
|
410
|
-
end
|
411
|
-
|
412
|
-
# Returns an array of spans representing named entities.
|
413
|
-
# @return [Array<Span>]
|
414
|
-
def ents
|
415
|
-
# so that ents canbe "each"-ed in Ruby
|
416
|
-
ent_array = []
|
417
|
-
PyCall::List.(@py_doc.ents).each do |ent|
|
418
|
-
ent_array << ent
|
419
|
-
end
|
420
|
-
ent_array
|
421
|
-
end
|
422
|
-
|
423
|
-
# Returns a span if given a range object; returns a token if given an integer representing a position in the doc.
|
424
|
-
# @param range [Range] an ordinary Ruby's range object such as `0..3`, `1...4`, or `3 .. -1`
|
425
|
-
def [](range)
|
426
|
-
if range.is_a?(Range)
|
427
|
-
py_span = @py_doc[range]
|
428
|
-
return Span.new(self, start_index: py_span.start, end_index: py_span.end - 1)
|
429
|
-
else
|
430
|
-
return Token.new(@py_doc[range])
|
431
|
-
end
|
432
|
-
end
|
433
|
-
|
434
|
-
# Returns a semantic similarity estimate.
|
435
|
-
# @param other [Doc] the other doc to which a similarity estimation is made
|
436
|
-
# @return [Float]
|
437
|
-
def similarity(other)
|
438
|
-
PyCall.eval("#{@spacy_doc_id}.similarity(#{other.spacy_doc_id})")
|
439
|
-
end
|
440
|
-
|
441
|
-
# Visualize the document in one of two styles: dep (dependencies) or ent (named entities).
|
442
|
-
# @param style [String] Either `dep` or `ent`
|
443
|
-
# @param compact [Boolean] Only relevant to the `dep' style
|
444
|
-
# @return [String] in the case of `dep`, the output text is an SVG while in the `ent` style, the output text is an HTML.
|
445
|
-
def displacy(style: "dep", compact: false)
|
446
|
-
PyCall.eval("displacy.render(#{@spacy_doc_id}, style='#{style}', options={'compact': #{compact.to_s.capitalize}}, jupyter=False)")
|
447
|
-
end
|
448
|
-
|
449
|
-
# Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
|
450
|
-
def method_missing(name, *args)
|
451
|
-
@py_doc.send(name, *args)
|
452
|
-
end
|
453
|
-
end
|
454
|
-
|
455
|
-
# See also spaCy Python API document for [`Matcher`](https://spacy.io/api/matcher).
class Matcher

  # @return [String] an identifier string that can be used when referring to the Python object inside `PyCall::exec` or `PyCall::eval`
  attr_reader :spacy_matcher_id

  # @return [Object] a Python `Matcher` instance accessible via `PyCall`
  attr_reader :py_matcher

  # Creates a {Matcher} instance
  # @param nlp_id [String] the id string of the `nlp`, an instance of {Language} class
  def initialize(nlp_id)
    @spacy_matcher_id = "doc_#{nlp_id}_matcher"
    PyCall.exec("#{@spacy_matcher_id} = Matcher(#{nlp_id}.vocab)")
    @py_matcher = PyCall.eval(@spacy_matcher_id)
  end

  # Adds a label string and a text pattern.
  # @param text [String] a label string given to the pattern
  # @param pattern [Array<Array<Hash>>] alternative sequences of text patterns
  def add(text, pattern)
    @py_matcher.add(text, pattern)
  end

  # Execute the match.
  # @param doc [Doc] a {Doc} instance
  # @return [Array<Hash{:match_id => Integer, :start_index => Integer, :end_index => Integer}>] the id of each matched pattern with its start and end token positions
  def match(doc)
    # The Python matcher result is stringified and the surrounding brackets
    # stripped; each "(id, start, end)" triple is then scanned out.
    raw = PyCall.eval("#{@spacy_matcher_id}(#{doc.spacy_doc_id})").to_s
    scanner = StringScanner.new(raw[1..-2])
    matches = []
    while scanner.scan_until(/(\d+), (\d+), (\d+)/)
      matches << {
        match_id: scanner[1].to_i,
        start_index: scanner[2].to_i,
        # The end position is decremented by one so the returned index is inclusive.
        end_index: scanner[3].to_i - 1
      }
    end
    matches
  end
end
|
497
|
-
|
498
|
-
# See also spaCy Python API document for [`Language`](https://spacy.io/api/language).
class Language

  # @return [String] an identifier string that can be used when referring to the Python object inside `PyCall::exec` or `PyCall::eval`
  attr_reader :spacy_nlp_id

  # @return [Object] a Python `Language` instance accessible via `PyCall`
  attr_reader :py_nlp

  # Creates a language model instance, which is conventionally referred to by a variable named `nlp`.
  # @param model [String] a language model installed in the system
  def initialize(model = "en_core_web_sm")
    @spacy_nlp_id = "nlp_#{model.object_id}"
    PyCall.exec("import spacy; from spacy.tokens import Span; from spacy.matcher import Matcher; from spacy import displacy")
    PyCall.exec("#{@spacy_nlp_id} = spacy.load('#{model}')")
    @py_nlp = PyCall.eval(@spacy_nlp_id)
  end

  # Reads and analyzes the given text.
  # @param text [String] a text to be read and analyzed
  # @return [Doc]
  def read(text)
    Doc.new(@spacy_nlp_id, text)
  end

  # Generates a matcher for the current language model.
  # @return [Matcher]
  def matcher
    Matcher.new(@spacy_nlp_id)
  end

  # A utility method to look up a vocabulary item of the given id.
  # @param id [Integer] a vocabulary id
  # @return [Object] a Python `Lexeme` object
  def vocab_string_lookup(id)
    PyCall.eval("#{@spacy_nlp_id}.vocab.strings[#{id}]")
  end

  # A utility method to list pipeline components.
  # @return [Array<String>] an array of text strings representing pipeline components
  def pipe_names
    # PyCall::List includes Enumerable, so the manual accumulator loop is unnecessary.
    PyCall::List.(@py_nlp.pipe_names).to_a
  end

  # A utility method to get the tokenizer Python object.
  # @return [Object] Python `Tokenizer` object
  def tokenizer
    PyCall.eval("#{@spacy_nlp_id}.tokenizer")
  end

  # A utility method to get a Python `Lexeme` object.
  # @param text [String] a text string representing a lexeme
  # @return [Object] Python `Lexeme` object
  def get_lexeme(text)
    # Escape single quotes so the text can be embedded safely in the
    # single-quoted Python string below. The original `gsub("'", "\'")`
    # was a no-op ("\'" is just "'" in Ruby); the block form avoids the
    # gsub replacement-string back-reference pitfalls and yields \' .
    escaped = text.gsub("'") { "\\'" }
    PyCall.eval("#{@spacy_nlp_id}.vocab['#{escaped}']")
  end

  # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
  # @param vector [Object] a vector representation of a word (whether existing or non-existing)
  # @param n [Integer] the number of lexemes to return
  # @return [Array<Hash{:key => Integer, :text => String, :best_row => Object, :score => Object}>] an array of hash objects, each containing the `key`, `text`, `best_row` and similarity `score` of a lexeme
  def most_similar(vector, n)
    vec_array = Numpy.asarray([vector])
    py_result = @py_nlp.vocab.vectors.most_similar(vec_array, n: n)
    # Resolve each key to its surface text on the Python side in one pass.
    key_texts = PyCall.eval("[[str(n), #{@spacy_nlp_id}.vocab[n].text] for n in #{py_result[0][0].tolist()}]")
    keys = key_texts.map { |kt| kt[0] }
    texts = key_texts.map { |kt| kt[1] }
    best_rows = PyCall::List.(py_result[1])[0]
    scores = PyCall::List.(py_result[2])[0]

    (0...n).map do |i|
      {key: keys[i].to_i, text: texts[i], best_row: best_rows[i], score: scores[i]}
    end
  end

  # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
  # Forwards the call (including any block, which the original version dropped)
  # to the underlying Python `Language` object.
  def method_missing(name, *args, &block)
    @py_nlp.send(name, *args, &block)
  end

  # Pairs with method_missing per Ruby convention: any message is assumed to be
  # deliverable to the underlying Python object, so report it as respondable.
  def respond_to_missing?(name, include_private = false)
    true
  end
end
|
585
591
|
|
586
592
|
end
|
587
593
|
|
data/lib/ruby-spacy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-spacy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yoichiro Hasebe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-06-
|
11
|
+
date: 2021-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pycall
|
@@ -66,6 +66,7 @@ extra_rdoc_files: []
|
|
66
66
|
files:
|
67
67
|
- ".gitignore"
|
68
68
|
- ".yardopts"
|
69
|
+
- CHANGELOG.md
|
69
70
|
- Gemfile
|
70
71
|
- Gemfile.lock
|
71
72
|
- LICENSE.txt
|
@@ -123,7 +124,6 @@ files:
|
|
123
124
|
- examples/linguistic_features/sentence_segmentation.rb
|
124
125
|
- examples/linguistic_features/similarity.rb
|
125
126
|
- examples/linguistic_features/similarity_between_spans.rb
|
126
|
-
- examples/linguistic_features/special_case_tokenization_rules.rb
|
127
127
|
- examples/linguistic_features/tokenization.rb
|
128
128
|
- examples/rule_based_matching/creating_spans_from_matches.rb
|
129
129
|
- examples/rule_based_matching/matcher.rb
|
@@ -149,7 +149,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
149
|
- !ruby/object:Gem::Version
|
150
150
|
version: '0'
|
151
151
|
requirements: []
|
152
|
-
rubygems_version: 3.2.
|
152
|
+
rubygems_version: 3.2.3
|
153
153
|
signing_key:
|
154
154
|
specification_version: 4
|
155
155
|
summary: A wrapper module for using spaCy natural language processing library from
|
@@ -1,19 +0,0 @@
|
|
1
|
-
require "ruby-spacy"
|
2
|
-
require "terminal-table"
|
3
|
-
|
4
|
-
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
-
|
6
|
-
doc = nlp.read("gimme that")
|
7
|
-
|
8
|
-
puts doc.tokens.join(" ")
|
9
|
-
|
10
|
-
# Add special case rule
|
11
|
-
special_case = [{ORTH: "gim"}, {ORTH: "me"}]
|
12
|
-
tokenizer = nlp.tokenizer
|
13
|
-
tokenizer.add_special_case("gimme", special_case)
|
14
|
-
|
15
|
-
# Check new tokenization
|
16
|
-
puts nlp.read("gimme that").tokens.join(" ")
|
17
|
-
|
18
|
-
# gimme that
|
19
|
-
# gim me that
|