ruby-spacy 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -1
- data/README.md +82 -40
- data/examples/get_started/morphology.rb +45 -0
- data/examples/get_started/pos_tags_and_dependencies.rb +17 -17
- data/examples/japanese/pos_tagging.rb +20 -20
- data/lib/ruby-spacy.rb +20 -0
- data/lib/ruby-spacy/version.rb +1 -1
- metadata +3 -3
- data/examples/linguistic_features/morphology.rb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9add9d3b065bbf5064652cb115f824221d929a20478d182782df5db564cc8f45
|
4
|
+
data.tar.gz: f07d502f79883a452e7f250f0fe784425511a0de4f8a43db0b29ca03801bd755
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 373c795a148034f4191cfaf130a23f464dc2b43927bf6aa3165999c78797365ce2f976021ea8b9ab1dd083736e5f9a1da51a5ccf0156d00ec39dac9fd19bde7c
|
7
|
+
data.tar.gz: e370e503c23d15a0a44be84bf578775b0a4acc5557468c7fc9468cde44e0e084018be8dc17c3e7c21d9efdaf229611ca234614fcd2e811272051c7c2922b408d
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
ruby-spacy (0.1.1)
|
4
|
+
ruby-spacy (0.1.2)
|
5
5
|
numpy (~> 0.4.0)
|
6
6
|
pycall (~> 1.4.0)
|
7
7
|
terminal-table (~> 3.0.1)
|
@@ -23,6 +23,7 @@ GEM
|
|
23
23
|
|
24
24
|
PLATFORMS
|
25
25
|
arm64-darwin-20
|
26
|
+
x86_64-darwin-20
|
26
27
|
|
27
28
|
DEPENDENCIES
|
28
29
|
github-markup
|
data/README.md
CHANGED
@@ -111,12 +111,10 @@ Output:
|
|
111
111
|
|:-----:|:--:|:-------:|:--:|:------:|:----:|:-------:|:---:|:-:|:--:|:-------:|
|
112
112
|
| Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
113
113
|
|
114
|
-
### Part-of-speech
|
114
|
+
### Part-of-speech and dependency
|
115
115
|
|
116
116
|
→ [spaCy: Part-of-speech tags and dependencies](https://spacy.io/usage/spacy-101#annotations-pos-deps)
|
117
117
|
|
118
|
-
→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
|
119
|
-
|
120
118
|
Ruby code:
|
121
119
|
|
122
120
|
```ruby
|
@@ -126,73 +124,117 @@ require "terminal-table"
|
|
126
124
|
nlp = Spacy::Language.new("en_core_web_sm")
|
127
125
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
128
126
|
|
127
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
129
128
|
rows = []
|
130
129
|
|
131
130
|
doc.each do |token|
|
132
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
131
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
133
132
|
end
|
134
133
|
|
135
|
-
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
136
134
|
table = Terminal::Table.new rows: rows, headings: headings
|
137
135
|
puts table
|
138
136
|
```
|
139
137
|
|
140
138
|
Output:
|
141
139
|
|
142
|
-
| text | lemma | pos | tag | dep |
|
143
|
-
|
144
|
-
| Apple | Apple | PROPN | NNP | nsubj |
|
145
|
-
| is | be | AUX | VBZ | aux |
|
146
|
-
| looking | look | VERB | VBG | ROOT |
|
147
|
-
| at | at | ADP | IN | prep |
|
148
|
-
| buying | buy | VERB | VBG | pcomp |
|
149
|
-
| U.K. | U.K. | PROPN | NNP | dobj |
|
150
|
-
| startup | startup | NOUN | NN | advcl |
|
151
|
-
| for | for | ADP | IN | prep |
|
152
|
-
| $ | $ | SYM | $ | quantmod |
|
153
|
-
| 1 | 1 | NUM | CD | compound |
|
154
|
-
| billion | billion | NUM | CD | pobj |
|
155
|
-
|
156
|
-
### Part-of-speech
|
140
|
+
| text | lemma | pos | tag | dep |
|
141
|
+
|:--------|:--------|:------|:----|:---------|
|
142
|
+
| Apple | Apple | PROPN | NNP | nsubj |
|
143
|
+
| is | be | AUX | VBZ | aux |
|
144
|
+
| looking | look | VERB | VBG | ROOT |
|
145
|
+
| at | at | ADP | IN | prep |
|
146
|
+
| buying | buy | VERB | VBG | pcomp |
|
147
|
+
| U.K. | U.K. | PROPN | NNP | dobj |
|
148
|
+
| startup | startup | NOUN | NN | advcl |
|
149
|
+
| for | for | ADP | IN | prep |
|
150
|
+
| $ | $ | SYM | $ | quantmod |
|
151
|
+
| 1 | 1 | NUM | CD | compound |
|
152
|
+
| billion | billion | NUM | CD | pobj |
|
153
|
+
|
154
|
+
### Part-of-speech and dependency (Japanese)
|
157
155
|
|
158
156
|
Ruby code:
|
159
157
|
|
160
158
|
```ruby
|
161
|
-
require
|
159
|
+
require "ruby-spacy"
|
162
160
|
require "terminal-table"
|
163
161
|
|
164
162
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
165
|
-
doc = nlp.read("任天堂は1983
|
163
|
+
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
166
164
|
|
165
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
167
166
|
rows = []
|
168
167
|
|
169
168
|
doc.each do |token|
|
170
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
169
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
171
170
|
end
|
172
171
|
|
173
|
-
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
174
172
|
table = Terminal::Table.new rows: rows, headings: headings
|
175
173
|
puts table
|
176
174
|
```
|
177
175
|
|
178
176
|
Output:
|
179
177
|
|
180
|
-
| text | lemma | pos | tag | dep |
|
181
|
-
|
182
|
-
| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
183
|
-
| は | は | ADP | 助詞-係助詞 | case |
|
184
|
-
| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
185
|
-
| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
186
|
-
| に | に | ADP | 助詞-格助詞 | case |
|
187
|
-
| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
188
|
-
| を | を | ADP | 助詞-格助詞 | case |
|
189
|
-
| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
190
|
-
| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
191
|
-
| で | で | ADP | 助詞-格助詞 | case |
|
192
|
-
| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
193
|
-
| し | する | AUX | 動詞-非自立可能 | aux |
|
194
|
-
| た | た | AUX | 助動詞 | aux |
|
195
|
-
| 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
178
|
+
| text | lemma | pos | tag | dep |
|
179
|
+
|:-----------|:-----------|:------|:-------------------------|:-------|
|
180
|
+
| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
181
|
+
| は | は | ADP | 助詞-係助詞 | case |
|
182
|
+
| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
183
|
+
| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
184
|
+
| に | に | ADP | 助詞-格助詞 | case |
|
185
|
+
| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
186
|
+
| を | を | ADP | 助詞-格助詞 | case |
|
187
|
+
| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
188
|
+
| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
189
|
+
| で | で | ADP | 助詞-格助詞 | case |
|
190
|
+
| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
191
|
+
| し | する | AUX | 動詞-非自立可能 | aux |
|
192
|
+
| た | た | AUX | 助動詞 | aux |
|
193
|
+
| 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
194
|
+
|
195
|
+
### Morphology
|
196
|
+
|
197
|
+
→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
|
198
|
+
|
199
|
+
Ruby code:
|
200
|
+
|
201
|
+
```ruby
|
202
|
+
require "ruby-spacy"
|
203
|
+
require "terminal-table"
|
204
|
+
|
205
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
206
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
207
|
+
|
208
|
+
headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
|
209
|
+
rows = []
|
210
|
+
|
211
|
+
doc.each do |token|
|
212
|
+
morph = token.morphology.map do |k, v|
|
213
|
+
"#{k} = #{v}"
|
214
|
+
end.join("\n")
|
215
|
+
rows << [token.text, token.shape_, token.is_alpha, token.is_stop, morph]
|
216
|
+
end
|
217
|
+
|
218
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
219
|
+
puts table
|
220
|
+
|
221
|
+
```
|
222
|
+
|
223
|
+
Output:
|
224
|
+
|
225
|
+
| text | shape | is_alpha | is_stop | morphology |
|
226
|
+
|:--------|:------|:---------|:--------|:------------------------------------------------------------------------------------|
|
227
|
+
| Apple | Xxxxx | true | false | NounType = Prop<br />Number = Sing |
|
228
|
+
| is | xx | true | true | Mood = Ind<br />Number = Sing<br />Person = 3<br />Tense = Pres<br />VerbForm = Fin |
|
229
|
+
| looking | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
|
230
|
+
| at | xx | true | true | |
|
231
|
+
| buying | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
|
232
|
+
| U.K. | X.X. | false | false | NounType = Prop<br />Number = Sing |
|
233
|
+
| startup | xxxx | true | false | Number = Sing |
|
234
|
+
| for | xxx | true | true | |
|
235
|
+
| $ | $ | false | false | |
|
236
|
+
| 1 | d | false | false | NumType = Card |
|
237
|
+
| billion | xxxx | true | false | NumType = Card |
|
196
238
|
|
197
239
|
### Visualizing dependency
|
198
240
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
|
+
|
7
|
+
headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |token|
|
11
|
+
morph = token.morphology.map do |k, v|
|
12
|
+
"#{k} = #{v}"
|
13
|
+
end.join("\n")
|
14
|
+
# end.join("<br />")
|
15
|
+
rows << [token.text, token.shape_, token.is_alpha, token.is_stop, morph]
|
16
|
+
end
|
17
|
+
|
18
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
19
|
+
puts table
|
20
|
+
|
21
|
+
# +---------+-------+----------+---------+-----------------+
|
22
|
+
# | text | shape | is_alpha | is_stop | morphology |
|
23
|
+
# +---------+-------+----------+---------+-----------------+
|
24
|
+
# | Apple | Xxxxx | true | false | NounType = Prop |
|
25
|
+
# | | | | | Number = Sing |
|
26
|
+
# | is | xx | true | true | Mood = Ind |
|
27
|
+
# | | | | | Number = Sing |
|
28
|
+
# | | | | | Person = 3 |
|
29
|
+
# | | | | | Tense = Pres |
|
30
|
+
# | | | | | VerbForm = Fin |
|
31
|
+
# | looking | xxxx | true | false | Aspect = Prog |
|
32
|
+
# | | | | | Tense = Pres |
|
33
|
+
# | | | | | VerbForm = Part |
|
34
|
+
# | at | xx | true | true | |
|
35
|
+
# | buying | xxxx | true | false | Aspect = Prog |
|
36
|
+
# | | | | | Tense = Pres |
|
37
|
+
# | | | | | VerbForm = Part |
|
38
|
+
# | U.K. | X.X. | false | false | NounType = Prop |
|
39
|
+
# | | | | | Number = Sing |
|
40
|
+
# | startup | xxxx | true | false | Number = Sing |
|
41
|
+
# | for | xxx | true | true | |
|
42
|
+
# | $ | $ | false | false | |
|
43
|
+
# | 1 | d | false | false | NumType = Card |
|
44
|
+
# | billion | xxxx | true | false | NumType = Card |
|
45
|
+
# +---------+-------+----------+---------+-----------------+
|
@@ -4,28 +4,28 @@ require "terminal-table"
|
|
4
4
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
5
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
6
|
|
7
|
-
headings = ["text", "lemma", "pos", "tag", "dep"
|
7
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
8
8
|
rows = []
|
9
9
|
|
10
10
|
doc.each do |token|
|
11
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
11
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
12
12
|
end
|
13
13
|
|
14
14
|
table = Terminal::Table.new rows: rows, headings: headings
|
15
15
|
puts table
|
16
16
|
|
17
|
-
#
|
18
|
-
# | text | lemma | pos | tag | dep |
|
19
|
-
#
|
20
|
-
# | Apple | Apple | PROPN | NNP | nsubj |
|
21
|
-
# | is | be | AUX | VBZ | aux |
|
22
|
-
# | looking | look | VERB | VBG | ROOT |
|
23
|
-
# | at | at | ADP | IN | prep |
|
24
|
-
# | buying | buy | VERB | VBG | pcomp |
|
25
|
-
# | U.K. | U.K. | PROPN | NNP | dobj |
|
26
|
-
# | startup | startup | NOUN | NN | advcl |
|
27
|
-
# | for | for | ADP | IN | prep |
|
28
|
-
# | $ | $ | SYM | $ | quantmod |
|
29
|
-
# | 1 | 1 | NUM | CD | compound |
|
30
|
-
# | billion | billion | NUM | CD | pobj |
|
31
|
-
#
|
17
|
+
# +---------+---------+-------+-----+----------+
|
18
|
+
# | text | lemma | pos | tag | dep |
|
19
|
+
# +---------+---------+-------+-----+----------+
|
20
|
+
# | Apple | Apple | PROPN | NNP | nsubj |
|
21
|
+
# | is | be | AUX | VBZ | aux |
|
22
|
+
# | looking | look | VERB | VBG | ROOT |
|
23
|
+
# | at | at | ADP | IN | prep |
|
24
|
+
# | buying | buy | VERB | VBG | pcomp |
|
25
|
+
# | U.K. | U.K. | PROPN | NNP | dobj |
|
26
|
+
# | startup | startup | NOUN | NN | advcl |
|
27
|
+
# | for | for | ADP | IN | prep |
|
28
|
+
# | $ | $ | SYM | $ | quantmod |
|
29
|
+
# | 1 | 1 | NUM | CD | compound |
|
30
|
+
# | billion | billion | NUM | CD | pobj |
|
31
|
+
# +---------+---------+-------+-----+----------+
|
@@ -4,31 +4,31 @@ require "terminal-table"
|
|
4
4
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
5
|
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
6
6
|
|
7
|
-
headings = ["text", "lemma", "pos", "tag", "dep"
|
7
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
8
8
|
rows = []
|
9
9
|
|
10
10
|
doc.each do |token|
|
11
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
11
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
12
12
|
end
|
13
13
|
|
14
14
|
table = Terminal::Table.new rows: rows, headings: headings
|
15
15
|
puts table
|
16
16
|
|
17
|
-
#
|
18
|
-
# | text | lemma | pos | tag | dep |
|
19
|
-
#
|
20
|
-
# | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
21
|
-
# | は | は | ADP | 助詞-係助詞 | case |
|
22
|
-
# | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
23
|
-
# | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
24
|
-
# | に | に | ADP | 助詞-格助詞 | case |
|
25
|
-
# | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
26
|
-
# | を | を | ADP | 助詞-格助詞 | case |
|
27
|
-
# | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
28
|
-
# | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
29
|
-
# | で | で | ADP | 助詞-格助詞 | case |
|
30
|
-
# | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
31
|
-
# | し | する | AUX | 動詞-非自立可能 | aux |
|
32
|
-
# | た | た | AUX | 助動詞 | aux |
|
33
|
-
# | 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
34
|
-
#
|
17
|
+
# +------------+------------+-------+--------------------------+--------+
|
18
|
+
# | text | lemma | pos | tag | dep |
|
19
|
+
# +------------+------------+-------+--------------------------+--------+
|
20
|
+
# | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
21
|
+
# | は | は | ADP | 助詞-係助詞 | case |
|
22
|
+
# | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
23
|
+
# | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
24
|
+
# | に | に | ADP | 助詞-格助詞 | case |
|
25
|
+
# | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
26
|
+
# | を | を | ADP | 助詞-格助詞 | case |
|
27
|
+
# | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
28
|
+
# | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
29
|
+
# | で | で | ADP | 助詞-格助詞 | case |
|
30
|
+
# | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
31
|
+
# | し | する | AUX | 動詞-非自立可能 | aux |
|
32
|
+
# | た | た | AUX | 助動詞 | aux |
|
33
|
+
# | 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
34
|
+
# +------------+------------+-------+--------------------------+--------+
|
data/lib/ruby-spacy.rb
CHANGED
@@ -252,6 +252,26 @@ module Spacy
|
|
252
252
|
@text
|
253
253
|
end
|
254
254
|
|
255
|
+
# Returns a hash or string of morphological information
|
256
|
+
# @param hash [Boolean] if true, a hash will be returned instead of a string
|
257
|
+
# @return [Hash, String]
|
258
|
+
def morphology(hash = true)
|
259
|
+
if @py_token.has_morph
|
260
|
+
morph_analysis = @py_token.morph
|
261
|
+
if hash
|
262
|
+
return morph_analysis.to_dict
|
263
|
+
else
|
264
|
+
return morph_analysis.to_s
|
265
|
+
end
|
266
|
+
else
|
267
|
+
if hash
|
268
|
+
results = {}
|
269
|
+
else
|
270
|
+
return ""
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
|
255
275
|
# Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
|
256
276
|
def method_missing(name, *args)
|
257
277
|
@py_token.send(name, *args)
|
data/lib/ruby-spacy/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-spacy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yoichiro Hasebe
|
@@ -75,6 +75,7 @@ files:
|
|
75
75
|
- bin/setup
|
76
76
|
- examples/get_started/lexeme.rb
|
77
77
|
- examples/get_started/linguistic_annotations.rb
|
78
|
+
- examples/get_started/morphology.rb
|
78
79
|
- examples/get_started/most_similar.rb
|
79
80
|
- examples/get_started/named_entities.rb
|
80
81
|
- examples/get_started/outputs/test_dep.svg
|
@@ -111,7 +112,6 @@ files:
|
|
111
112
|
- examples/linguistic_features/iterating_children.rb
|
112
113
|
- examples/linguistic_features/iterating_lefts_and_rights.rb
|
113
114
|
- examples/linguistic_features/lemmatization.rb
|
114
|
-
- examples/linguistic_features/morphology.rb
|
115
115
|
- examples/linguistic_features/named_entity_recognition.rb
|
116
116
|
- examples/linguistic_features/navigating_parse_tree.rb
|
117
117
|
- examples/linguistic_features/noun_chunks.rb
|
@@ -149,7 +149,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
149
|
- !ruby/object:Gem::Version
|
150
150
|
version: '0'
|
151
151
|
requirements: []
|
152
|
-
rubygems_version: 3.2.
|
152
|
+
rubygems_version: 3.2.11
|
153
153
|
signing_key:
|
154
154
|
specification_version: 4
|
155
155
|
summary: A wrapper module for using spaCy natural language processing library from
|
@@ -1,17 +0,0 @@
|
|
1
|
-
require "ruby-spacy"
|
2
|
-
require "terminal-table"
|
3
|
-
|
4
|
-
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
-
|
6
|
-
puts "Pipeline: " + nlp.pipe_names.to_s
|
7
|
-
|
8
|
-
doc = nlp.read("I was reading the paper.")
|
9
|
-
|
10
|
-
token = doc[0]
|
11
|
-
|
12
|
-
puts "Morph features of the first word: " + token.morph.to_s
|
13
|
-
puts "PronType of the word: " + token.morph.get("PronType").to_s
|
14
|
-
|
15
|
-
# Pipeline: ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"]
|
16
|
-
# Morph features of the first word: Case=Nom|Number=Sing|Person=1|PronType=Prs
|
17
|
-
# PronType of the word: ['Prs']
|