ruby-spacy 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -1
- data/README.md +82 -40
- data/examples/get_started/morphology.rb +45 -0
- data/examples/get_started/pos_tags_and_dependencies.rb +17 -17
- data/examples/japanese/pos_tagging.rb +20 -20
- data/lib/ruby-spacy.rb +20 -0
- data/lib/ruby-spacy/version.rb +1 -1
- metadata +3 -3
- data/examples/linguistic_features/morphology.rb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9add9d3b065bbf5064652cb115f824221d929a20478d182782df5db564cc8f45
|
4
|
+
data.tar.gz: f07d502f79883a452e7f250f0fe784425511a0de4f8a43db0b29ca03801bd755
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 373c795a148034f4191cfaf130a23f464dc2b43927bf6aa3165999c78797365ce2f976021ea8b9ab1dd083736e5f9a1da51a5ccf0156d00ec39dac9fd19bde7c
|
7
|
+
data.tar.gz: e370e503c23d15a0a44be84bf578775b0a4acc5557468c7fc9468cde44e0e084018be8dc17c3e7c21d9efdaf229611ca234614fcd2e811272051c7c2922b408d
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
ruby-spacy (0.1.1)
|
4
|
+
ruby-spacy (0.1.2)
|
5
5
|
numpy (~> 0.4.0)
|
6
6
|
pycall (~> 1.4.0)
|
7
7
|
terminal-table (~> 3.0.1)
|
@@ -23,6 +23,7 @@ GEM
|
|
23
23
|
|
24
24
|
PLATFORMS
|
25
25
|
arm64-darwin-20
|
26
|
+
x86_64-darwin-20
|
26
27
|
|
27
28
|
DEPENDENCIES
|
28
29
|
github-markup
|
data/README.md
CHANGED
@@ -111,12 +111,10 @@ Output:
|
|
111
111
|
|:-----:|:--:|:-------:|:--:|:------:|:----:|:-------:|:---:|:-:|:--:|:-------:|
|
112
112
|
| Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
113
113
|
|
114
|
-
### Part-of-speech
|
114
|
+
### Part-of-speech and dependency
|
115
115
|
|
116
116
|
→ [spaCy: Part-of-speech tags and dependencies](https://spacy.io/usage/spacy-101#annotations-pos-deps)
|
117
117
|
|
118
|
-
→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
|
119
|
-
|
120
118
|
Ruby code:
|
121
119
|
|
122
120
|
```ruby
|
@@ -126,73 +124,117 @@ require "terminal-table"
|
|
126
124
|
nlp = Spacy::Language.new("en_core_web_sm")
|
127
125
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
128
126
|
|
127
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
129
128
|
rows = []
|
130
129
|
|
131
130
|
doc.each do |token|
|
132
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
131
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
133
132
|
end
|
134
133
|
|
135
|
-
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
136
134
|
table = Terminal::Table.new rows: rows, headings: headings
|
137
135
|
puts table
|
138
136
|
```
|
139
137
|
|
140
138
|
Output:
|
141
139
|
|
142
|
-
| text | lemma | pos | tag | dep |
|
143
|
-
|
144
|
-
| Apple | Apple | PROPN | NNP | nsubj |
|
145
|
-
| is | be | AUX | VBZ | aux |
|
146
|
-
| looking | look | VERB | VBG | ROOT |
|
147
|
-
| at | at | ADP | IN | prep |
|
148
|
-
| buying | buy | VERB | VBG | pcomp |
|
149
|
-
| U.K. | U.K. | PROPN | NNP | dobj |
|
150
|
-
| startup | startup | NOUN | NN | advcl |
|
151
|
-
| for | for | ADP | IN | prep |
|
152
|
-
| $ | $ | SYM | $ | quantmod |
|
153
|
-
| 1 | 1 | NUM | CD | compound |
|
154
|
-
| billion | billion | NUM | CD | pobj |
|
155
|
-
|
156
|
-
### Part-of-speech
|
140
|
+
| text | lemma | pos | tag | dep |
|
141
|
+
|:--------|:--------|:------|:----|:---------|
|
142
|
+
| Apple | Apple | PROPN | NNP | nsubj |
|
143
|
+
| is | be | AUX | VBZ | aux |
|
144
|
+
| looking | look | VERB | VBG | ROOT |
|
145
|
+
| at | at | ADP | IN | prep |
|
146
|
+
| buying | buy | VERB | VBG | pcomp |
|
147
|
+
| U.K. | U.K. | PROPN | NNP | dobj |
|
148
|
+
| startup | startup | NOUN | NN | advcl |
|
149
|
+
| for | for | ADP | IN | prep |
|
150
|
+
| $ | $ | SYM | $ | quantmod |
|
151
|
+
| 1 | 1 | NUM | CD | compound |
|
152
|
+
| billion | billion | NUM | CD | pobj |
|
153
|
+
|
154
|
+
### Part-of-speech and dependency (Japanese)
|
157
155
|
|
158
156
|
Ruby code:
|
159
157
|
|
160
158
|
```ruby
|
161
|
-
require
|
159
|
+
require "ruby-spacy"
|
162
160
|
require "terminal-table"
|
163
161
|
|
164
162
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
165
|
-
doc = nlp.read("任天堂は1983
|
163
|
+
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
166
164
|
|
165
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
167
166
|
rows = []
|
168
167
|
|
169
168
|
doc.each do |token|
|
170
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
169
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
171
170
|
end
|
172
171
|
|
173
|
-
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
174
172
|
table = Terminal::Table.new rows: rows, headings: headings
|
175
173
|
puts table
|
176
174
|
```
|
177
175
|
|
178
176
|
Output:
|
179
177
|
|
180
|
-
| text | lemma | pos | tag | dep |
|
181
|
-
|
182
|
-
| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
183
|
-
| は | は | ADP | 助詞-係助詞 | case |
|
184
|
-
| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
185
|
-
| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
186
|
-
| に | に | ADP | 助詞-格助詞 | case |
|
187
|
-
| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
188
|
-
| を | を | ADP | 助詞-格助詞 | case |
|
189
|
-
| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
190
|
-
| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
191
|
-
| で | で | ADP | 助詞-格助詞 | case |
|
192
|
-
| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
193
|
-
| し | する | AUX | 動詞-非自立可能 | aux |
|
194
|
-
| た | た | AUX | 助動詞 | aux |
|
195
|
-
| 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
178
|
+
| text | lemma | pos | tag | dep |
|
179
|
+
|:-----------|:-----------|:------|:-------------------------|:-------|
|
180
|
+
| 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
181
|
+
| は | は | ADP | 助詞-係助詞 | case |
|
182
|
+
| 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
183
|
+
| 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
184
|
+
| に | に | ADP | 助詞-格助詞 | case |
|
185
|
+
| ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
186
|
+
| を | を | ADP | 助詞-格助詞 | case |
|
187
|
+
| 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
188
|
+
| 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
189
|
+
| で | で | ADP | 助詞-格助詞 | case |
|
190
|
+
| 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
191
|
+
| し | する | AUX | 動詞-非自立可能 | aux |
|
192
|
+
| た | た | AUX | 助動詞 | aux |
|
193
|
+
| 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
194
|
+
|
195
|
+
### Morphology
|
196
|
+
|
197
|
+
→ [POS and morphology tags](https://github.com/explosion/spaCy/blob/master/spacy/glossary.py)
|
198
|
+
|
199
|
+
Ruby code:
|
200
|
+
|
201
|
+
```ruby
|
202
|
+
require "ruby-spacy"
|
203
|
+
require "terminal-table"
|
204
|
+
|
205
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
206
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
207
|
+
|
208
|
+
headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
|
209
|
+
rows = []
|
210
|
+
|
211
|
+
doc.each do |token|
|
212
|
+
morph = token.morphology.map do |k, v|
|
213
|
+
"#{k} = #{v}"
|
214
|
+
end.join("\n")
|
215
|
+
rows << [token.text, token.shape_, token.is_alpha, token.is_stop, morph]
|
216
|
+
end
|
217
|
+
|
218
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
219
|
+
puts table
|
220
|
+
|
221
|
+
```
|
222
|
+
|
223
|
+
Output:
|
224
|
+
|
225
|
+
| text | shape | is_alpha | is_stop | morphology |
|
226
|
+
|:--------|:------|:---------|:--------|:------------------------------------------------------------------------------------|
|
227
|
+
| Apple | Xxxxx | true | false | NounType = Prop<br />Number = Sing |
|
228
|
+
| is | xx | true | true | Mood = Ind<br />Number = Sing<br />Person = 3<br />Tense = Pres<br />VerbForm = Fin |
|
229
|
+
| looking | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
|
230
|
+
| at | xx | true | true | |
|
231
|
+
| buying | xxxx | true | false | Aspect = Prog<br />Tense = Pres<br />VerbForm = Part |
|
232
|
+
| U.K. | X.X. | false | false | NounType = Prop<br />Number = Sing |
|
233
|
+
| startup | xxxx | true | false | Number = Sing |
|
234
|
+
| for | xxx | true | true | |
|
235
|
+
| $ | $ | false | false | |
|
236
|
+
| 1 | d | false | false | NumType = Card |
|
237
|
+
| billion | xxxx | true | false | NumType = Card |
|
196
238
|
|
197
239
|
### Visualizing dependency
|
198
240
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
|
+
|
7
|
+
headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |token|
|
11
|
+
morph = token.morphology.map do |k, v|
|
12
|
+
"#{k} = #{v}"
|
13
|
+
end.join("\n")
|
14
|
+
# end.join("<br />")
|
15
|
+
rows << [token.text, token.shape_, token.is_alpha, token.is_stop, morph]
|
16
|
+
end
|
17
|
+
|
18
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
19
|
+
puts table
|
20
|
+
|
21
|
+
# +---------+-------+----------+---------+-----------------+
|
22
|
+
# | text | shape | is_alpha | is_stop | morphology |
|
23
|
+
# +---------+-------+----------+---------+-----------------+
|
24
|
+
# | Apple | Xxxxx | true | false | NounType = Prop |
|
25
|
+
# | | | | | Number = Sing |
|
26
|
+
# | is | xx | true | true | Mood = Ind |
|
27
|
+
# | | | | | Number = Sing |
|
28
|
+
# | | | | | Person = 3 |
|
29
|
+
# | | | | | Tense = Pres |
|
30
|
+
# | | | | | VerbForm = Fin |
|
31
|
+
# | looking | xxxx | true | false | Aspect = Prog |
|
32
|
+
# | | | | | Tense = Pres |
|
33
|
+
# | | | | | VerbForm = Part |
|
34
|
+
# | at | xx | true | true | |
|
35
|
+
# | buying | xxxx | true | false | Aspect = Prog |
|
36
|
+
# | | | | | Tense = Pres |
|
37
|
+
# | | | | | VerbForm = Part |
|
38
|
+
# | U.K. | X.X. | false | false | NounType = Prop |
|
39
|
+
# | | | | | Number = Sing |
|
40
|
+
# | startup | xxxx | true | false | Number = Sing |
|
41
|
+
# | for | xxx | true | true | |
|
42
|
+
# | $ | $ | false | false | |
|
43
|
+
# | 1 | d | false | false | NumType = Card |
|
44
|
+
# | billion | xxxx | true | false | NumType = Card |
|
45
|
+
# +---------+-------+----------+---------+-----------------+
|
@@ -4,28 +4,28 @@ require "terminal-table"
|
|
4
4
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
5
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
6
|
|
7
|
-
headings = ["text", "lemma", "pos", "tag", "dep"
|
7
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
8
8
|
rows = []
|
9
9
|
|
10
10
|
doc.each do |token|
|
11
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
11
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
12
12
|
end
|
13
13
|
|
14
14
|
table = Terminal::Table.new rows: rows, headings: headings
|
15
15
|
puts table
|
16
16
|
|
17
|
-
#
|
18
|
-
# | text | lemma | pos | tag | dep |
|
19
|
-
#
|
20
|
-
# | Apple | Apple | PROPN | NNP | nsubj |
|
21
|
-
# | is | be | AUX | VBZ | aux |
|
22
|
-
# | looking | look | VERB | VBG | ROOT |
|
23
|
-
# | at | at | ADP | IN | prep |
|
24
|
-
# | buying | buy | VERB | VBG | pcomp |
|
25
|
-
# | U.K. | U.K. | PROPN | NNP | dobj |
|
26
|
-
# | startup | startup | NOUN | NN | advcl |
|
27
|
-
# | for | for | ADP | IN | prep |
|
28
|
-
# | $ | $ | SYM | $ | quantmod |
|
29
|
-
# | 1 | 1 | NUM | CD | compound |
|
30
|
-
# | billion | billion | NUM | CD | pobj |
|
31
|
-
#
|
17
|
+
# +---------+---------+-------+-----+----------+
|
18
|
+
# | text | lemma | pos | tag | dep |
|
19
|
+
# +---------+---------+-------+-----+----------+
|
20
|
+
# | Apple | Apple | PROPN | NNP | nsubj |
|
21
|
+
# | is | be | AUX | VBZ | aux |
|
22
|
+
# | looking | look | VERB | VBG | ROOT |
|
23
|
+
# | at | at | ADP | IN | prep |
|
24
|
+
# | buying | buy | VERB | VBG | pcomp |
|
25
|
+
# | U.K. | U.K. | PROPN | NNP | dobj |
|
26
|
+
# | startup | startup | NOUN | NN | advcl |
|
27
|
+
# | for | for | ADP | IN | prep |
|
28
|
+
# | $ | $ | SYM | $ | quantmod |
|
29
|
+
# | 1 | 1 | NUM | CD | compound |
|
30
|
+
# | billion | billion | NUM | CD | pobj |
|
31
|
+
# +---------+---------+-------+-----+----------+
|
@@ -4,31 +4,31 @@ require "terminal-table"
|
|
4
4
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
5
|
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
6
6
|
|
7
|
-
headings = ["text", "lemma", "pos", "tag", "dep"
|
7
|
+
headings = ["text", "lemma", "pos", "tag", "dep"]
|
8
8
|
rows = []
|
9
9
|
|
10
10
|
doc.each do |token|
|
11
|
-
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_
|
11
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
|
12
12
|
end
|
13
13
|
|
14
14
|
table = Terminal::Table.new rows: rows, headings: headings
|
15
15
|
puts table
|
16
16
|
|
17
|
-
#
|
18
|
-
# | text | lemma | pos | tag | dep |
|
19
|
-
#
|
20
|
-
# | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
21
|
-
# | は | は | ADP | 助詞-係助詞 | case |
|
22
|
-
# | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
23
|
-
# | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
24
|
-
# | に | に | ADP | 助詞-格助詞 | case |
|
25
|
-
# | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
26
|
-
# | を | を | ADP | 助詞-格助詞 | case |
|
27
|
-
# | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
28
|
-
# | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
29
|
-
# | で | で | ADP | 助詞-格助詞 | case |
|
30
|
-
# | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
31
|
-
# | し | する | AUX | 動詞-非自立可能 | aux |
|
32
|
-
# | た | た | AUX | 助動詞 | aux |
|
33
|
-
# | 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
34
|
-
#
|
17
|
+
# +------------+------------+-------+--------------------------+--------+
|
18
|
+
# | text | lemma | pos | tag | dep |
|
19
|
+
# +------------+------------+-------+--------------------------+--------+
|
20
|
+
# | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
|
21
|
+
# | は | は | ADP | 助詞-係助詞 | case |
|
22
|
+
# | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
|
23
|
+
# | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
24
|
+
# | に | に | ADP | 助詞-格助詞 | case |
|
25
|
+
# | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
|
26
|
+
# | を | を | ADP | 助詞-格助詞 | case |
|
27
|
+
# | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
|
28
|
+
# | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
|
29
|
+
# | で | で | ADP | 助詞-格助詞 | case |
|
30
|
+
# | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
|
31
|
+
# | し | する | AUX | 動詞-非自立可能 | aux |
|
32
|
+
# | た | た | AUX | 助動詞 | aux |
|
33
|
+
# | 。 | 。 | PUNCT | 補助記号-句点 | punct |
|
34
|
+
# +------------+------------+-------+--------------------------+--------+
|
data/lib/ruby-spacy.rb
CHANGED
@@ -252,6 +252,26 @@ module Spacy
|
|
252
252
|
@text
|
253
253
|
end
|
254
254
|
|
255
|
+
# Returns a hash or string of morphological information
|
256
|
+
# @param hash [Boolean] if true, a hash will be returned instead of a string
|
257
|
+
# @return [Hash, String]
|
258
|
+
def morphology(hash = true)
|
259
|
+
if @py_token.has_morph
|
260
|
+
morph_analysis = @py_token.morph
|
261
|
+
if hash
|
262
|
+
return morph_analysis.to_dict
|
263
|
+
else
|
264
|
+
return morph_analysis.to_s
|
265
|
+
end
|
266
|
+
else
|
267
|
+
if hash
|
268
|
+
results = {}
|
269
|
+
else
|
270
|
+
return ""
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
|
255
275
|
# Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
|
256
276
|
def method_missing(name, *args)
|
257
277
|
@py_token.send(name, *args)
|
data/lib/ruby-spacy/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-spacy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yoichiro Hasebe
|
@@ -75,6 +75,7 @@ files:
|
|
75
75
|
- bin/setup
|
76
76
|
- examples/get_started/lexeme.rb
|
77
77
|
- examples/get_started/linguistic_annotations.rb
|
78
|
+
- examples/get_started/morphology.rb
|
78
79
|
- examples/get_started/most_similar.rb
|
79
80
|
- examples/get_started/named_entities.rb
|
80
81
|
- examples/get_started/outputs/test_dep.svg
|
@@ -111,7 +112,6 @@ files:
|
|
111
112
|
- examples/linguistic_features/iterating_children.rb
|
112
113
|
- examples/linguistic_features/iterating_lefts_and_rights.rb
|
113
114
|
- examples/linguistic_features/lemmatization.rb
|
114
|
-
- examples/linguistic_features/morphology.rb
|
115
115
|
- examples/linguistic_features/named_entity_recognition.rb
|
116
116
|
- examples/linguistic_features/navigating_parse_tree.rb
|
117
117
|
- examples/linguistic_features/noun_chunks.rb
|
@@ -149,7 +149,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
149
|
- !ruby/object:Gem::Version
|
150
150
|
version: '0'
|
151
151
|
requirements: []
|
152
|
-
rubygems_version: 3.2.
|
152
|
+
rubygems_version: 3.2.11
|
153
153
|
signing_key:
|
154
154
|
specification_version: 4
|
155
155
|
summary: A wrapper module for using spaCy natural language processing library from
|
@@ -1,17 +0,0 @@
|
|
1
|
-
require "ruby-spacy"
|
2
|
-
require "terminal-table"
|
3
|
-
|
4
|
-
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
-
|
6
|
-
puts "Pipeline: " + nlp.pipe_names.to_s
|
7
|
-
|
8
|
-
doc = nlp.read("I was reading the paper.")
|
9
|
-
|
10
|
-
token = doc[0]
|
11
|
-
|
12
|
-
puts "Morph features of the first word: " + token.morph.to_s
|
13
|
-
puts "PronType of the word: " + token.morph.get("PronType").to_s
|
14
|
-
|
15
|
-
# Pipeline: ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"]
|
16
|
-
# Morph features of the first word: Case=Nom|Number=Sing|Person=1|PronType=Prs
|
17
|
-
# PronType of the word: ['Prs']
|