ruby-spacy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +58 -0
- data/.yardopts +2 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +39 -0
- data/LICENSE.txt +21 -0
- data/README.md +498 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/examples/get_started/lexeme.rb +24 -0
- data/examples/get_started/linguistic_annotations.rb +32 -0
- data/examples/get_started/most_similar.rb +46 -0
- data/examples/get_started/named_entities.rb +24 -0
- data/examples/get_started/outputs/test_dep.svg +84 -0
- data/examples/get_started/outputs/test_dep_compact.svg +84 -0
- data/examples/get_started/outputs/test_ent.html +11 -0
- data/examples/get_started/pos_tags_and_dependencies.rb +31 -0
- data/examples/get_started/similarity.rb +13 -0
- data/examples/get_started/tokenization.rb +22 -0
- data/examples/get_started/visualizing_dependencies.rb +14 -0
- data/examples/get_started/visualizing_dependencies_compact.rb +12 -0
- data/examples/get_started/visualizing_named_entities.rb +12 -0
- data/examples/get_started/vocab.rb +10 -0
- data/examples/get_started/word_vectors.rb +24 -0
- data/examples/japanese/ancestors.rb +44 -0
- data/examples/japanese/entity_annotations_and_labels.rb +45 -0
- data/examples/japanese/information_extraction.rb +27 -0
- data/examples/japanese/lemmatization.rb +32 -0
- data/examples/japanese/most_similar.rb +46 -0
- data/examples/japanese/named_entity_recognition.rb +27 -0
- data/examples/japanese/navigating_parse_tree.rb +34 -0
- data/examples/japanese/noun_chunks.rb +23 -0
- data/examples/japanese/outputs/test_dep.svg +149 -0
- data/examples/japanese/outputs/test_ent.html +16 -0
- data/examples/japanese/pos_tagging.rb +34 -0
- data/examples/japanese/sentence_segmentation.rb +16 -0
- data/examples/japanese/similarity.rb +12 -0
- data/examples/japanese/tokenization.rb +38 -0
- data/examples/japanese/visualizing_dependencies.rb +13 -0
- data/examples/japanese/visualizing_named_entities.rb +14 -0
- data/examples/linguistic_features/ancestors.rb +41 -0
- data/examples/linguistic_features/entity_annotations_and_labels.rb +29 -0
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +20 -0
- data/examples/linguistic_features/information_extraction.rb +36 -0
- data/examples/linguistic_features/iterating_children.rb +24 -0
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +20 -0
- data/examples/linguistic_features/lemmatization.rb +31 -0
- data/examples/linguistic_features/morphology.rb +17 -0
- data/examples/linguistic_features/named_entity_recognition.rb +25 -0
- data/examples/linguistic_features/navigating_parse_tree.rb +32 -0
- data/examples/linguistic_features/noun_chunks.rb +27 -0
- data/examples/linguistic_features/outputs/test_ent.html +11 -0
- data/examples/linguistic_features/pos_tagging.rb +31 -0
- data/examples/linguistic_features/retokenize_1.rb +29 -0
- data/examples/linguistic_features/retokenize_2.rb +16 -0
- data/examples/linguistic_features/rule_based_morphology.rb +12 -0
- data/examples/linguistic_features/sentence_segmentation.rb +16 -0
- data/examples/linguistic_features/similarity.rb +14 -0
- data/examples/linguistic_features/similarity_between_spans.rb +23 -0
- data/examples/linguistic_features/special_case_tokenization_rules.rb +19 -0
- data/examples/linguistic_features/tokenization.rb +23 -0
- data/examples/rule_based_matching/creating_spans_from_matches.rb +16 -0
- data/examples/rule_based_matching/matcher.rb +19 -0
- data/lib/ruby-spacy.rb +567 -0
- data/lib/ruby-spacy/version.rb +6 -0
- data/ruby-spacy.gemspec +42 -0
- metadata +157 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
<div class="entities" style="line-height: 2.5; direction: ltr">
|
2
|
+
<mark class="entity" style="background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
3
|
+
セバスチアン・スラン
|
4
|
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
|
5
|
+
</mark>
|
6
|
+
が
|
7
|
+
<mark class="entity" style="background: #bfe1d9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
8
|
+
2007年
|
9
|
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">DATE</span>
|
10
|
+
</mark>
|
11
|
+
に
|
12
|
+
<mark class="entity" style="background: #bfeeb7; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
13
|
+
グーグル
|
14
|
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PRODUCT</span>
|
15
|
+
</mark>
|
16
|
+
で自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。</div>
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
|
+
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
6
|
+
|
7
|
+
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |token|
|
11
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
|
12
|
+
end
|
13
|
+
|
14
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
15
|
+
puts table
|
16
|
+
|
17
|
+
# +------------+------------+-------+--------------------------+--------+--------+----------+---------+
|
18
|
+
# | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
|
19
|
+
# +------------+------------+-------+--------------------------+--------+--------+----------+---------+
|
20
|
+
# | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj | xxx | true | false |
|
21
|
+
# | は | は | ADP | 助詞-係助詞 | case | x | true | true |
|
22
|
+
# | 1983 | 1983 | NUM | 名詞-数詞 | nummod | dddd | false | false |
|
23
|
+
# | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
|
24
|
+
# | に | に | ADP | 助詞-格助詞 | case | x | true | true |
|
25
|
+
# | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj | xxxx | true | false |
|
26
|
+
# | を | を | ADP | 助詞-格助詞 | case | x | true | true |
|
27
|
+
# | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed | dd,ddd | false | false |
|
28
|
+
# | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
|
29
|
+
# | で | で | ADP | 助詞-格助詞 | case | x | true | true |
|
30
|
+
# | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT | xx | true | false |
|
31
|
+
# | し | する | AUX | 動詞-非自立可能 | aux | x | true | true |
|
32
|
+
# | た | た | AUX | 助動詞 | aux | x | true | true |
|
33
|
+
# | 。 | 。 | PUNCT | 補助記号-句点 | punct | 。 | false | false |
|
34
|
+
# +------------+------------+-------+--------------------------+--------+--------+----------+---------+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
|
3
|
+
nlp = Spacy::Language.new("ja_core_news_sm")
|
4
|
+
|
5
|
+
doc = nlp.read("これは文です。今私は「これは文です」と言いました。")
|
6
|
+
|
7
|
+
|
8
|
+
puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
|
9
|
+
|
10
|
+
doc.sents.each do |sent|
|
11
|
+
puts sent.text
|
12
|
+
end
|
13
|
+
|
14
|
+
# doc has annotation SENT_START: true
|
15
|
+
# これは文です。
|
16
|
+
# 今私は「これは文です」と言いました。
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
|
3
|
+
nlp = Spacy::Language.new("ja_core_news_lg")
|
4
|
+
ja_doc1 = nlp.read("今日は雨ばっかり降って、嫌な天気ですね。")
|
5
|
+
puts "doc1: #{ja_doc1.text}"
|
6
|
+
ja_doc2 = nlp.read("あいにくの悪天候で残念です。")
|
7
|
+
puts "doc2: #{ja_doc2.text}"
|
8
|
+
puts "Similarity: #{ja_doc1.similarity(ja_doc2)}"
|
9
|
+
|
10
|
+
# doc1: 今日は雨ばっかり降って、嫌な天気ですね。
|
11
|
+
# doc2: あいにくの悪天候で残念です。
|
12
|
+
# Similarity: 0.8684192637149641
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("ja_core_news_sm")
|
5
|
+
|
6
|
+
doc = nlp.read("アップルはイギリスの新興企業を10億ドルで買収しようとしている。")
|
7
|
+
|
8
|
+
headings = ["text"]
|
9
|
+
rows = []
|
10
|
+
|
11
|
+
doc.each do |token|
|
12
|
+
rows << [token.text]
|
13
|
+
end
|
14
|
+
|
15
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
16
|
+
puts table
|
17
|
+
|
18
|
+
# +----------+
|
19
|
+
# | text |
|
20
|
+
# +----------+
|
21
|
+
# | アップル |
|
22
|
+
# | は |
|
23
|
+
# | イギリス |
|
24
|
+
# | の |
|
25
|
+
# | 新興 |
|
26
|
+
# | 企業 |
|
27
|
+
# | を |
|
28
|
+
# | 10億 |
|
29
|
+
# | ドル |
|
30
|
+
# | で |
|
31
|
+
# | 買収 |
|
32
|
+
# | しよう |
|
33
|
+
# | と |
|
34
|
+
# | し |
|
35
|
+
# | て |
|
36
|
+
# | いる |
|
37
|
+
# | 。 |
|
38
|
+
# +----------+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("ja_core_news_sm")
|
5
|
+
|
6
|
+
sentence = "自動運転車は保険責任を製造者に転嫁する。"
|
7
|
+
doc = nlp.read(sentence)
|
8
|
+
|
9
|
+
dep_svg = doc.displacy('dep', false)
|
10
|
+
|
11
|
+
File.open(File.join(File.dirname(__FILE__), "outputs/test_dep.svg"), "w") do |file|
|
12
|
+
file.write(dep_svg)
|
13
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
|
+
|
6
|
+
sentence = "セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
|
7
|
+
|
8
|
+
doc = nlp.read(sentence)
|
9
|
+
|
10
|
+
ent_html = doc.displacy('ent')
|
11
|
+
|
12
|
+
File.open(File.join(File.dirname(__FILE__), "outputs/test_ent.html"), "w") do |file|
|
13
|
+
file.write(ent_html)
|
14
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
sentence = "Credit and mortgage account holders must submit their requests"
|
7
|
+
doc = nlp.read(sentence)
|
8
|
+
|
9
|
+
headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
|
10
|
+
rows = []
|
11
|
+
|
12
|
+
root = doc.tokens.select do |t|
|
13
|
+
# need to compare token and its head using their indices
|
14
|
+
t.i == t.head.i
|
15
|
+
end.first
|
16
|
+
|
17
|
+
puts "The sentence: " + sentence
|
18
|
+
|
19
|
+
subject = Spacy::Token.new(root.lefts[0])
|
20
|
+
|
21
|
+
puts "The root of the sentence is: " + root.text
|
22
|
+
puts "The subject of the sentence is: " + subject.text
|
23
|
+
|
24
|
+
subject.subtree.each do |descendant|
|
25
|
+
# need to convert "ancestors" object from a python generator to a ruby array
|
26
|
+
ancestors = Spacy.generator_to_array(descendant.ancestors)
|
27
|
+
rows << [descendant.text, descendant.dep_, descendant.n_lefts, descendant.n_rights, ancestors]
|
28
|
+
end
|
29
|
+
|
30
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
31
|
+
print table
|
32
|
+
|
33
|
+
# +----------+----------+---------+----------+------------------------------------+
|
34
|
+
# | text | dep | n_lefts | n_rights | ancestors |
|
35
|
+
# +----------+----------+---------+----------+------------------------------------+
|
36
|
+
# | Credit | nmod | 0 | 2 | [holders, submit] |
|
37
|
+
# | and | cc | 0 | 0 | [Credit, holders, submit] |
|
38
|
+
# | mortgage | compound | 0 | 0 | [account, Credit, holders, submit] |
|
39
|
+
# | account | conj | 1 | 0 | [Credit, holders, submit] |
|
40
|
+
# | holders | nsubj | 1 | 0 | [submit] |
|
41
|
+
# +----------+----------+---------+----------+------------------------------------+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
sentence = "San Francisco considers banning sidewalk delivery robots"
|
7
|
+
doc = nlp.read(sentence)
|
8
|
+
|
9
|
+
headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
|
10
|
+
rows = []
|
11
|
+
|
12
|
+
doc.each do |ent|
|
13
|
+
rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type_]
|
14
|
+
end
|
15
|
+
|
16
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
17
|
+
print table
|
18
|
+
|
19
|
+
# +-----------+---------+----------+-----------+
|
20
|
+
# | text | ent_iob | ent_iob_ | ent_type_ |
|
21
|
+
# +-----------+---------+----------+-----------+
|
22
|
+
# | San | 3 | B | GPE |
|
23
|
+
# | Francisco | 1 | I | GPE |
|
24
|
+
# | considers | 2 | O | |
|
25
|
+
# | banning | 2 | O | |
|
26
|
+
# | sidewalk | 2 | O | |
|
27
|
+
# | delivery | 2 | O | |
|
28
|
+
# | robots | 2 | O | |
|
29
|
+
# +-----------+---------+----------+-----------+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
|
+
|
8
|
+
|
9
|
+
results = []
|
10
|
+
|
11
|
+
doc.each do |token|
|
12
|
+
if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
13
|
+
results << token.head
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
puts results.to_s
|
18
|
+
|
19
|
+
# [shift]
|
20
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
nlp.add_pipe("merge_entities")
|
7
|
+
nlp.add_pipe("merge_noun_chunks")
|
8
|
+
|
9
|
+
sentence = "Credit and mortgage account holders must submit their requests"
|
10
|
+
doc = nlp.read(sentence)
|
11
|
+
|
12
|
+
texts = [
|
13
|
+
"Net income was $9.4 million compared to the prior year of $2.7 million.",
|
14
|
+
"Revenue exceeded twelve billion dollars, with a loss of $1b.",
|
15
|
+
]
|
16
|
+
|
17
|
+
texts.each do |text|
|
18
|
+
doc = nlp.read(text)
|
19
|
+
doc.each do |token|
|
20
|
+
if token.ent_type_ == "MONEY"
|
21
|
+
if ["attr", "dobj"].index token.dep_
|
22
|
+
subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep_ == "nsubj"}
|
23
|
+
if !subj.empty?
|
24
|
+
puts(subj[0].text + " --> " + token.text)
|
25
|
+
end
|
26
|
+
elsif token.dep_ == "pobj" && token.head.dep_ == "prep"
|
27
|
+
puts token.head.head.text + " --> " + token.text
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Net income --> $9.4 million
|
34
|
+
# the prior year --> $2.7 million
|
35
|
+
# Revenue --> twelve billion dollars
|
36
|
+
# a loss --> 1b
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
|
+
|
8
|
+
|
9
|
+
results = []
|
10
|
+
|
11
|
+
doc.each do |token|
|
12
|
+
if token.pos_ == "VERB"
|
13
|
+
token.children.each do |child|
|
14
|
+
if child.dep_ == "nsubj"
|
15
|
+
results << child.head
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
puts results.to_s
|
22
|
+
|
23
|
+
# [shift]
|
24
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
doc = nlp.read("bright red apples on the tree")
|
7
|
+
|
8
|
+
# Concatenate the document's text explicitly rather than the Doc object itself.
puts "Text: " + doc.text
|
9
|
+
|
10
|
+
puts "Words to the left of 'apple': " + Spacy.generator_to_array(doc[2].lefts).to_s
|
11
|
+
puts "Words to the right of 'apple': " + Spacy.generator_to_array(doc[2].rights).to_s
|
12
|
+
|
13
|
+
puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
|
14
|
+
puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
|
15
|
+
|
16
|
+
# Text: bright red apples on the tree
|
17
|
+
# Words to the left of 'apple': [bright, red]
|
18
|
+
# Words to the right of 'apple': [on]
|
19
|
+
# Num of the words to the left of 'apple': 2
|
20
|
+
# Num of the words to the right of 'apple': 1
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
+
puts "Lemmatizer mode: " + lemmatizer.mode
|
8
|
+
|
9
|
+
doc = nlp.read("I was reading the paper.")
|
10
|
+
|
11
|
+
headings = ["lemma"]
|
12
|
+
rows = []
|
13
|
+
|
14
|
+
doc.each do |token|
|
15
|
+
rows << [token.lemma_]
|
16
|
+
end
|
17
|
+
|
18
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
19
|
+
puts table
|
20
|
+
|
21
|
+
# Lemmatizer mode: rule
|
22
|
+
# +-------+
|
23
|
+
# | lemma |
|
24
|
+
# +-------+
|
25
|
+
# | I |
|
26
|
+
# | be |
|
27
|
+
# | read |
|
28
|
+
# | the |
|
29
|
+
# | paper |
|
30
|
+
# | . |
|
31
|
+
# +-------+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
puts "Pipeline: " + nlp.pipe_names.to_s
|
7
|
+
|
8
|
+
doc = nlp.read("I was reading the paper.")
|
9
|
+
|
10
|
+
token = doc[0]
|
11
|
+
|
12
|
+
puts "Morph features of the first word: " + token.morph.to_s
|
13
|
+
puts "PronType of the word: " + token.morph.get("PronType").to_s
|
14
|
+
|
15
|
+
# Pipeline: ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"]
|
16
|
+
# Morph features of the first word: Case=Nom|Number=Sing|Person=1|PronType=Prs
|
17
|
+
# PronType of the word: ['Prs']
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
sentence = "Apple is looking at buying U.K. startup for $1 billion"
|
7
|
+
doc = nlp.read(sentence)
|
8
|
+
|
9
|
+
headings = ["text", "start", "end", "label"]
|
10
|
+
rows = []
|
11
|
+
|
12
|
+
doc.ents.each do |ent|
|
13
|
+
rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
|
14
|
+
end
|
15
|
+
|
16
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
17
|
+
puts table
|
18
|
+
|
19
|
+
# +------------+-------+-----+-------+
|
20
|
+
# | text | start | end | label |
|
21
|
+
# +------------+-------+-----+-------+
|
22
|
+
# | Apple | 0 | 5 | ORG |
|
23
|
+
# | U.K. | 27 | 31 | GPE |
|
24
|
+
# | $1 billion | 44 | 54 | MONEY |
|
25
|
+
# +------------+-------+-----+-------+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
+
puts "Lemmatizer mode: " + lemmatizer.mode
|
8
|
+
|
9
|
+
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
|
+
|
11
|
+
headings = ["text", "dep", "head text", "head pos", "children"]
|
12
|
+
rows = []
|
13
|
+
|
14
|
+
doc.each do |token|
|
15
|
+
rows << [token.text, token.dep_, token.head.text, token.head.pos_, token.children.to_s]
|
16
|
+
end
|
17
|
+
|
18
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
19
|
+
puts table
|
20
|
+
|
21
|
+
# Lemmatizer mode: rule
|
22
|
+
# +---------------+----------+-----------+----------+---------------------------+
|
23
|
+
# | text | dep | head text | head pos | children |
|
24
|
+
# +---------------+----------+-----------+----------+---------------------------+
|
25
|
+
# | Autonomous | amod | cars | NOUN | [] |
|
26
|
+
# | cars | nsubj | shift | VERB | [Autonomous] |
|
27
|
+
# | shift | ROOT | shift | VERB | [cars, liability, toward] |
|
28
|
+
# | insurance | compound | liability | NOUN | [] |
|
29
|
+
# | liability | dobj | shift | VERB | [insurance] |
|
30
|
+
# | toward | prep | shift | VERB | [manufacturers] |
|
31
|
+
# | manufacturers | pobj | toward | ADP | [] |
|
32
|
+
# +---------------+----------+-----------+----------+---------------------------+
|