ruby-spacy 0.1.4 → 0.1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/CHANGELOG.md +5 -1
- data/Gemfile +7 -7
- data/Gemfile.lock +3 -3
- data/README.md +40 -39
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +30 -27
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +30 -27
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +142 -136
- data/ruby-spacy.gemspec +15 -17
- data/tags +132 -0
- metadata +69 -10
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -9,38 +11,39 @@ france = nlp.get_lexeme("フランス")
|
|
9
11
|
|
10
12
|
query = tokyo.vector - japan.vector + france.vector
|
11
13
|
|
12
|
-
headings = [
|
14
|
+
headings = %w[rank text score]
|
13
15
|
rows = []
|
14
16
|
|
15
17
|
results = nlp.most_similar(query, 20)
|
16
|
-
results.
|
17
|
-
|
18
|
+
results.each_with_index do |lexeme, i|
|
19
|
+
index = (i + 1).to_s
|
20
|
+
rows << [index, lexeme.text, lexeme.score]
|
18
21
|
end
|
19
22
|
|
20
23
|
table = Terminal::Table.new rows: rows, headings: headings
|
21
24
|
puts table
|
22
25
|
|
23
|
-
#
|
24
|
-
# |
|
25
|
-
#
|
26
|
-
# |
|
27
|
-
# |
|
28
|
-
# |
|
29
|
-
# |
|
30
|
-
# |
|
31
|
-
# |
|
32
|
-
# |
|
33
|
-
# |
|
34
|
-
# |
|
35
|
-
# |
|
36
|
-
# |
|
37
|
-
# |
|
38
|
-
# |
|
39
|
-
# |
|
40
|
-
# |
|
41
|
-
# |
|
42
|
-
# |
|
43
|
-
# |
|
44
|
-
# |
|
45
|
-
# |
|
46
|
-
#
|
26
|
+
# +------+----------------+--------------------+
|
27
|
+
# | rank | text | score |
|
28
|
+
# +------+----------------+--------------------+
|
29
|
+
# | 1 | パリ | 0.7376999855041504 |
|
30
|
+
# | 2 | フランス | 0.7221999764442444 |
|
31
|
+
# | 3 | 東京 | 0.6697999835014343 |
|
32
|
+
# | 4 | ストラスブール | 0.631600022315979 |
|
33
|
+
# | 5 | リヨン | 0.5939000248908997 |
|
34
|
+
# | 6 | Paris | 0.574400007724762 |
|
35
|
+
# | 7 | ベルギー | 0.5683000087738037 |
|
36
|
+
# | 8 | ニース | 0.5679000020027161 |
|
37
|
+
# | 9 | アルザス | 0.5644999742507935 |
|
38
|
+
# | 10 | 南仏 | 0.5547999739646912 |
|
39
|
+
# | 11 | ロンドン | 0.5525000095367432 |
|
40
|
+
# | 12 | モンマルトル | 0.5453000068664551 |
|
41
|
+
# | 13 | ブローニュ | 0.5338000059127808 |
|
42
|
+
# | 14 | トゥールーズ | 0.5275999903678894 |
|
43
|
+
# | 15 | バスティーユ | 0.5213000178337097 |
|
44
|
+
# | 16 | フランス人 | 0.5194000005722046 |
|
45
|
+
# | 17 | ロレーヌ | 0.5148000121116638 |
|
46
|
+
# | 18 | モンパルナス | 0.513700008392334 |
|
47
|
+
# | 19 | 渡仏 | 0.5131000280380249 |
|
48
|
+
# | 20 | イタリア | 0.5127000212669373 |
|
49
|
+
# +------+----------------+--------------------+
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
|
|
6
8
|
sentence = "任天堂は1983年にファミコンを14,800円で発売した。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text start end label]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.ents.each do |ent|
|
@@ -24,4 +26,3 @@ puts table
|
|
24
26
|
# | ファミコン | 10 | 15 | PRODUCT |
|
25
27
|
# | 14,800円 | 16 | 23 | MONEY |
|
26
28
|
# +------------+-------+-----+---------+
|
27
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -15,20 +17,20 @@ end
|
|
15
17
|
table = Terminal::Table.new rows: rows, headings: headings
|
16
18
|
puts table
|
17
19
|
|
18
|
-
+------+----------+-----------+----------+------------------------+
|
19
|
-
| text | dep | head text | head pos | children |
|
20
|
-
+------+----------+-----------+----------+------------------------+
|
21
|
-
| 自動 | compound | 車 | 92 | |
|
22
|
-
| 運転 | compound | 車 | 92 | |
|
23
|
-
| 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
|
24
|
-
| は | case | 車 | 92 | |
|
25
|
-
| 保険 | compound | 責任 | 92 | |
|
26
|
-
| 責任 | obj | 転嫁 | 100 | 保険, を |
|
27
|
-
| を | case | 責任 | 92 | |
|
28
|
-
| 製造 | compound | 者 | 92 | |
|
29
|
-
| 者 | obl | 転嫁 | 100 | 製造, に |
|
30
|
-
| に | case | 者 | 92 | |
|
31
|
-
| 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
|
32
|
-
| する | aux | 転嫁 | 100 | |
|
33
|
-
| 。 | punct | 転嫁 | 100 | |
|
34
|
-
+------+----------+-----------+----------+------------------------+
|
20
|
+
# +------+----------+-----------+----------+------------------------+
|
21
|
+
# | text | dep | head text | head pos | children |
|
22
|
+
# +------+----------+-----------+----------+------------------------+
|
23
|
+
# | 自動 | compound | 車 | 92 | |
|
24
|
+
# | 運転 | compound | 車 | 92 | |
|
25
|
+
# | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
|
26
|
+
# | は | case | 車 | 92 | |
|
27
|
+
# | 保険 | compound | 責任 | 92 | |
|
28
|
+
# | 責任 | obj | 転嫁 | 100 | 保険, を |
|
29
|
+
# | を | case | 責任 | 92 | |
|
30
|
+
# | 製造 | compound | 者 | 92 | |
|
31
|
+
# | 者 | obl | 転嫁 | 100 | 製造, に |
|
32
|
+
# | に | case | 者 | 92 | |
|
33
|
+
# | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
|
34
|
+
# | する | aux | 転嫁 | 100 | |
|
35
|
+
# | 。 | punct | 転嫁 | 100 | |
|
36
|
+
# +------+----------+-----------+----------+------------------------+
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
7
|
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("ja_core_news_sm")
|
4
6
|
|
5
7
|
doc = nlp.read("これは文です。今私は「これは文です」と言いました。")
|
6
8
|
|
7
|
-
|
8
|
-
puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
|
9
|
+
puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
|
9
10
|
|
10
11
|
doc.sents.each do |sent|
|
11
12
|
puts sent.text
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
|
|
6
8
|
sentence = "自動運転車は保険責任を製造者に転嫁する。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
dep_svg = doc.displacy(style:
|
11
|
+
dep_svg = doc.displacy(style: "dep", compact: false)
|
10
12
|
|
11
13
|
File.open(File.join(File.dirname(__FILE__), "test_dep.svg"), "w") do |file|
|
12
14
|
file.write(dep_svg)
|
@@ -1,13 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
7
|
|
6
|
-
sentence ="セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
|
8
|
+
sentence = "セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
|
7
9
|
|
8
10
|
doc = nlp.read(sentence)
|
9
11
|
|
10
|
-
ent_html = doc.displacy(style:
|
12
|
+
ent_html = doc.displacy(style: "ent")
|
11
13
|
|
12
14
|
File.open(File.join(File.dirname(__FILE__), "outputs/test_ent.html"), "w") do |file|
|
13
15
|
file.write(ent_html)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "Credit and mortgage account holders must submit their requests"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text dep n_lefts n_rights ancestors]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
root = doc.tokens.select do |t|
|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
|
|
14
16
|
t.i == t.head.i
|
15
17
|
end.first
|
16
18
|
|
17
|
-
puts "The sentence: "
|
19
|
+
puts "The sentence: #{sentence}"
|
18
20
|
|
19
21
|
subject = Spacy::Token.new(root.lefts[0])
|
20
22
|
|
21
|
-
puts "The root of the sentence is:
|
22
|
-
puts "The subject of the sentence is:
|
23
|
+
puts "The root of the sentence is: #{root.text}"
|
24
|
+
puts "The subject of the sentence is: #{subject.text}"
|
23
25
|
|
24
26
|
subject.subtree.each do |descendant|
|
25
27
|
# need to convert "ancestors" object from a python generator to a ruby array
|
26
|
-
ancestors = Spacy
|
28
|
+
ancestors = Spacy.generator_to_array(descendant.ancestors)
|
27
29
|
rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
|
28
30
|
end
|
29
31
|
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
|
-
sentence = "San Francisco considers banning sidewalk delivery robots"
|
8
|
+
sentence = "San Francisco considers banning sidewalk delivery robots"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text ent_iob ent_iob_ ent_type_]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.each do |ent|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
9
|
|
8
|
-
|
9
10
|
results = []
|
10
11
|
|
11
12
|
doc.each do |token|
|
12
|
-
if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
13
|
-
results << token.head.text
|
14
|
-
end
|
13
|
+
results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
15
14
|
end
|
16
15
|
|
17
16
|
puts results.to_s
|
18
17
|
|
19
18
|
# ["shift"]
|
20
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
|
|
10
12
|
doc = nlp.read(sentence)
|
11
13
|
|
12
14
|
texts = [
|
13
|
-
|
14
|
-
|
15
|
+
"Net income was $9.4 million compared to the prior year of $2.7 million.",
|
16
|
+
"Revenue exceeded twelve billion dollars, with a loss of $1b."
|
15
17
|
]
|
16
18
|
|
17
19
|
texts.each do |text|
|
18
20
|
doc = nlp.read(text)
|
19
21
|
doc.each do |token|
|
20
22
|
if token.ent_type_ == "MONEY"
|
21
|
-
if [
|
22
|
-
subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
elsif token.dep_ == "pobj" and token.head.dep == "prep"
|
27
|
-
puts token.head.head.text + " --> " + token.text
|
23
|
+
if %w[attr dobj].index token.dep_
|
24
|
+
subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
|
25
|
+
puts("#{subj[0].text} --> #{token.text}") unless subj.empty?
|
26
|
+
elsif token.dep_ == "pobj" && token.head.dep == "prep"
|
27
|
+
puts "#{token.head.head.text} --> #{token.text}"
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
9
|
|
8
|
-
|
9
10
|
results = []
|
10
11
|
|
11
12
|
doc.each do |token|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
-
end
|
13
|
+
next unless token.pos_ == "VERB"
|
14
|
+
|
15
|
+
token.children.each do |child|
|
16
|
+
results << child.head.text if child.dep_ == "nsubj"
|
18
17
|
end
|
19
18
|
end
|
20
19
|
|
21
20
|
puts results.to_s
|
22
21
|
|
23
22
|
# ["shift"]
|
24
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("bright red apples on the tree")
|
7
9
|
|
8
|
-
puts "Text:
|
10
|
+
puts "Text: #{doc.text}"
|
9
11
|
|
10
|
-
puts "Words to the left of 'apple':
|
11
|
-
puts "Words to the right of 'apple':
|
12
|
+
puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
|
13
|
+
puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
|
12
14
|
|
13
|
-
puts "Num of the words to the left of 'apple':
|
14
|
-
puts "Num of the words to the right of 'apple':
|
15
|
+
puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
|
16
|
+
puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
|
15
17
|
|
16
18
|
# Text: bright red apples on the tree
|
17
19
|
# Words to the left of 'apple': bright, red
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("I was reading the paper.")
|
10
12
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "Apple is looking at buying U.K. startup for $1 billion"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text start end label]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.ents.each do |ent|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
12
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
12
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep shape is_alpha is_stop]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "I live in New York"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
puts "Before:
|
11
|
+
puts "Before: #{doc.tokens.map(&:text).join(", ")}"
|
10
12
|
|
11
13
|
doc.retokenize(3, 4)
|
12
14
|
|
13
|
-
puts "After:
|
15
|
+
puts "After: #{doc.tokens.map(&:text).join(", ")}"
|
14
16
|
|
15
17
|
# Before: I, live, in, New, York
|
16
18
|
# After: I, live, in, New York
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Where are you?")
|
7
9
|
|
8
|
-
puts "Morph features of the third word:
|
9
|
-
puts "POS of the third word:
|
10
|
+
puts "Morph features of the third word: #{doc[2].morph}"
|
11
|
+
puts "POS of the third word: #{doc[2].pos}"
|
10
12
|
|
11
13
|
# Morph features of the third word: Case=Nom|Person=2|PronType=Prs
|
12
14
|
# POS of the third word: PRON
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
7
|
doc = nlp.read("This is a sentence. This is another sentence.")
|
6
8
|
|
7
|
-
|
8
|
-
puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
|
9
|
+
puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
|
9
10
|
|
10
11
|
doc.sents.each do |sent|
|
11
12
|
puts sent.text
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
7
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
8
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
9
|
|
8
|
-
puts "Doc 1:
|
9
|
-
puts "Doc 2:
|
10
|
+
puts "Doc 1: #{doc1.text}"
|
11
|
+
puts "Doc 2: #{doc2.text}"
|
10
12
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
13
|
|
12
14
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
7
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
8
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
9
|
|
8
|
-
puts "Doc 1:
|
9
|
-
puts "Doc 2:
|
10
|
+
puts "Doc 1: #{doc1.text}"
|
11
|
+
puts "Doc 2: #{doc2.text}"
|
10
12
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
13
|
|
12
14
|
span1 = doc1.span(2, 2) # salty fries
|
13
|
-
span2 = doc1.span(5
|
14
|
-
puts "Span 1:
|
15
|
-
puts "Span 2:
|
15
|
+
span2 = doc1.span(5..5) # hamberger
|
16
|
+
puts "Span 1: #{span1.text}"
|
17
|
+
puts "Span 2: #{span2.text}"
|
16
18
|
puts "Similarity: #{span1.similarity(span2)}"
|
17
19
|
|
18
20
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
9
|
|
8
|
-
headings = [1,2,3,4,5,6,7,8,9,10,11]
|
10
|
+
headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
9
11
|
row = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -20,4 +22,3 @@ puts table
|
|
20
22
|
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
21
23
|
# | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
22
24
|
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
23
|
-
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_lg")
|
5
7
|
matcher = nlp.matcher
|
6
|
-
matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
|
8
|
+
matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
|
7
9
|
doc = nlp.read("Barack Obama was the 44th president of the United States")
|
8
10
|
|
9
11
|
matches = matcher.match(doc)
|
10
12
|
|
11
13
|
matches.each do |match|
|
12
|
-
span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
|
13
|
-
puts span.text
|
14
|
+
span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
|
15
|
+
puts "#{span.text} / #{span.label}"
|
14
16
|
end
|
15
17
|
|
16
18
|
# Barack Obama / US_PRESIDENT
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
|
-
pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
|
7
|
+
pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
|
6
8
|
|
7
9
|
matcher = nlp.matcher
|
8
10
|
matcher.add("HelloWorld", pattern)
|
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
|
|
10
12
|
doc = nlp.read("Hello, world! Hello world!")
|
11
13
|
matches = matcher.match(doc)
|
12
14
|
|
13
|
-
matches.each do |
|
15
|
+
matches.each do |match|
|
14
16
|
string_id = nlp.vocab_string_lookup(match[:match_id])
|
15
17
|
span = doc.span(match[:start_index]..match[:end_index])
|
16
18
|
puts "#{string_id}, #{span.text}"
|