ruby-spacy 0.1.4.1 → 0.1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/Gemfile +7 -7
- data/Gemfile.lock +2 -2
- data/README.md +7 -10
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +3 -1
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +3 -1
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +139 -141
- data/ruby-spacy.gemspec +15 -17
- data/tags +132 -0
- metadata +69 -10
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "Credit and mortgage account holders must submit their requests"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text dep n_lefts n_rights ancestors]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
root = doc.tokens.select do |t|
|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
|
|
14
16
|
t.i == t.head.i
|
15
17
|
end.first
|
16
18
|
|
17
|
-
puts "The sentence: "
|
19
|
+
puts "The sentence: #{sentence}"
|
18
20
|
|
19
21
|
subject = Spacy::Token.new(root.lefts[0])
|
20
22
|
|
21
|
-
puts "The root of the sentence is:
|
22
|
-
puts "The subject of the sentence is:
|
23
|
+
puts "The root of the sentence is: #{root.text}"
|
24
|
+
puts "The subject of the sentence is: #{subject.text}"
|
23
25
|
|
24
26
|
subject.subtree.each do |descendant|
|
25
27
|
# need to convert "ancestors" object from a python generator to a ruby array
|
26
|
-
ancestors = Spacy
|
28
|
+
ancestors = Spacy.generator_to_array(descendant.ancestors)
|
27
29
|
rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
|
28
30
|
end
|
29
31
|
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
|
-
sentence = "San Francisco considers banning sidewalk delivery robots"
|
8
|
+
sentence = "San Francisco considers banning sidewalk delivery robots"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text ent_iob ent_iob_ ent_type_]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.each do |ent|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
9
|
|
8
|
-
|
9
10
|
results = []
|
10
11
|
|
11
12
|
doc.each do |token|
|
12
|
-
if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
13
|
-
results << token.head.text
|
14
|
-
end
|
13
|
+
results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
15
14
|
end
|
16
15
|
|
17
16
|
puts results.to_s
|
18
17
|
|
19
18
|
# ["shift"]
|
20
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
|
|
10
12
|
doc = nlp.read(sentence)
|
11
13
|
|
12
14
|
texts = [
|
13
|
-
|
14
|
-
|
15
|
+
"Net income was $9.4 million compared to the prior year of $2.7 million.",
|
16
|
+
"Revenue exceeded twelve billion dollars, with a loss of $1b."
|
15
17
|
]
|
16
18
|
|
17
19
|
texts.each do |text|
|
18
20
|
doc = nlp.read(text)
|
19
21
|
doc.each do |token|
|
20
22
|
if token.ent_type_ == "MONEY"
|
21
|
-
if [
|
22
|
-
subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
elsif token.dep_ == "pobj" and token.head.dep == "prep"
|
27
|
-
puts token.head.head.text + " --> " + token.text
|
23
|
+
if %w[attr dobj].index token.dep_
|
24
|
+
subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
|
25
|
+
puts("#{subj[0].text} --> #{token.text}") unless subj.empty?
|
26
|
+
elsif token.dep_ == "pobj" && token.head.dep == "prep"
|
27
|
+
puts "#{token.head.head.text} --> #{token.text}"
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
9
|
|
8
|
-
|
9
10
|
results = []
|
10
11
|
|
11
12
|
doc.each do |token|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
-
end
|
13
|
+
next unless token.pos_ == "VERB"
|
14
|
+
|
15
|
+
token.children.each do |child|
|
16
|
+
results << child.head.text if child.dep_ == "nsubj"
|
18
17
|
end
|
19
18
|
end
|
20
19
|
|
21
20
|
puts results.to_s
|
22
21
|
|
23
22
|
# ["shift"]
|
24
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("bright red apples on the tree")
|
7
9
|
|
8
|
-
puts "Text:
|
10
|
+
puts "Text: #{doc.text}"
|
9
11
|
|
10
|
-
puts "Words to the left of 'apple':
|
11
|
-
puts "Words to the right of 'apple':
|
12
|
+
puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
|
13
|
+
puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
|
12
14
|
|
13
|
-
puts "Num of the words to the left of 'apple':
|
14
|
-
puts "Num of the words to the right of 'apple':
|
15
|
+
puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
|
16
|
+
puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
|
15
17
|
|
16
18
|
# Text: bright red apples on the tree
|
17
19
|
# Words to the left of 'apple': bright, red
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("I was reading the paper.")
|
10
12
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "Apple is looking at buying U.K. startup for $1 billion"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text start end label]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.ents.each do |ent|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
12
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
12
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep shape is_alpha is_stop]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "I live in New York"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
puts "Before:
|
11
|
+
puts "Before: #{doc.tokens.map(&:text).join(", ")}"
|
10
12
|
|
11
13
|
doc.retokenize(3, 4)
|
12
14
|
|
13
|
-
puts "After:
|
15
|
+
puts "After: #{doc.tokens.map(&:text).join(", ")}"
|
14
16
|
|
15
17
|
# Before: I, live, in, New, York
|
16
18
|
# After: I, live, in, New York
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Where are you?")
|
7
9
|
|
8
|
-
puts "Morph features of the third word:
|
9
|
-
puts "POS of the third word:
|
10
|
+
puts "Morph features of the third word: #{doc[2].morph}"
|
11
|
+
puts "POS of the third word: #{doc[2].pos}"
|
10
12
|
|
11
13
|
# Morph features of the third word: Case=Nom|Person=2|PronType=Prs
|
12
14
|
# POS of the third word: PRON
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
7
|
doc = nlp.read("This is a sentence. This is another sentence.")
|
6
8
|
|
7
|
-
|
8
|
-
puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
|
9
|
+
puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
|
9
10
|
|
10
11
|
doc.sents.each do |sent|
|
11
12
|
puts sent.text
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
7
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
8
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
9
|
|
8
|
-
puts "Doc 1:
|
9
|
-
puts "Doc 2:
|
10
|
+
puts "Doc 1: #{doc1.text}"
|
11
|
+
puts "Doc 2: #{doc2.text}"
|
10
12
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
13
|
|
12
14
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
7
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
8
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
9
|
|
8
|
-
puts "Doc 1:
|
9
|
-
puts "Doc 2:
|
10
|
+
puts "Doc 1: #{doc1.text}"
|
11
|
+
puts "Doc 2: #{doc2.text}"
|
10
12
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
13
|
|
12
14
|
span1 = doc1.span(2, 2) # salty fries
|
13
|
-
span2 = doc1.span(5
|
14
|
-
puts "Span 1:
|
15
|
-
puts "Span 2:
|
15
|
+
span2 = doc1.span(5..5) # hamburgers
|
16
|
+
puts "Span 1: #{span1.text}"
|
17
|
+
puts "Span 2: #{span2.text}"
|
16
18
|
puts "Similarity: #{span1.similarity(span2)}"
|
17
19
|
|
18
20
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
9
|
|
8
|
-
headings = [1,2,3,4,5,6,7,8,9,10,11]
|
10
|
+
headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
9
11
|
row = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -20,4 +22,3 @@ puts table
|
|
20
22
|
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
21
23
|
# | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
22
24
|
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
23
|
-
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_lg")
|
5
7
|
matcher = nlp.matcher
|
6
|
-
matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
|
8
|
+
matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
|
7
9
|
doc = nlp.read("Barack Obama was the 44th president of the United States")
|
8
10
|
|
9
11
|
matches = matcher.match(doc)
|
10
12
|
|
11
13
|
matches.each do |match|
|
12
|
-
span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
|
13
|
-
puts span.text
|
14
|
+
span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
|
15
|
+
puts "#{span.text} / #{span.label}"
|
14
16
|
end
|
15
17
|
|
16
18
|
# Barack Obama / US_PRESIDENT
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
|
-
pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
|
7
|
+
pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
|
6
8
|
|
7
9
|
matcher = nlp.matcher
|
8
10
|
matcher.add("HelloWorld", pattern)
|
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
|
|
10
12
|
doc = nlp.read("Hello, world! Hello world!")
|
11
13
|
matches = matcher.match(doc)
|
12
14
|
|
13
|
-
matches.each do |
|
15
|
+
matches.each do |match|
|
14
16
|
string_id = nlp.vocab_string_lookup(match[:match_id])
|
15
17
|
span = doc.span(match[:start_index]..match[:end_index])
|
16
18
|
puts "#{string_id}, #{span.text}"
|
data/lib/ruby-spacy/version.rb
CHANGED