ruby-spacy 0.1.4.1 → 0.1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/Gemfile +7 -7
- data/Gemfile.lock +2 -2
- data/README.md +7 -10
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +3 -1
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +3 -1
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +139 -141
- data/ruby-spacy.gemspec +15 -17
- data/tags +132 -0
- metadata +69 -10
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "Credit and mortgage account holders must submit their requests"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text dep n_lefts n_rights ancestors]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
root = doc.tokens.select do |t|
|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
|
|
14
16
|
t.i == t.head.i
|
15
17
|
end.first
|
16
18
|
|
17
|
-
puts "The sentence: "
|
19
|
+
puts "The sentence: #{sentence}"
|
18
20
|
|
19
21
|
subject = Spacy::Token.new(root.lefts[0])
|
20
22
|
|
21
|
-
puts "The root of the sentence is:
|
22
|
-
puts "The subject of the sentence is:
|
23
|
+
puts "The root of the sentence is: #{root.text}"
|
24
|
+
puts "The subject of the sentence is: #{subject.text}"
|
23
25
|
|
24
26
|
subject.subtree.each do |descendant|
|
25
27
|
# need to convert "ancestors" object from a python generator to a ruby array
|
26
|
-
ancestors = Spacy
|
28
|
+
ancestors = Spacy.generator_to_array(descendant.ancestors)
|
27
29
|
rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
|
28
30
|
end
|
29
31
|
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
|
-
sentence = "San Francisco considers banning sidewalk delivery robots"
|
8
|
+
sentence = "San Francisco considers banning sidewalk delivery robots"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text ent_iob ent_iob_ ent_type_]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.each do |ent|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
9
|
|
8
|
-
|
9
10
|
results = []
|
10
11
|
|
11
12
|
doc.each do |token|
|
12
|
-
if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
13
|
-
results << token.head.text
|
14
|
-
end
|
13
|
+
results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
|
15
14
|
end
|
16
15
|
|
17
16
|
puts results.to_s
|
18
17
|
|
19
18
|
# ["shift"]
|
20
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
|
|
10
12
|
doc = nlp.read(sentence)
|
11
13
|
|
12
14
|
texts = [
|
13
|
-
|
14
|
-
|
15
|
+
"Net income was $9.4 million compared to the prior year of $2.7 million.",
|
16
|
+
"Revenue exceeded twelve billion dollars, with a loss of $1b."
|
15
17
|
]
|
16
18
|
|
17
19
|
texts.each do |text|
|
18
20
|
doc = nlp.read(text)
|
19
21
|
doc.each do |token|
|
20
22
|
if token.ent_type_ == "MONEY"
|
21
|
-
if [
|
22
|
-
subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
elsif token.dep_ == "pobj" and token.head.dep == "prep"
|
27
|
-
puts token.head.head.text + " --> " + token.text
|
23
|
+
if %w[attr dobj].index token.dep_
|
24
|
+
subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
|
25
|
+
puts("#{subj[0].text} --> #{token.text}") unless subj.empty?
|
26
|
+
elsif token.dep_ == "pobj" && token.head.dep == "prep"
|
27
|
+
puts "#{token.head.head.text} --> #{token.text}"
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
7
9
|
|
8
|
-
|
9
10
|
results = []
|
10
11
|
|
11
12
|
doc.each do |token|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
-
end
|
13
|
+
next unless token.pos_ == "VERB"
|
14
|
+
|
15
|
+
token.children.each do |child|
|
16
|
+
results << child.head.text if child.dep_ == "nsubj"
|
18
17
|
end
|
19
18
|
end
|
20
19
|
|
21
20
|
puts results.to_s
|
22
21
|
|
23
22
|
# ["shift"]
|
24
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("bright red apples on the tree")
|
7
9
|
|
8
|
-
puts "Text:
|
10
|
+
puts "Text: #{doc.text}"
|
9
11
|
|
10
|
-
puts "Words to the left of 'apple':
|
11
|
-
puts "Words to the right of 'apple':
|
12
|
+
puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
|
13
|
+
puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
|
12
14
|
|
13
|
-
puts "Num of the words to the left of 'apple':
|
14
|
-
puts "Num of the words to the right of 'apple':
|
15
|
+
puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
|
16
|
+
puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
|
15
17
|
|
16
18
|
# Text: bright red apples on the tree
|
17
19
|
# Words to the left of 'apple': bright, red
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("I was reading the paper.")
|
10
12
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "Apple is looking at buying U.K. startup for $1 billion"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text start end label]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.ents.each do |ent|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
12
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
|
6
8
|
lemmatizer = nlp.get_pipe("lemmatizer")
|
7
|
-
puts "Lemmatizer mode:
|
9
|
+
puts "Lemmatizer mode: #{lemmatizer.mode}"
|
8
10
|
|
9
11
|
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
|
10
12
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep shape is_alpha is_stop]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
6
8
|
sentence = "I live in New York"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
puts "Before:
|
11
|
+
puts "Before: #{doc.tokens.map(&:text).join(", ")}"
|
10
12
|
|
11
13
|
doc.retokenize(3, 4)
|
12
14
|
|
13
|
-
puts "After:
|
15
|
+
puts "After: #{doc.tokens.map(&:text).join(", ")}"
|
14
16
|
|
15
17
|
# Before: I, live, in, New, York
|
16
18
|
# After: I, live, in, New York
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Where are you?")
|
7
9
|
|
8
|
-
puts "Morph features of the third word:
|
9
|
-
puts "POS of the third word:
|
10
|
+
puts "Morph features of the third word: #{doc[2].morph}"
|
11
|
+
puts "POS of the third word: #{doc[2].pos}"
|
10
12
|
|
11
13
|
# Morph features of the third word: Case=Nom|Person=2|PronType=Prs
|
12
14
|
# POS of the third word: PRON
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
7
|
doc = nlp.read("This is a sentence. This is another sentence.")
|
6
8
|
|
7
|
-
|
8
|
-
puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
|
9
|
+
puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
|
9
10
|
|
10
11
|
doc.sents.each do |sent|
|
11
12
|
puts sent.text
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
7
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
8
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
9
|
|
8
|
-
puts "Doc 1:
|
9
|
-
puts "Doc 2:
|
10
|
+
puts "Doc 1: #{doc1.text}"
|
11
|
+
puts "Doc 2: #{doc2.text}"
|
10
12
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
13
|
|
12
14
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
|
|
5
7
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
6
8
|
doc2 = nlp.read("Fast food tastes very good.")
|
7
9
|
|
8
|
-
puts "Doc 1:
|
9
|
-
puts "Doc 2:
|
10
|
+
puts "Doc 1: #{doc1.text}"
|
11
|
+
puts "Doc 2: #{doc2.text}"
|
10
12
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
11
13
|
|
12
14
|
span1 = doc1.span(2, 2) # salty fries
|
13
|
-
span2 = doc1.span(5
|
14
|
-
puts "Span 1:
|
15
|
-
puts "Span 2:
|
15
|
+
span2 = doc1.span(5..5) # hamberger
|
16
|
+
puts "Span 1: #{span1.text}"
|
17
|
+
puts "Span 2: #{span2.text}"
|
16
18
|
puts "Similarity: #{span1.similarity(span2)}"
|
17
19
|
|
18
20
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
9
|
|
8
|
-
headings = [1,2,3,4,5,6,7,8,9,10,11]
|
10
|
+
headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
9
11
|
row = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -20,4 +22,3 @@ puts table
|
|
20
22
|
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
21
23
|
# | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
22
24
|
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
23
|
-
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_lg")
|
5
7
|
matcher = nlp.matcher
|
6
|
-
matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
|
8
|
+
matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
|
7
9
|
doc = nlp.read("Barack Obama was the 44th president of the United States")
|
8
10
|
|
9
11
|
matches = matcher.match(doc)
|
10
12
|
|
11
13
|
matches.each do |match|
|
12
|
-
span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
|
13
|
-
puts span.text
|
14
|
+
span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
|
15
|
+
puts "#{span.text} / #{span.label}"
|
14
16
|
end
|
15
17
|
|
16
18
|
# Barack Obama / US_PRESIDENT
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
|
-
pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
|
7
|
+
pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
|
6
8
|
|
7
9
|
matcher = nlp.matcher
|
8
10
|
matcher.add("HelloWorld", pattern)
|
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
|
|
10
12
|
doc = nlp.read("Hello, world! Hello world!")
|
11
13
|
matches = matcher.match(doc)
|
12
14
|
|
13
|
-
matches.each do |
|
15
|
+
matches.each do |match|
|
14
16
|
string_id = nlp.vocab_string_lookup(match[:match_id])
|
15
17
|
span = doc.span(match[:start_index]..match[:end_index])
|
16
18
|
puts "#{string_id}, #{span.text}"
|
data/lib/ruby-spacy/version.rb
CHANGED