RubyGems - ruby-spacy - Versions diffs - 0.1.4.1 → 0.1.5.0 - Mend

ruby-spacy 0.1.4.1 → 0.1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

checksums.yaml +4 -4
data/.rubocop.yml +48 -0
data/.solargraph.yml +22 -0
data/Gemfile +7 -7
data/Gemfile.lock +2 -2
data/README.md +7 -10
data/examples/get_started/lexeme.rb +3 -1
data/examples/get_started/linguistic_annotations.rb +3 -1
data/examples/get_started/morphology.rb +3 -1
data/examples/get_started/most_similar.rb +3 -1
data/examples/get_started/named_entities.rb +4 -2
data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
data/examples/get_started/similarity.rb +4 -2
data/examples/get_started/tokenization.rb +3 -1
data/examples/get_started/visualizing_dependencies.rb +2 -2
data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
data/examples/get_started/visualizing_named_entities.rb +4 -2
data/examples/get_started/vocab.rb +3 -1
data/examples/get_started/word_vectors.rb +3 -1
data/examples/japanese/ancestors.rb +6 -4
data/examples/japanese/entity_annotations_and_labels.rb +4 -2
data/examples/japanese/information_extraction.rb +6 -6
data/examples/japanese/lemmatization.rb +3 -1
data/examples/japanese/most_similar.rb +3 -1
data/examples/japanese/named_entity_recognition.rb +3 -2
data/examples/japanese/navigating_parse_tree.rb +19 -17
data/examples/japanese/noun_chunks.rb +2 -0
data/examples/japanese/pos_tagging.rb +3 -1
data/examples/japanese/sentence_segmentation.rb +3 -2
data/examples/japanese/similarity.rb +2 -0
data/examples/japanese/tokenization.rb +2 -0
data/examples/japanese/visualizing_dependencies.rb +3 -1
data/examples/japanese/visualizing_named_entities.rb +4 -2
data/examples/linguistic_features/ancestors.rb +7 -5
data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
data/examples/linguistic_features/information_extraction.rb +9 -9
data/examples/linguistic_features/iterating_children.rb +6 -8
data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
data/examples/linguistic_features/lemmatization.rb +3 -1
data/examples/linguistic_features/named_entity_recognition.rb +3 -1
data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
data/examples/linguistic_features/noun_chunks.rb +3 -1
data/examples/linguistic_features/pos_tagging.rb +3 -1
data/examples/linguistic_features/retokenize_1.rb +2 -0
data/examples/linguistic_features/retokenize_2.rb +4 -2
data/examples/linguistic_features/rule_based_morphology.rb +4 -2
data/examples/linguistic_features/sentence_segmentation.rb +3 -2
data/examples/linguistic_features/similarity.rb +4 -2
data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
data/examples/linguistic_features/similarity_between_spans.rb +7 -5
data/examples/linguistic_features/tokenization.rb +3 -2
data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
data/examples/rule_based_matching/matcher.rb +4 -2
data/lib/ruby-spacy/version.rb +1 -1
data/lib/ruby-spacy.rb +139 -141
data/ruby-spacy.gemspec +15 -17
data/tags +132 -0
metadata +69 -10

data/examples/linguistic_features/ancestors.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
 sentence = "Credit and mortgage account holders must submit their requests"
 doc = nlp.read(sentence)
-headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
+headings = %w[text dep n_lefts n_rights ancestors]
 rows = []
 root = doc.tokens.select do |t|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
   t.i == t.head.i
 end.first
-puts "The sentence: " + sentence
+puts "The sentence: #{sentence}"
 subject = Spacy::Token.new(root.lefts[0])
-puts "The root of the sentence is: " + root.text
-puts "The subject of the sentence is: " + subject.text
+puts "The root of the sentence is: #{root.text}"
+puts "The subject of the sentence is: #{subject.text}"
 subject.subtree.each do |descendant|
   # need to convert "ancestors" object from a python generator to a ruby array
-  ancestors = Spacy::generator_to_array(descendant.ancestors)
+  ancestors = Spacy.generator_to_array(descendant.ancestors)
   rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
 end

data/examples/linguistic_features/entity_annotations_and_labels.rb CHANGED Viewed

@@ -1,12 +1,14 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_sm")
-sentence = "San Francisco considers banning sidewalk delivery robots"
+sentence = "San Francisco considers banning sidewalk delivery robots"
 doc = nlp.read(sentence)
-headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
+headings = %w[text ent_iob ent_iob_ ent_type_]
 rows = []
 doc.each do |ent|

data/examples/linguistic_features/finding_a_verb_with_a_subject.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
 results = []
 doc.each do |token|
-  if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
-    results << token.head.text
-  end
+  results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
 end
 puts results.to_s
 # ["shift"]

data/examples/linguistic_features/information_extraction.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
 doc = nlp.read(sentence)
 texts = [
-    "Net income was $9.4 million compared to the prior year of $2.7 million.",
-    "Revenue exceeded twelve billion dollars, with a loss of $1b.",
+  "Net income was $9.4 million compared to the prior year of $2.7 million.",
+  "Revenue exceeded twelve billion dollars, with a loss of $1b."
 ]
 texts.each do |text|
   doc = nlp.read(text)
   doc.each do |token|
     if token.ent_type_ == "MONEY"
-      if ["attr", "dobj"].index token.dep_
-        subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
-        if !subj.empty?
-          puts(subj[0].text + " --> " + token.text)
-        end
-      elsif token.dep_ == "pobj" and token.head.dep == "prep"
-        puts token.head.head.text + " --> " + token.text
+      if %w[attr dobj].index token.dep_
+        subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
+        puts("#{subj[0].text}  --> #{token.text}") unless subj.empty?
+      elsif token.dep_ == "pobj" && token.head.dep == "prep"
+        puts "#{token.head.head.text} --> #{token.text}"
       end
     end
   end

data/examples/linguistic_features/iterating_children.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
 results = []
 doc.each do |token|
-  if token.pos_ == "VERB"
-    token.children.each do |child|
-      if child.dep_ == "nsubj"
-        results << child.head.text
-      end
-    end
+  next unless token.pos_ == "VERB"
+  token.children.each do |child|
+    results << child.head.text if child.dep_ == "nsubj"
   end
 end
 puts results.to_s
 # ["shift"]

data/examples/linguistic_features/iterating_lefts_and_rights.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("bright red apples on the tree")
-puts "Text: " + doc.text
+puts "Text: #{doc.text}"
-puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
-puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
+puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
+puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
-puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
-puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
+puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
+puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
 # Text: bright red apples on the tree
 # Words to the left of 'apple': bright, red

data/examples/linguistic_features/lemmatization.rb CHANGED Viewed

@@ -1,10 +1,12 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_sm")
 lemmatizer = nlp.get_pipe("lemmatizer")
-puts "Lemmatizer mode: " + lemmatizer.mode
+puts "Lemmatizer mode: #{lemmatizer.mode}"
 doc = nlp.read("I was reading the paper.")

data/examples/linguistic_features/named_entity_recognition.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
 sentence = "Apple is looking at buying U.K. startup for $1 billion"
 doc = nlp.read(sentence)
-headings = ["text", "start", "end", "label"]
+headings = %w[text start end label]
 rows = []
 doc.ents.each do |ent|

data/examples/linguistic_features/navigating_parse_tree.rb CHANGED Viewed

@@ -1,10 +1,12 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_sm")
 lemmatizer = nlp.get_pipe("lemmatizer")
-puts "Lemmatizer mode: " + lemmatizer.mode
+puts "Lemmatizer mode: #{lemmatizer.mode}"
 doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")

data/examples/linguistic_features/noun_chunks.rb CHANGED Viewed

@@ -1,10 +1,12 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_sm")
 lemmatizer = nlp.get_pipe("lemmatizer")
-puts "Lemmatizer mode: " + lemmatizer.mode
+puts "Lemmatizer mode: #{lemmatizer.mode}"
 doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")

data/examples/linguistic_features/pos_tagging.rb CHANGED Viewed

@@ -1,10 +1,12 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
-headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
+headings = %w[text lemma pos tag dep shape is_alpha is_stop]
 rows = []
 doc.each do |token|

data/examples/linguistic_features/retokenize_1.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"

data/examples/linguistic_features/retokenize_2.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
 sentence = "I live in New York"
 doc = nlp.read(sentence)
-puts "Before: " + doc.tokens.map(&:text).join(", ")
+puts "Before: #{doc.tokens.map(&:text).join(", ")}"
 doc.retokenize(3, 4)
-puts "After: " + doc.tokens.map(&:text).join(", ")
+puts "After: #{doc.tokens.map(&:text).join(", ")}"
 # Before: I, live, in, New, York
 # After: I, live, in, New York

data/examples/linguistic_features/rule_based_morphology.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Where are you?")
-puts "Morph features of the third word: " + doc[2].morph.to_s
-puts "POS of the third word: " + doc[2].pos
+puts "Morph features of the third word: #{doc[2].morph}"
+puts "POS of the third word: #{doc[2].pos}"
 # Morph features of the third word: Case=Nom|Person=2|PronType=Prs
 # POS of the third word: PRON

data/examples/linguistic_features/sentence_segmentation.rb CHANGED Viewed

@@ -1,11 +1,12 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("This is a sentence. This is another sentence.")
-puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
+puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
 doc.sents.each do |sent|
   puts sent.text

data/examples/linguistic_features/similarity.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
 doc1 = nlp.read("I like salty fries and hamburgers.")
 doc2 = nlp.read("Fast food tastes very good.")
-puts "Doc 1: " + doc1.text
-puts "Doc 2: " + doc2.text
+puts "Doc 1: #{doc1.text}"
+puts "Doc 2: #{doc2.text}"
 puts "Similarity: #{doc1.similarity(doc2)}"
 # Doc 1: I like salty fries and hamburgers.

data/examples/linguistic_features/similarity_between_lexemes.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"

data/examples/linguistic_features/similarity_between_spans.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
 doc1 = nlp.read("I like salty fries and hamburgers.")
 doc2 = nlp.read("Fast food tastes very good.")
-puts "Doc 1: " + doc1.text
-puts "Doc 2: " + doc2.text
+puts "Doc 1: #{doc1.text}"
+puts "Doc 2: #{doc2.text}"
 puts "Similarity: #{doc1.similarity(doc2)}"
 span1 = doc1.span(2, 2) # salty fries
-span2 = doc1.span(5 .. 5) # hamberger
-puts "Span 1: " + span1.text
-puts "Span 2: " + span2.text
+span2 = doc1.span(5..5) # hamberger
+puts "Span 1: #{span1.text}"
+puts "Span 2: #{span2.text}"
 puts "Similarity: #{span1.similarity(span2)}"
 # Doc 1: I like salty fries and hamburgers.

data/examples/linguistic_features/tokenization.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
-headings = [1,2,3,4,5,6,7,8,9,10,11]
+headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
 row = []
 doc.each do |token|
@@ -20,4 +22,3 @@ puts table
 # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
 # | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1  | billion |
 # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+

data/examples/rule_based_matching/creating_spans_from_matches.rb CHANGED Viewed

@@ -1,16 +1,18 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 require "terminal-table"
 nlp = Spacy::Language.new("en_core_web_lg")
 matcher = nlp.matcher
-matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
+matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
 doc = nlp.read("Barack Obama was the 44th president of the United States")
 matches = matcher.match(doc)
 matches.each do |match|
-  span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
-  puts span.text + " / " + span.label
+  span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
+  puts "#{span.text} / #{span.label}"
 end
 # Barack Obama / US_PRESIDENT

data/examples/rule_based_matching/matcher.rb CHANGED Viewed

@@ -1,8 +1,10 @@
+# frozen_string_literal: true
 require "ruby-spacy"
 nlp = Spacy::Language.new("en_core_web_sm")
-pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
+pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
 matcher = nlp.matcher
 matcher.add("HelloWorld", pattern)
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
 doc = nlp.read("Hello, world! Hello world!")
 matches = matcher.match(doc)
-matches.each do | match |
+matches.each do |match|
   string_id = nlp.vocab_string_lookup(match[:match_id])
   span = doc.span(match[:start_index]..match[:end_index])
   puts "#{string_id}, #{span.text}"

data/lib/ruby-spacy/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Spacy
   # The version number of the module
-  VERSION = "0.1.4.1"
+  VERSION = "0.1.5.0"
 end