ruby-spacy 0.1.4.1 → 0.1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/Gemfile +7 -7
  5. data/Gemfile.lock +2 -2
  6. data/README.md +7 -10
  7. data/examples/get_started/lexeme.rb +3 -1
  8. data/examples/get_started/linguistic_annotations.rb +3 -1
  9. data/examples/get_started/morphology.rb +3 -1
  10. data/examples/get_started/most_similar.rb +3 -1
  11. data/examples/get_started/named_entities.rb +4 -2
  12. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  13. data/examples/get_started/similarity.rb +4 -2
  14. data/examples/get_started/tokenization.rb +3 -1
  15. data/examples/get_started/visualizing_dependencies.rb +2 -2
  16. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  17. data/examples/get_started/visualizing_named_entities.rb +4 -2
  18. data/examples/get_started/vocab.rb +3 -1
  19. data/examples/get_started/word_vectors.rb +3 -1
  20. data/examples/japanese/ancestors.rb +6 -4
  21. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  22. data/examples/japanese/information_extraction.rb +6 -6
  23. data/examples/japanese/lemmatization.rb +3 -1
  24. data/examples/japanese/most_similar.rb +3 -1
  25. data/examples/japanese/named_entity_recognition.rb +3 -2
  26. data/examples/japanese/navigating_parse_tree.rb +19 -17
  27. data/examples/japanese/noun_chunks.rb +2 -0
  28. data/examples/japanese/pos_tagging.rb +3 -1
  29. data/examples/japanese/sentence_segmentation.rb +3 -2
  30. data/examples/japanese/similarity.rb +2 -0
  31. data/examples/japanese/tokenization.rb +2 -0
  32. data/examples/japanese/visualizing_dependencies.rb +3 -1
  33. data/examples/japanese/visualizing_named_entities.rb +4 -2
  34. data/examples/linguistic_features/ancestors.rb +7 -5
  35. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  36. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  37. data/examples/linguistic_features/information_extraction.rb +9 -9
  38. data/examples/linguistic_features/iterating_children.rb +6 -8
  39. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  40. data/examples/linguistic_features/lemmatization.rb +3 -1
  41. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  42. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  43. data/examples/linguistic_features/noun_chunks.rb +3 -1
  44. data/examples/linguistic_features/pos_tagging.rb +3 -1
  45. data/examples/linguistic_features/retokenize_1.rb +2 -0
  46. data/examples/linguistic_features/retokenize_2.rb +4 -2
  47. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  48. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  49. data/examples/linguistic_features/similarity.rb +4 -2
  50. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  51. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  52. data/examples/linguistic_features/tokenization.rb +3 -2
  53. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  54. data/examples/rule_based_matching/matcher.rb +4 -2
  55. data/lib/ruby-spacy/version.rb +1 -1
  56. data/lib/ruby-spacy.rb +139 -141
  57. data/ruby-spacy.gemspec +15 -17
  58. data/tags +132 -0
  59. metadata +69 -10
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "Credit and mortgage account holders must submit their requests"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  subject = Spacy::Token.new(root.lefts[0])
20
22
 
21
- puts "The root of the sentence is: " + root.text
22
- puts "The subject of the sentence is: " + subject.text
23
+ puts "The root of the sentence is: #{root.text}"
24
+ puts "The subject of the sentence is: #{subject.text}"
23
25
 
24
26
  subject.subtree.each do |descendant|
25
27
  # need to convert "ancestors" object from a python generator to a ruby array
26
- ancestors = Spacy::generator_to_array(descendant.ancestors)
28
+ ancestors = Spacy.generator_to_array(descendant.ancestors)
27
29
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
28
30
  end
29
31
 
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
- sentence = "San Francisco considers banning sidewalk delivery robots"
8
+ sentence = "San Francisco considers banning sidewalk delivery robots"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
7
9
 
8
-
9
10
  results = []
10
11
 
11
12
  doc.each do |token|
12
- if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
13
- results << token.head.text
14
- end
13
+ results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
15
14
  end
16
15
 
17
16
  puts results.to_s
18
17
 
19
18
  # ["shift"]
20
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
10
12
  doc = nlp.read(sentence)
11
13
 
12
14
  texts = [
13
- "Net income was $9.4 million compared to the prior year of $2.7 million.",
14
- "Revenue exceeded twelve billion dollars, with a loss of $1b.",
15
+ "Net income was $9.4 million compared to the prior year of $2.7 million.",
16
+ "Revenue exceeded twelve billion dollars, with a loss of $1b."
15
17
  ]
16
18
 
17
19
  texts.each do |text|
18
20
  doc = nlp.read(text)
19
21
  doc.each do |token|
20
22
  if token.ent_type_ == "MONEY"
21
- if ["attr", "dobj"].index token.dep_
22
- subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
23
- if !subj.empty?
24
- puts(subj[0].text + " --> " + token.text)
25
- end
26
- elsif token.dep_ == "pobj" and token.head.dep == "prep"
27
- puts token.head.head.text + " --> " + token.text
23
+ if %w[attr dobj].index token.dep_
24
+ subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
25
+ puts("#{subj[0].text} --> #{token.text}") unless subj.empty?
26
+ elsif token.dep_ == "pobj" && token.head.dep == "prep"
27
+ puts "#{token.head.head.text} --> #{token.text}"
28
28
  end
29
29
  end
30
30
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
7
9
 
8
-
9
10
  results = []
10
11
 
11
12
  doc.each do |token|
12
- if token.pos_ == "VERB"
13
- token.children.each do |child|
14
- if child.dep_ == "nsubj"
15
- results << child.head.text
16
- end
17
- end
13
+ next unless token.pos_ == "VERB"
14
+
15
+ token.children.each do |child|
16
+ results << child.head.text if child.dep_ == "nsubj"
18
17
  end
19
18
  end
20
19
 
21
20
  puts results.to_s
22
21
 
23
22
  # ["shift"]
24
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("bright red apples on the tree")
7
9
 
8
- puts "Text: " + doc.text
10
+ puts "Text: #{doc.text}"
9
11
 
10
- puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
11
- puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
12
+ puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
13
+ puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
12
14
 
13
- puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
14
- puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
15
+ puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
16
+ puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
15
17
 
16
18
  # Text: bright red apples on the tree
17
19
  # Words to the left of 'apple': bright, red
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("I was reading the paper.")
10
12
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "Apple is looking at buying U.K. startup for $1 billion"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "start", "end", "label"]
11
+ headings = %w[text start end label]
10
12
  rows = []
11
13
 
12
14
  doc.ents.each do |ent|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
10
12
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
10
12
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
9
+ headings = %w[text lemma pos tag dep shape is_alpha is_stop]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "I live in New York"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- puts "Before: " + doc.tokens.map(&:text).join(", ")
11
+ puts "Before: #{doc.tokens.map(&:text).join(", ")}"
10
12
 
11
13
  doc.retokenize(3, 4)
12
14
 
13
- puts "After: " + doc.tokens.map(&:text).join(", ")
15
+ puts "After: #{doc.tokens.map(&:text).join(", ")}"
14
16
 
15
17
  # Before: I, live, in, New, York
16
18
  # After: I, live, in, New York
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Where are you?")
7
9
 
8
- puts "Morph features of the third word: " + doc[2].morph.to_s
9
- puts "POS of the third word: " + doc[2].pos
10
+ puts "Morph features of the third word: #{doc[2].morph}"
11
+ puts "POS of the third word: #{doc[2].pos}"
10
12
 
11
13
  # Morph features of the third word: Case=Nom|Person=2|PronType=Prs
12
14
  # POS of the third word: PRON
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
7
  doc = nlp.read("This is a sentence. This is another sentence.")
6
8
 
7
-
8
- puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
9
+ puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
9
10
 
10
11
  doc.sents.each do |sent|
11
12
  puts sent.text
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc1 = nlp.read("I like salty fries and hamburgers.")
6
8
  doc2 = nlp.read("Fast food tastes very good.")
7
9
 
8
- puts "Doc 1: " + doc1.text
9
- puts "Doc 2: " + doc2.text
10
+ puts "Doc 1: #{doc1.text}"
11
+ puts "Doc 2: #{doc2.text}"
10
12
  puts "Similarity: #{doc1.similarity(doc2)}"
11
13
 
12
14
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc1 = nlp.read("I like salty fries and hamburgers.")
6
8
  doc2 = nlp.read("Fast food tastes very good.")
7
9
 
8
- puts "Doc 1: " + doc1.text
9
- puts "Doc 2: " + doc2.text
10
+ puts "Doc 1: #{doc1.text}"
11
+ puts "Doc 2: #{doc2.text}"
10
12
  puts "Similarity: #{doc1.similarity(doc2)}"
11
13
 
12
14
  span1 = doc1.span(2, 2) # salty fries
13
- span2 = doc1.span(5 .. 5) # hamberger
14
- puts "Span 1: " + span1.text
15
- puts "Span 2: " + span2.text
15
+ span2 = doc1.span(5..5) # hamberger
16
+ puts "Span 1: #{span1.text}"
17
+ puts "Span 2: #{span2.text}"
16
18
  puts "Similarity: #{span1.similarity(span2)}"
17
19
 
18
20
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
@@ -20,4 +22,3 @@ puts table
20
22
  # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
21
23
  # | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
22
24
  # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
23
-
@@ -1,16 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  matcher = nlp.matcher
6
- matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
8
+ matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
7
9
  doc = nlp.read("Barack Obama was the 44th president of the United States")
8
10
 
9
11
  matches = matcher.match(doc)
10
12
 
11
13
  matches.each do |match|
12
- span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
13
- puts span.text + " / " + span.label
14
+ span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
15
+ puts "#{span.text} / #{span.label}"
14
16
  end
15
17
 
16
18
  # Barack Obama / US_PRESIDENT
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
7
+ pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
6
8
 
7
9
  matcher = nlp.matcher
8
10
  matcher.add("HelloWorld", pattern)
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
10
12
  doc = nlp.read("Hello, world! Hello world!")
11
13
  matches = matcher.match(doc)
12
14
 
13
- matches.each do | match |
15
+ matches.each do |match|
14
16
  string_id = nlp.vocab_string_lookup(match[:match_id])
15
17
  span = doc.span(match[:start_index]..match[:end_index])
16
18
  puts "#{string_id}, #{span.text}"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Spacy
4
4
  # The version number of the module
5
- VERSION = "0.1.4.1"
5
+ VERSION = "0.1.5.0"
6
6
  end