ruby-spacy 0.1.4.1 → 0.1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/Gemfile +7 -7
  5. data/Gemfile.lock +2 -2
  6. data/README.md +7 -10
  7. data/examples/get_started/lexeme.rb +3 -1
  8. data/examples/get_started/linguistic_annotations.rb +3 -1
  9. data/examples/get_started/morphology.rb +3 -1
  10. data/examples/get_started/most_similar.rb +3 -1
  11. data/examples/get_started/named_entities.rb +4 -2
  12. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  13. data/examples/get_started/similarity.rb +4 -2
  14. data/examples/get_started/tokenization.rb +3 -1
  15. data/examples/get_started/visualizing_dependencies.rb +2 -2
  16. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  17. data/examples/get_started/visualizing_named_entities.rb +4 -2
  18. data/examples/get_started/vocab.rb +3 -1
  19. data/examples/get_started/word_vectors.rb +3 -1
  20. data/examples/japanese/ancestors.rb +6 -4
  21. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  22. data/examples/japanese/information_extraction.rb +6 -6
  23. data/examples/japanese/lemmatization.rb +3 -1
  24. data/examples/japanese/most_similar.rb +3 -1
  25. data/examples/japanese/named_entity_recognition.rb +3 -2
  26. data/examples/japanese/navigating_parse_tree.rb +19 -17
  27. data/examples/japanese/noun_chunks.rb +2 -0
  28. data/examples/japanese/pos_tagging.rb +3 -1
  29. data/examples/japanese/sentence_segmentation.rb +3 -2
  30. data/examples/japanese/similarity.rb +2 -0
  31. data/examples/japanese/tokenization.rb +2 -0
  32. data/examples/japanese/visualizing_dependencies.rb +3 -1
  33. data/examples/japanese/visualizing_named_entities.rb +4 -2
  34. data/examples/linguistic_features/ancestors.rb +7 -5
  35. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  36. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  37. data/examples/linguistic_features/information_extraction.rb +9 -9
  38. data/examples/linguistic_features/iterating_children.rb +6 -8
  39. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  40. data/examples/linguistic_features/lemmatization.rb +3 -1
  41. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  42. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  43. data/examples/linguistic_features/noun_chunks.rb +3 -1
  44. data/examples/linguistic_features/pos_tagging.rb +3 -1
  45. data/examples/linguistic_features/retokenize_1.rb +2 -0
  46. data/examples/linguistic_features/retokenize_2.rb +4 -2
  47. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  48. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  49. data/examples/linguistic_features/similarity.rb +4 -2
  50. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  51. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  52. data/examples/linguistic_features/tokenization.rb +3 -2
  53. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  54. data/examples/rule_based_matching/matcher.rb +4 -2
  55. data/lib/ruby-spacy/version.rb +1 -1
  56. data/lib/ruby-spacy.rb +139 -141
  57. data/ruby-spacy.gemspec +15 -17
  58. data/tags +132 -0
  59. metadata +69 -10
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "Credit and mortgage account holders must submit their requests"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  subject = Spacy::Token.new(root.lefts[0])
20
22
 
21
- puts "The root of the sentence is: " + root.text
22
- puts "The subject of the sentence is: " + subject.text
23
+ puts "The root of the sentence is: #{root.text}"
24
+ puts "The subject of the sentence is: #{subject.text}"
23
25
 
24
26
  subject.subtree.each do |descendant|
25
27
  # need to convert "ancestors" object from a python generator to a ruby array
26
- ancestors = Spacy::generator_to_array(descendant.ancestors)
28
+ ancestors = Spacy.generator_to_array(descendant.ancestors)
27
29
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
28
30
  end
29
31
 
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
- sentence = "San Francisco considers banning sidewalk delivery robots"
8
+ sentence = "San Francisco considers banning sidewalk delivery robots"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
7
9
 
8
-
9
10
  results = []
10
11
 
11
12
  doc.each do |token|
12
- if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
13
- results << token.head.text
14
- end
13
+ results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
15
14
  end
16
15
 
17
16
  puts results.to_s
18
17
 
19
18
  # ["shift"]
20
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
10
12
  doc = nlp.read(sentence)
11
13
 
12
14
  texts = [
13
- "Net income was $9.4 million compared to the prior year of $2.7 million.",
14
- "Revenue exceeded twelve billion dollars, with a loss of $1b.",
15
+ "Net income was $9.4 million compared to the prior year of $2.7 million.",
16
+ "Revenue exceeded twelve billion dollars, with a loss of $1b."
15
17
  ]
16
18
 
17
19
  texts.each do |text|
18
20
  doc = nlp.read(text)
19
21
  doc.each do |token|
20
22
  if token.ent_type_ == "MONEY"
21
- if ["attr", "dobj"].index token.dep_
22
- subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
23
- if !subj.empty?
24
- puts(subj[0].text + " --> " + token.text)
25
- end
26
- elsif token.dep_ == "pobj" and token.head.dep == "prep"
27
- puts token.head.head.text + " --> " + token.text
23
+ if %w[attr dobj].index token.dep_
24
+ subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
25
+ puts("#{subj[0].text} --> #{token.text}") unless subj.empty?
26
+ elsif token.dep_ == "pobj" && token.head.dep == "prep"
27
+ puts "#{token.head.head.text} --> #{token.text}"
28
28
  end
29
29
  end
30
30
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
7
9
 
8
-
9
10
  results = []
10
11
 
11
12
  doc.each do |token|
12
- if token.pos_ == "VERB"
13
- token.children.each do |child|
14
- if child.dep_ == "nsubj"
15
- results << child.head.text
16
- end
17
- end
13
+ next unless token.pos_ == "VERB"
14
+
15
+ token.children.each do |child|
16
+ results << child.head.text if child.dep_ == "nsubj"
18
17
  end
19
18
  end
20
19
 
21
20
  puts results.to_s
22
21
 
23
22
  # ["shift"]
24
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("bright red apples on the tree")
7
9
 
8
- puts "Text: " + doc.text
10
+ puts "Text: #{doc.text}"
9
11
 
10
- puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
11
- puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
12
+ puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
13
+ puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
12
14
 
13
- puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
14
- puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
15
+ puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
16
+ puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
15
17
 
16
18
  # Text: bright red apples on the tree
17
19
  # Words to the left of 'apple': bright, red
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("I was reading the paper.")
10
12
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "Apple is looking at buying U.K. startup for $1 billion"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "start", "end", "label"]
11
+ headings = %w[text start end label]
10
12
  rows = []
11
13
 
12
14
  doc.ents.each do |ent|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
10
12
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
10
12
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
9
+ headings = %w[text lemma pos tag dep shape is_alpha is_stop]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "I live in New York"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- puts "Before: " + doc.tokens.map(&:text).join(", ")
11
+ puts "Before: #{doc.tokens.map(&:text).join(", ")}"
10
12
 
11
13
  doc.retokenize(3, 4)
12
14
 
13
- puts "After: " + doc.tokens.map(&:text).join(", ")
15
+ puts "After: #{doc.tokens.map(&:text).join(", ")}"
14
16
 
15
17
  # Before: I, live, in, New, York
16
18
  # After: I, live, in, New York
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Where are you?")
7
9
 
8
- puts "Morph features of the third word: " + doc[2].morph.to_s
9
- puts "POS of the third word: " + doc[2].pos
10
+ puts "Morph features of the third word: #{doc[2].morph}"
11
+ puts "POS of the third word: #{doc[2].pos}"
10
12
 
11
13
  # Morph features of the third word: Case=Nom|Person=2|PronType=Prs
12
14
  # POS of the third word: PRON
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
7
  doc = nlp.read("This is a sentence. This is another sentence.")
6
8
 
7
-
8
- puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
9
+ puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
9
10
 
10
11
  doc.sents.each do |sent|
11
12
  puts sent.text
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc1 = nlp.read("I like salty fries and hamburgers.")
6
8
  doc2 = nlp.read("Fast food tastes very good.")
7
9
 
8
- puts "Doc 1: " + doc1.text
9
- puts "Doc 2: " + doc2.text
10
+ puts "Doc 1: #{doc1.text}"
11
+ puts "Doc 2: #{doc2.text}"
10
12
  puts "Similarity: #{doc1.similarity(doc2)}"
11
13
 
12
14
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc1 = nlp.read("I like salty fries and hamburgers.")
6
8
  doc2 = nlp.read("Fast food tastes very good.")
7
9
 
8
- puts "Doc 1: " + doc1.text
9
- puts "Doc 2: " + doc2.text
10
+ puts "Doc 1: #{doc1.text}"
11
+ puts "Doc 2: #{doc2.text}"
10
12
  puts "Similarity: #{doc1.similarity(doc2)}"
11
13
 
12
14
  span1 = doc1.span(2, 2) # salty fries
13
- span2 = doc1.span(5 .. 5) # hamberger
14
- puts "Span 1: " + span1.text
15
- puts "Span 2: " + span2.text
15
+ span2 = doc1.span(5..5) # hamberger
16
+ puts "Span 1: #{span1.text}"
17
+ puts "Span 2: #{span2.text}"
16
18
  puts "Similarity: #{span1.similarity(span2)}"
17
19
 
18
20
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
@@ -20,4 +22,3 @@ puts table
20
22
  # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
21
23
  # | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
22
24
  # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
23
-
@@ -1,16 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  matcher = nlp.matcher
6
- matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
8
+ matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
7
9
  doc = nlp.read("Barack Obama was the 44th president of the United States")
8
10
 
9
11
  matches = matcher.match(doc)
10
12
 
11
13
  matches.each do |match|
12
- span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
13
- puts span.text + " / " + span.label
14
+ span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
15
+ puts "#{span.text} / #{span.label}"
14
16
  end
15
17
 
16
18
  # Barack Obama / US_PRESIDENT
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
7
+ pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
6
8
 
7
9
  matcher = nlp.matcher
8
10
  matcher.add("HelloWorld", pattern)
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
10
12
  doc = nlp.read("Hello, world! Hello world!")
11
13
  matches = matcher.match(doc)
12
14
 
13
- matches.each do | match |
15
+ matches.each do |match|
14
16
  string_id = nlp.vocab_string_lookup(match[:match_id])
15
17
  span = doc.span(match[:start_index]..match[:end_index])
16
18
  puts "#{string_id}, #{span.text}"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Spacy
4
4
  # The version number of the module
5
- VERSION = "0.1.4.1"
5
+ VERSION = "0.1.5.0"
6
6
  end