ruby-spacy 0.1.4 → 0.1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/CHANGELOG.md +5 -1
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +3 -3
  7. data/README.md +40 -39
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +30 -27
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +30 -27
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +142 -136
  58. data/ruby-spacy.gemspec +15 -17
  59. data/tags +132 -0
  60. metadata +69 -10
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,38 +11,39 @@ france = nlp.get_lexeme("フランス")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["key", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
18
+ results.each_with_index do |lexeme, i|
19
+ index = (i + 1).to_s
20
+ rows << [index, lexeme.text, lexeme.score]
18
21
  end
19
22
 
20
23
  table = Terminal::Table.new rows: rows, headings: headings
21
24
  puts table
22
25
 
23
- # +----------------------+----------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+----------------+--------------------+
26
- # | 12090003238699662352 | パリ | 0.7376999855041504 |
27
- # | 18290786970454458111 | フランス | 0.7221999764442444 |
28
- # | 9360021637096476946 | 東京 | 0.6697999835014343 |
29
- # | 2437546359230213520 | ストラスブール | 0.631600022315979 |
30
- # | 13988178952745813186 | リヨン | 0.5939000248908997 |
31
- # | 10427160276079242800 | Paris | 0.574400007724762 |
32
- # | 5562396768860926997 | ベルギー | 0.5683000087738037 |
33
- # | 15029176915627965481 | ニース | 0.5679000020027161 |
34
- # | 9750625950625019690 | アルザス | 0.5644999742507935 |
35
- # | 2381640614569534741 | 南仏 | 0.5547999739646912 |
36
- # | 7486004458946554189 | ロンドン | 0.5525000095367432 |
37
- # | 7457654095417343716 | モンマルトル | 0.5453000068664551 |
38
- # | 14063777960246535660 | ブローニュ | 0.5338000059127808 |
39
- # | 3297880777656467136 | トゥールーズ | 0.5275999903678894 |
40
- # | 3059066136348671923 | バスティーユ | 0.5213000178337097 |
41
- # | 2423471048892368989 | フランス人 | 0.5194000005722046 |
42
- # | 15944886306236465675 | ロレーヌ | 0.5148000121116638 |
43
- # | 9592561648283566590 | モンパルナス | 0.513700008392334 |
44
- # | 6560045335275831141 | 渡仏 | 0.5131000280380249 |
45
- # | 8597467336360225096 | イタリア | 0.5127000212669373 |
46
- # +----------------------+----------------+--------------------+
26
+ # +------+----------------+--------------------+
27
+ # | rank | text | score |
28
+ # +------+----------------+--------------------+
29
+ # | 1 | パリ | 0.7376999855041504 |
30
+ # | 2 | フランス | 0.7221999764442444 |
31
+ # | 3 | 東京 | 0.6697999835014343 |
32
+ # | 4 | ストラスブール | 0.631600022315979 |
33
+ # | 5 | リヨン | 0.5939000248908997 |
34
+ # | 6 | Paris | 0.574400007724762 |
35
+ # | 7 | ベルギー | 0.5683000087738037 |
36
+ # | 8 | ニース | 0.5679000020027161 |
37
+ # | 9 | アルザス | 0.5644999742507935 |
38
+ # | 10 | 南仏 | 0.5547999739646912 |
39
+ # | 11 | ロンドン | 0.5525000095367432 |
40
+ # | 12 | モンマルトル | 0.5453000068664551 |
41
+ # | 13 | ブローニュ | 0.5338000059127808 |
42
+ # | 14 | トゥールーズ | 0.5275999903678894 |
43
+ # | 15 | バスティーユ | 0.5213000178337097 |
44
+ # | 16 | フランス人 | 0.5194000005722046 |
45
+ # | 17 | ロレーヌ | 0.5148000121116638 |
46
+ # | 18 | モンパルナス | 0.513700008392334 |
47
+ # | 19 | 渡仏 | 0.5131000280380249 |
48
+ # | 20 | イタリア | 0.5127000212669373 |
49
+ # +------+----------------+--------------------+
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "任天堂は1983年にファミコンを14,800円で発売した。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "start", "end", "label"]
11
+ headings = %w[text start end label]
10
12
  rows = []
11
13
 
12
14
  doc.ents.each do |ent|
@@ -24,4 +26,3 @@ puts table
24
26
  # | ファミコン | 10 | 15 | PRODUCT |
25
27
  # | 14,800円 | 16 | 23 | MONEY |
26
28
  # +------------+-------+-----+---------+
27
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -15,20 +17,20 @@ end
15
17
  table = Terminal::Table.new rows: rows, headings: headings
16
18
  puts table
17
19
 
18
- +------+----------+-----------+----------+------------------------+
19
- | text | dep | head text | head pos | children |
20
- +------+----------+-----------+----------+------------------------+
21
- | 自動 | compound | 車 | 92 | |
22
- | 運転 | compound | 車 | 92 | |
23
- | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
24
- | は | case | 車 | 92 | |
25
- | 保険 | compound | 責任 | 92 | |
26
- | 責任 | obj | 転嫁 | 100 | 保険, を |
27
- | を | case | 責任 | 92 | |
28
- | 製造 | compound | 者 | 92 | |
29
- | 者 | obl | 転嫁 | 100 | 製造, に |
30
- | に | case | 者 | 92 | |
31
- | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
32
- | する | aux | 転嫁 | 100 | |
33
- | 。 | punct | 転嫁 | 100 | |
34
- +------+----------+-----------+----------+------------------------+
20
+ # +------+----------+-----------+----------+------------------------+
21
+ # | text | dep | head text | head pos | children |
22
+ # +------+----------+-----------+----------+------------------------+
23
+ # | 自動 | compound | 車 | 92 | |
24
+ # | 運転 | compound | 車 | 92 | |
25
+ # | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
26
+ # | は | case | 車 | 92 | |
27
+ # | 保険 | compound | 責任 | 92 | |
28
+ # | 責任 | obj | 転嫁 | 100 | 保険, を |
29
+ # | を | case | 責任 | 92 | |
30
+ # | 製造 | compound | 者 | 92 | |
31
+ # | 者 | obl | 転嫁 | 100 | 製造, に |
32
+ # | に | case | 者 | 92 | |
33
+ # | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
34
+ # | する | aux | 転嫁 | 100 | |
35
+ # | 。 | punct | 転嫁 | 100 | |
36
+ # +------+----------+-----------+----------+------------------------+
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
5
7
  doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep"]
9
+ headings = %w[text lemma pos tag dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("ja_core_news_sm")
4
6
 
5
7
  doc = nlp.read("これは文です。今私は「これは文です」と言いました。")
6
8
 
7
-
8
- puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
9
+ puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
9
10
 
10
11
  doc.sents.each do |sent|
11
12
  puts sent.text
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("ja_core_news_lg")
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
6
8
  sentence = "自動運転車は保険責任を製造者に転嫁する。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- dep_svg = doc.displacy(style: 'dep', compact: false)
11
+ dep_svg = doc.displacy(style: "dep", compact: false)
10
12
 
11
13
  File.open(File.join(File.dirname(__FILE__), "test_dep.svg"), "w") do |file|
12
14
  file.write(dep_svg)
@@ -1,13 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
5
7
 
6
- sentence ="セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
8
+ sentence = "セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
7
9
 
8
10
  doc = nlp.read(sentence)
9
11
 
10
- ent_html = doc.displacy(style: 'ent')
12
+ ent_html = doc.displacy(style: "ent")
11
13
 
12
14
  File.open(File.join(File.dirname(__FILE__), "outputs/test_ent.html"), "w") do |file|
13
15
  file.write(ent_html)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "Credit and mortgage account holders must submit their requests"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,16 +16,16 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  subject = Spacy::Token.new(root.lefts[0])
20
22
 
21
- puts "The root of the sentence is: " + root.text
22
- puts "The subject of the sentence is: " + subject.text
23
+ puts "The root of the sentence is: #{root.text}"
24
+ puts "The subject of the sentence is: #{subject.text}"
23
25
 
24
26
  subject.subtree.each do |descendant|
25
27
  # need to convert "ancestors" object from a python generator to a ruby array
26
- ancestors = Spacy::generator_to_array(descendant.ancestors)
28
+ ancestors = Spacy.generator_to_array(descendant.ancestors)
27
29
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
28
30
  end
29
31
 
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
- sentence = "San Francisco considers banning sidewalk delivery robots"
8
+ sentence = "San Francisco considers banning sidewalk delivery robots"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,16 +7,12 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
7
9
 
8
-
9
10
  results = []
10
11
 
11
12
  doc.each do |token|
12
- if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
13
- results << token.head.text
14
- end
13
+ results << token.head.text if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
15
14
  end
16
15
 
17
16
  puts results.to_s
18
17
 
19
18
  # ["shift"]
20
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -10,21 +12,19 @@ sentence = "Credit and mortgage account holders must submit their requests"
10
12
  doc = nlp.read(sentence)
11
13
 
12
14
  texts = [
13
- "Net income was $9.4 million compared to the prior year of $2.7 million.",
14
- "Revenue exceeded twelve billion dollars, with a loss of $1b.",
15
+ "Net income was $9.4 million compared to the prior year of $2.7 million.",
16
+ "Revenue exceeded twelve billion dollars, with a loss of $1b."
15
17
  ]
16
18
 
17
19
  texts.each do |text|
18
20
  doc = nlp.read(text)
19
21
  doc.each do |token|
20
22
  if token.ent_type_ == "MONEY"
21
- if ["attr", "dobj"].index token.dep_
22
- subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
23
- if !subj.empty?
24
- puts(subj[0].text + " --> " + token.text)
25
- end
26
- elsif token.dep_ == "pobj" and token.head.dep == "prep"
27
- puts token.head.head.text + " --> " + token.text
23
+ if %w[attr dobj].index token.dep_
24
+ subj = Spacy.generator_to_array(token.head.lefts).select { |t| t.dep == "nsubj" }
25
+ puts("#{subj[0].text} --> #{token.text}") unless subj.empty?
26
+ elsif token.dep_ == "pobj" && token.head.dep == "prep"
27
+ puts "#{token.head.head.text} --> #{token.text}"
28
28
  end
29
29
  end
30
30
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,20 +7,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
7
9
 
8
-
9
10
  results = []
10
11
 
11
12
  doc.each do |token|
12
- if token.pos_ == "VERB"
13
- token.children.each do |child|
14
- if child.dep_ == "nsubj"
15
- results << child.head.text
16
- end
17
- end
13
+ next unless token.pos_ == "VERB"
14
+
15
+ token.children.each do |child|
16
+ results << child.head.text if child.dep_ == "nsubj"
18
17
  end
19
18
  end
20
19
 
21
20
  puts results.to_s
22
21
 
23
22
  # ["shift"]
24
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,13 +7,13 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("bright red apples on the tree")
7
9
 
8
- puts "Text: " + doc.text
10
+ puts "Text: #{doc.text}"
9
11
 
10
- puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
11
- puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
12
+ puts "Words to the left of 'apple': #{doc[2].lefts.map(&:text).join(", ")}"
13
+ puts "Words to the right of 'apple': #{doc[2].rights.map(&:text).join(", ")}"
12
14
 
13
- puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
14
- puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
15
+ puts "Num of the words to the left of 'apple': #{doc[2].n_lefts}"
16
+ puts "Num of the words to the right of 'apple': #{doc[2].n_rights}"
15
17
 
16
18
  # Text: bright red apples on the tree
17
19
  # Words to the left of 'apple': bright, red
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("I was reading the paper.")
10
12
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "Apple is looking at buying U.K. startup for $1 billion"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "start", "end", "label"]
11
+ headings = %w[text start end label]
10
12
  rows = []
11
13
 
12
14
  doc.ents.each do |ent|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
10
12
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  lemmatizer = nlp.get_pipe("lemmatizer")
7
- puts "Lemmatizer mode: " + lemmatizer.mode
9
+ puts "Lemmatizer mode: #{lemmatizer.mode}"
8
10
 
9
11
  doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")
10
12
 
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
9
+ headings = %w[text lemma pos tag dep shape is_alpha is_stop]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,11 +8,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
6
8
  sentence = "I live in New York"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- puts "Before: " + doc.tokens.map(&:text).join(", ")
11
+ puts "Before: #{doc.tokens.map(&:text).join(", ")}"
10
12
 
11
13
  doc.retokenize(3, 4)
12
14
 
13
- puts "After: " + doc.tokens.map(&:text).join(", ")
15
+ puts "After: #{doc.tokens.map(&:text).join(", ")}"
14
16
 
15
17
  # Before: I, live, in, New, York
16
18
  # After: I, live, in, New York
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Where are you?")
7
9
 
8
- puts "Morph features of the third word: " + doc[2].morph.to_s
9
- puts "POS of the third word: " + doc[2].pos
10
+ puts "Morph features of the third word: #{doc[2].morph}"
11
+ puts "POS of the third word: #{doc[2].pos}"
10
12
 
11
13
  # Morph features of the third word: Case=Nom|Person=2|PronType=Prs
12
14
  # POS of the third word: PRON
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
7
  doc = nlp.read("This is a sentence. This is another sentence.")
6
8
 
7
-
8
- puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
9
+ puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
9
10
 
10
11
  doc.sents.each do |sent|
11
12
  puts sent.text
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,8 +7,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc1 = nlp.read("I like salty fries and hamburgers.")
6
8
  doc2 = nlp.read("Fast food tastes very good.")
7
9
 
8
- puts "Doc 1: " + doc1.text
9
- puts "Doc 2: " + doc2.text
10
+ puts "Doc 1: #{doc1.text}"
11
+ puts "Doc 2: #{doc2.text}"
10
12
  puts "Similarity: #{doc1.similarity(doc2)}"
11
13
 
12
14
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,14 +7,14 @@ nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc1 = nlp.read("I like salty fries and hamburgers.")
6
8
  doc2 = nlp.read("Fast food tastes very good.")
7
9
 
8
- puts "Doc 1: " + doc1.text
9
- puts "Doc 2: " + doc2.text
10
+ puts "Doc 1: #{doc1.text}"
11
+ puts "Doc 2: #{doc2.text}"
10
12
  puts "Similarity: #{doc1.similarity(doc2)}"
11
13
 
12
14
  span1 = doc1.span(2, 2) # salty fries
13
- span2 = doc1.span(5 .. 5) # hamberger
14
- puts "Span 1: " + span1.text
15
- puts "Span 2: " + span2.text
15
+ span2 = doc1.span(5..5) # hamberger
16
+ puts "Span 1: #{span1.text}"
17
+ puts "Span 2: #{span2.text}"
16
18
  puts "Similarity: #{span1.similarity(span2)}"
17
19
 
18
20
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
@@ -20,4 +22,3 @@ puts table
20
22
  # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
21
23
  # | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
22
24
  # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
23
-
@@ -1,16 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  matcher = nlp.matcher
6
- matcher.add("US_PRESIDENT", [[{LOWER: "barack"}, {LOWER: "obama"}]])
8
+ matcher.add("US_PRESIDENT", [[{ LOWER: "barack" }, { LOWER: "obama" }]])
7
9
  doc = nlp.read("Barack Obama was the 44th president of the United States")
8
10
 
9
11
  matches = matcher.match(doc)
10
12
 
11
13
  matches.each do |match|
12
- span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
13
- puts span.text + " / " + span.label
14
+ span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: { label: match[:match_id] })
15
+ puts "#{span.text} / #{span.label}"
14
16
  end
15
17
 
16
18
  # Barack Obama / US_PRESIDENT
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- pattern = [[{LOWER: "hello"}, {IS_PUNCT: true}, {LOWER: "world"}]]
7
+ pattern = [[{ LOWER: "hello" }, { IS_PUNCT: true }, { LOWER: "world" }]]
6
8
 
7
9
  matcher = nlp.matcher
8
10
  matcher.add("HelloWorld", pattern)
@@ -10,7 +12,7 @@ matcher.add("HelloWorld", pattern)
10
12
  doc = nlp.read("Hello, world! Hello world!")
11
13
  matches = matcher.match(doc)
12
14
 
13
- matches.each do | match |
15
+ matches.each do |match|
14
16
  string_id = nlp.vocab_string_lookup(match[:match_id])
15
17
  span = doc.span(match[:start_index]..match[:end_index])
16
18
  puts "#{string_id}, #{span.text}"