ruby-spacy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/.gitignore +58 -0
  3. data/.yardopts +2 -0
  4. data/Gemfile +18 -0
  5. data/Gemfile.lock +39 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +498 -0
  8. data/Rakefile +12 -0
  9. data/bin/console +15 -0
  10. data/bin/setup +8 -0
  11. data/examples/get_started/lexeme.rb +24 -0
  12. data/examples/get_started/linguistic_annotations.rb +32 -0
  13. data/examples/get_started/most_similar.rb +46 -0
  14. data/examples/get_started/named_entities.rb +24 -0
  15. data/examples/get_started/outputs/test_dep.svg +84 -0
  16. data/examples/get_started/outputs/test_dep_compact.svg +84 -0
  17. data/examples/get_started/outputs/test_ent.html +11 -0
  18. data/examples/get_started/pos_tags_and_dependencies.rb +31 -0
  19. data/examples/get_started/similarity.rb +13 -0
  20. data/examples/get_started/tokenization.rb +22 -0
  21. data/examples/get_started/visualizing_dependencies.rb +14 -0
  22. data/examples/get_started/visualizing_dependencies_compact.rb +12 -0
  23. data/examples/get_started/visualizing_named_entities.rb +12 -0
  24. data/examples/get_started/vocab.rb +10 -0
  25. data/examples/get_started/word_vectors.rb +24 -0
  26. data/examples/japanese/ancestors.rb +44 -0
  27. data/examples/japanese/entity_annotations_and_labels.rb +45 -0
  28. data/examples/japanese/information_extraction.rb +27 -0
  29. data/examples/japanese/lemmatization.rb +32 -0
  30. data/examples/japanese/most_similar.rb +46 -0
  31. data/examples/japanese/named_entity_recognition.rb +27 -0
  32. data/examples/japanese/navigating_parse_tree.rb +34 -0
  33. data/examples/japanese/noun_chunks.rb +23 -0
  34. data/examples/japanese/outputs/test_dep.svg +149 -0
  35. data/examples/japanese/outputs/test_ent.html +16 -0
  36. data/examples/japanese/pos_tagging.rb +34 -0
  37. data/examples/japanese/sentence_segmentation.rb +16 -0
  38. data/examples/japanese/similarity.rb +12 -0
  39. data/examples/japanese/tokenization.rb +38 -0
  40. data/examples/japanese/visualizing_dependencies.rb +13 -0
  41. data/examples/japanese/visualizing_named_entities.rb +14 -0
  42. data/examples/linguistic_features/ancestors.rb +41 -0
  43. data/examples/linguistic_features/entity_annotations_and_labels.rb +29 -0
  44. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +20 -0
  45. data/examples/linguistic_features/information_extraction.rb +36 -0
  46. data/examples/linguistic_features/iterating_children.rb +24 -0
  47. data/examples/linguistic_features/iterating_lefts_and_rights.rb +20 -0
  48. data/examples/linguistic_features/lemmatization.rb +31 -0
  49. data/examples/linguistic_features/morphology.rb +17 -0
  50. data/examples/linguistic_features/named_entity_recognition.rb +25 -0
  51. data/examples/linguistic_features/navigating_parse_tree.rb +32 -0
  52. data/examples/linguistic_features/noun_chunks.rb +27 -0
  53. data/examples/linguistic_features/outputs/test_ent.html +11 -0
  54. data/examples/linguistic_features/pos_tagging.rb +31 -0
  55. data/examples/linguistic_features/retokenize_1.rb +29 -0
  56. data/examples/linguistic_features/retokenize_2.rb +16 -0
  57. data/examples/linguistic_features/rule_based_morphology.rb +12 -0
  58. data/examples/linguistic_features/sentence_segmentation.rb +16 -0
  59. data/examples/linguistic_features/similarity.rb +14 -0
  60. data/examples/linguistic_features/similarity_between_spans.rb +23 -0
  61. data/examples/linguistic_features/special_case_tokenization_rules.rb +19 -0
  62. data/examples/linguistic_features/tokenization.rb +23 -0
  63. data/examples/rule_based_matching/creating_spans_from_matches.rb +16 -0
  64. data/examples/rule_based_matching/matcher.rb +19 -0
  65. data/lib/ruby-spacy.rb +567 -0
  66. data/lib/ruby-spacy/version.rb +6 -0
  67. data/ruby-spacy.gemspec +42 -0
  68. metadata +157 -0
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.libs << "lib"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ end
11
+
12
+ task default: :test
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "ruby-spacy"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/examples/get_started/lexeme.rb ADDED
@@ -0,0 +1,24 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("I love coffee")
6
+
7
+ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
8
+ rows = []
9
+
10
+ doc.each do |word|
11
+ lexeme = doc.vocab[word.text]
12
+ rows << [lexeme.text, lexeme.shape_, lexeme.prefix_, lexeme.suffix_, lexeme.is_alpha, lexeme.is_digit]
13
+ end
14
+
15
+ table = Terminal::Table.new rows: rows, headings: headings
16
+ puts table
17
+
18
+ # +--------+-------+--------+--------+----------+----------+
19
+ # | text | shape | prefix | suffix | is_alpha | is_digit |
20
+ # +--------+-------+--------+--------+----------+----------+
21
+ # | I | X | I | I | true | false |
22
+ # | love | xxxx | l | ove | true | false |
23
+ # | coffee | xxxx | c | fee | true | false |
24
+ # +--------+-------+--------+--------+----------+----------+
data/examples/get_started/linguistic_annotations.rb ADDED
@@ -0,0 +1,32 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "pos", "dep"]
8
+ rows = []
9
+
10
+ doc.each do |token|
11
+ rows << [token.text, token.pos_, token.dep_]
12
+ end
13
+
14
+ table = Terminal::Table.new rows: rows, headings: headings
15
+
16
+ puts table
17
+
18
+ # +---------+-------+----------+
19
+ # | text | pos | dep |
20
+ # +---------+-------+----------+
21
+ # | Apple | PROPN | nsubj |
22
+ # | is | AUX | aux |
23
+ # | looking | VERB | ROOT |
24
+ # | at | ADP | prep |
25
+ # | buying | VERB | pcomp |
26
+ # | U.K. | PROPN | dobj |
27
+ # | startup | NOUN | advcl |
28
+ # | for | ADP | prep |
29
+ # | $ | SYM | quantmod |
30
+ # | 1 | NUM | compound |
31
+ # | billion | NUM | pobj |
32
+ # +---------+-------+----------+
data/examples/get_started/most_similar.rb ADDED
@@ -0,0 +1,46 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_lg")
5
+
6
+ tokyo = nlp.get_lexeme("Tokyo")
7
+ japan = nlp.get_lexeme("Japan")
8
+ france = nlp.get_lexeme("France")
9
+
10
+ query = tokyo.vector - japan.vector + france.vector
11
+
12
+ headings = ["key", "text", "score"]
13
+ rows = []
14
+
15
+ results = nlp.most_similar(query, 20)
16
+ results.each do |lexeme|
17
+ rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
18
+ end
19
+
20
+ table = Terminal::Table.new rows: rows, headings: headings
21
+ puts table
22
+
23
+ # +----------------------+-------------+--------------------+
24
+ # | key | text | score |
25
+ # +----------------------+-------------+--------------------+
26
+ # | 1432967385481565694 | FRANCE | 0.8346999883651733 |
27
+ # | 6613816697677965370 | France | 0.8346999883651733 |
28
+ # | 4362406852232399325 | france | 0.8346999883651733 |
29
+ # | 1637573253267610771 | PARIS | 0.7703999876976013 |
30
+ # | 15322182186497800017 | paris | 0.7703999876976013 |
31
+ # | 10427160276079242800 | Paris | 0.7703999876976013 |
32
+ # | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
33
+ # | 7944504257273452052 | Toulouse | 0.6381999850273132 |
34
+ # | 9614730213792621885 | toulouse | 0.6381999850273132 |
35
+ # | 8515538464606421210 | marseille | 0.6370999813079834 |
36
+ # | 8215995793762630878 | Marseille | 0.6370999813079834 |
37
+ # | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
38
+ # | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
39
+ # | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
40
+ # | 13936807859007616770 | bordeaux | 0.6096000075340271 |
41
+ # | 8731576325682930212 | prague | 0.6075000166893005 |
42
+ # | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
43
+ # | 1133963107690000953 | Prague | 0.6075000166893005 |
44
+ # | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
45
+ # | 6936121537367717968 | switzerland | 0.6068000197410583 |
46
+ # +----------------------+-------------+--------------------+
data/examples/get_started/named_entities.rb ADDED
@@ -0,0 +1,24 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "start_char", "end_char", "label"]
8
+ rows = []
9
+
10
+ doc.ents.each do |ent|
11
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
12
+ end
13
+
14
+ table = Terminal::Table.new rows: rows, headings: headings
15
+
16
+ puts table
17
+
18
+ # +------------+------------+----------+-------+
19
+ # | text | start_char | end_char | label |
20
+ # +------------+------------+----------+-------+
21
+ # | Apple | 0 | 5 | ORG |
22
+ # | U.K. | 27 | 31 | GPE |
23
+ # | $1 billion | 44 | 54 | MONEY |
24
+ # +------------+------------+----------+-------+
data/examples/get_started/outputs/test_dep.svg ADDED
@@ -0,0 +1,84 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="d38e3d055efe43b381928ff23187c2d6-0" class="displacy" width="1275" height="399.5" direction="ltr" style="max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
2
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
3
+ <tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
4
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
5
+ </text>
6
+
7
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
8
+ <tspan class="displacy-word" fill="currentColor" x="225">cars</tspan>
9
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">NOUN</tspan>
10
+ </text>
11
+
12
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
13
+ <tspan class="displacy-word" fill="currentColor" x="400">shift</tspan>
14
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>
15
+ </text>
16
+
17
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
18
+ <tspan class="displacy-word" fill="currentColor" x="575">insurance</tspan>
19
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">NOUN</tspan>
20
+ </text>
21
+
22
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
23
+ <tspan class="displacy-word" fill="currentColor" x="750">liability</tspan>
24
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="750">NOUN</tspan>
25
+ </text>
26
+
27
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
28
+ <tspan class="displacy-word" fill="currentColor" x="925">toward</tspan>
29
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="925">ADP</tspan>
30
+ </text>
31
+
32
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
33
+ <tspan class="displacy-word" fill="currentColor" x="1100">manufacturers</tspan>
34
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="1100">NOUN</tspan>
35
+ </text>
36
+
37
+ <g class="displacy-arrow">
38
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-0" stroke-width="2px" d="M70,264.5 C70,177.0 215.0,177.0 215.0,264.5" fill="none" stroke="currentColor"/>
39
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
40
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">amod</textPath>
41
+ </text>
42
+ <path class="displacy-arrowhead" d="M70,266.5 L62,254.5 78,254.5" fill="currentColor"/>
43
+ </g>
44
+
45
+ <g class="displacy-arrow">
46
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-1" stroke-width="2px" d="M245,264.5 C245,177.0 390.0,177.0 390.0,264.5" fill="none" stroke="currentColor"/>
47
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
48
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
49
+ </text>
50
+ <path class="displacy-arrowhead" d="M245,266.5 L237,254.5 253,254.5" fill="currentColor"/>
51
+ </g>
52
+
53
+ <g class="displacy-arrow">
54
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-2" stroke-width="2px" d="M595,264.5 C595,177.0 740.0,177.0 740.0,264.5" fill="none" stroke="currentColor"/>
55
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
56
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
57
+ </text>
58
+ <path class="displacy-arrowhead" d="M595,266.5 L587,254.5 603,254.5" fill="currentColor"/>
59
+ </g>
60
+
61
+ <g class="displacy-arrow">
62
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-3" stroke-width="2px" d="M420,264.5 C420,89.5 745.0,89.5 745.0,264.5" fill="none" stroke="currentColor"/>
63
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
64
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
65
+ </text>
66
+ <path class="displacy-arrowhead" d="M745.0,266.5 L753.0,254.5 737.0,254.5" fill="currentColor"/>
67
+ </g>
68
+
69
+ <g class="displacy-arrow">
70
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-4" stroke-width="2px" d="M420,264.5 C420,2.0 925.0,2.0 925.0,264.5" fill="none" stroke="currentColor"/>
71
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
72
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">prep</textPath>
73
+ </text>
74
+ <path class="displacy-arrowhead" d="M925.0,266.5 L933.0,254.5 917.0,254.5" fill="currentColor"/>
75
+ </g>
76
+
77
+ <g class="displacy-arrow">
78
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-5" stroke-width="2px" d="M945,264.5 C945,177.0 1090.0,177.0 1090.0,264.5" fill="none" stroke="currentColor"/>
79
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
80
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">pobj</textPath>
81
+ </text>
82
+ <path class="displacy-arrowhead" d="M1090.0,266.5 L1098.0,254.5 1082.0,254.5" fill="currentColor"/>
83
+ </g>
84
+ </svg>
data/examples/get_started/outputs/test_dep_compact.svg ADDED
@@ -0,0 +1,84 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="2164aed0b1894f6fa4ce7be065a1063e-0" class="displacy" width="1100" height="362.0" direction="ltr" style="max-width: none; height: 362.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
2
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
3
+ <tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
4
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
5
+ </text>
6
+
7
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
8
+ <tspan class="displacy-word" fill="currentColor" x="200">cars</tspan>
9
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="200">NOUN</tspan>
10
+ </text>
11
+
12
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
13
+ <tspan class="displacy-word" fill="currentColor" x="350">shift</tspan>
14
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="350">VERB</tspan>
15
+ </text>
16
+
17
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
18
+ <tspan class="displacy-word" fill="currentColor" x="500">insurance</tspan>
19
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="500">NOUN</tspan>
20
+ </text>
21
+
22
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
23
+ <tspan class="displacy-word" fill="currentColor" x="650">liability</tspan>
24
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="650">NOUN</tspan>
25
+ </text>
26
+
27
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
28
+ <tspan class="displacy-word" fill="currentColor" x="800">toward</tspan>
29
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="800">ADP</tspan>
30
+ </text>
31
+
32
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
33
+ <tspan class="displacy-word" fill="currentColor" x="950">manufacturers</tspan>
34
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="950">NOUN</tspan>
35
+ </text>
36
+
37
+ <g class="displacy-arrow">
38
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-0" stroke-width="2px" d="M62,227.0 62,202.0 194.0,202.0 194.0,227.0" fill="none" stroke="currentColor"/>
39
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
40
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">amod</textPath>
41
+ </text>
42
+ <path class="displacy-arrowhead" d="M62,229.0 L58,221.0 66,221.0" fill="currentColor"/>
43
+ </g>
44
+
45
+ <g class="displacy-arrow">
46
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-1" stroke-width="2px" d="M212,227.0 212,202.0 344.0,202.0 344.0,227.0" fill="none" stroke="currentColor"/>
47
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
48
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
49
+ </text>
50
+ <path class="displacy-arrowhead" d="M212,229.0 L208,221.0 216,221.0" fill="currentColor"/>
51
+ </g>
52
+
53
+ <g class="displacy-arrow">
54
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-2" stroke-width="2px" d="M512,227.0 512,202.0 644.0,202.0 644.0,227.0" fill="none" stroke="currentColor"/>
55
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
56
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
57
+ </text>
58
+ <path class="displacy-arrowhead" d="M512,229.0 L508,221.0 516,221.0" fill="currentColor"/>
59
+ </g>
60
+
61
+ <g class="displacy-arrow">
62
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-3" stroke-width="2px" d="M362,227.0 362,177.0 647.0,177.0 647.0,227.0" fill="none" stroke="currentColor"/>
63
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
64
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
65
+ </text>
66
+ <path class="displacy-arrowhead" d="M647.0,229.0 L651.0,221.0 643.0,221.0" fill="currentColor"/>
67
+ </g>
68
+
69
+ <g class="displacy-arrow">
70
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-4" stroke-width="2px" d="M362,227.0 362,152.0 800.0,152.0 800.0,227.0" fill="none" stroke="currentColor"/>
71
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
72
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">prep</textPath>
73
+ </text>
74
+ <path class="displacy-arrowhead" d="M800.0,229.0 L804.0,221.0 796.0,221.0" fill="currentColor"/>
75
+ </g>
76
+
77
+ <g class="displacy-arrow">
78
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-5" stroke-width="2px" d="M812,227.0 812,202.0 944.0,202.0 944.0,227.0" fill="none" stroke="currentColor"/>
79
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
80
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">pobj</textPath>
81
+ </text>
82
+ <path class="displacy-arrowhead" d="M944.0,229.0 L948.0,221.0 940.0,221.0" fill="currentColor"/>
83
+ </g>
84
+ </svg>
data/examples/get_started/outputs/test_ent.html ADDED
@@ -0,0 +1,11 @@
1
+ <div class="entities" style="line-height: 2.5; direction: ltr">When
2
+ <mark class="entity" style="background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
3
+ Sebastian Thrun
4
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
5
+ </mark>
6
+ started working on self-driving cars at Google in
7
+ <mark class="entity" style="background: #bfe1d9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
8
+ 2007
9
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">DATE</span>
10
+ </mark>
11
+ , few people outside of the company took him seriously.</div>
data/examples/get_started/pos_tags_and_dependencies.rb ADDED
@@ -0,0 +1,31 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
8
+ rows = []
9
+
10
+ doc.each do |token|
11
+ rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
12
+ end
13
+
14
+ table = Terminal::Table.new rows: rows, headings: headings
15
+ puts table
16
+
17
+ # +---------+---------+-------+-----+----------+-------+----------+---------+
18
+ # | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
19
+ # +---------+---------+-------+-----+----------+-------+----------+---------+
20
+ # | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
21
+ # | is | be | AUX | VBZ | aux | xx | true | true |
22
+ # | looking | look | VERB | VBG | ROOT | xxxx | true | false |
23
+ # | at | at | ADP | IN | prep | xx | true | true |
24
+ # | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
25
+ # | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
26
+ # | startup | startup | NOUN | NN | advcl | xxxx | true | false |
27
+ # | for | for | ADP | IN | prep | xxx | true | true |
28
+ # | $ | $ | SYM | $ | quantmod | $ | false | false |
29
+ # | 1 | 1 | NUM | CD | compound | d | false | false |
30
+ # | billion | billion | NUM | CD | pobj | xxxx | true | false |
31
+ # +---------+---------+-------+-----+----------+-------+----------+---------+
data/examples/get_started/similarity.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "ruby-spacy"
2
+
3
+ nlp = Spacy::Language.new("en_core_web_lg")
4
+ doc1 = nlp.read("I like salty fries and hamburgers.")
5
+ doc2 = nlp.read("Fast food tastes very good.")
6
+
7
+ puts "Doc 1: " + doc1
8
+ puts "Doc 2: " + doc2
9
+ puts "Similarity: #{doc1.similarity(doc2)}"
10
+
11
+ # Doc 1: I like salty fries and hamburgers.
12
+ # Doc 2: Fast food tastes very good.
13
+ # Similarity: 0.7687607012190486
data/examples/get_started/tokenization.rb ADDED
@@ -0,0 +1,22 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+
6
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
+
8
+ headings = [1,2,3,4,5,6,7,8,9,10,11]
9
+ row = []
10
+
11
+ doc.each do |token|
12
+ row << token.text
13
+ end
14
+
15
+ table = Terminal::Table.new rows: [row], headings: headings
16
+ puts table
17
+
18
+ # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
19
+ # | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
20
+ # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
21
+ # | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
22
+ # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
data/examples/get_started/visualizing_dependencies.rb ADDED
@@ -0,0 +1,14 @@
1
+ require "ruby-spacy"
2
+
3
+ nlp = Spacy::Language.new("en_core_web_sm")
4
+
5
+ sentence = "Autonomous cars shift insurance liability toward manufacturers"
6
+ doc = nlp.read(sentence)
7
+
8
+ dep_svg = doc.displacy(style: "dep", compact: false)
9
+
10
+ File.open(File.join("test_dep.svg"), "w") do |file|
11
+ file.write(dep_svg)
12
+ end
13
+
14
+
data/examples/get_started/visualizing_dependencies_compact.rb ADDED
@@ -0,0 +1,12 @@
1
+ require "ruby-spacy"
2
+
3
+ nlp = Spacy::Language.new("en_core_web_sm")
4
+
5
+ sentence = "Autonomous cars shift insurance liability toward manufacturers"
6
+ doc = nlp.read(sentence)
7
+
8
+ dep_svg = doc.displacy(style: "dep", compact: true)
9
+
10
+ File.open(File.join("test_dep_compact.svg"), "w") do |file|
11
+ file.write(dep_svg)
12
+ end