ruby-spacy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/.gitignore +58 -0
  3. data/.yardopts +2 -0
  4. data/Gemfile +18 -0
  5. data/Gemfile.lock +39 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +498 -0
  8. data/Rakefile +12 -0
  9. data/bin/console +15 -0
  10. data/bin/setup +8 -0
  11. data/examples/get_started/lexeme.rb +24 -0
  12. data/examples/get_started/linguistic_annotations.rb +32 -0
  13. data/examples/get_started/most_similar.rb +46 -0
  14. data/examples/get_started/named_entities.rb +24 -0
  15. data/examples/get_started/outputs/test_dep.svg +84 -0
  16. data/examples/get_started/outputs/test_dep_compact.svg +84 -0
  17. data/examples/get_started/outputs/test_ent.html +11 -0
  18. data/examples/get_started/pos_tags_and_dependencies.rb +31 -0
  19. data/examples/get_started/similarity.rb +13 -0
  20. data/examples/get_started/tokenization.rb +22 -0
  21. data/examples/get_started/visualizing_dependencies.rb +14 -0
  22. data/examples/get_started/visualizing_dependencies_compact.rb +12 -0
  23. data/examples/get_started/visualizing_named_entities.rb +12 -0
  24. data/examples/get_started/vocab.rb +10 -0
  25. data/examples/get_started/word_vectors.rb +24 -0
  26. data/examples/japanese/ancestors.rb +44 -0
  27. data/examples/japanese/entity_annotations_and_labels.rb +45 -0
  28. data/examples/japanese/information_extraction.rb +27 -0
  29. data/examples/japanese/lemmatization.rb +32 -0
  30. data/examples/japanese/most_similar.rb +46 -0
  31. data/examples/japanese/named_entity_recognition.rb +27 -0
  32. data/examples/japanese/navigating_parse_tree.rb +34 -0
  33. data/examples/japanese/noun_chunks.rb +23 -0
  34. data/examples/japanese/outputs/test_dep.svg +149 -0
  35. data/examples/japanese/outputs/test_ent.html +16 -0
  36. data/examples/japanese/pos_tagging.rb +34 -0
  37. data/examples/japanese/sentence_segmentation.rb +16 -0
  38. data/examples/japanese/similarity.rb +12 -0
  39. data/examples/japanese/tokenization.rb +38 -0
  40. data/examples/japanese/visualizing_dependencies.rb +13 -0
  41. data/examples/japanese/visualizing_named_entities.rb +14 -0
  42. data/examples/linguistic_features/ancestors.rb +41 -0
  43. data/examples/linguistic_features/entity_annotations_and_labels.rb +29 -0
  44. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +20 -0
  45. data/examples/linguistic_features/information_extraction.rb +36 -0
  46. data/examples/linguistic_features/iterating_children.rb +24 -0
  47. data/examples/linguistic_features/iterating_lefts_and_rights.rb +20 -0
  48. data/examples/linguistic_features/lemmatization.rb +31 -0
  49. data/examples/linguistic_features/morphology.rb +17 -0
  50. data/examples/linguistic_features/named_entity_recognition.rb +25 -0
  51. data/examples/linguistic_features/navigating_parse_tree.rb +32 -0
  52. data/examples/linguistic_features/noun_chunks.rb +27 -0
  53. data/examples/linguistic_features/outputs/test_ent.html +11 -0
  54. data/examples/linguistic_features/pos_tagging.rb +31 -0
  55. data/examples/linguistic_features/retokenize_1.rb +29 -0
  56. data/examples/linguistic_features/retokenize_2.rb +16 -0
  57. data/examples/linguistic_features/rule_based_morphology.rb +12 -0
  58. data/examples/linguistic_features/sentence_segmentation.rb +16 -0
  59. data/examples/linguistic_features/similarity.rb +14 -0
  60. data/examples/linguistic_features/similarity_between_spans.rb +23 -0
  61. data/examples/linguistic_features/special_case_tokenization_rules.rb +19 -0
  62. data/examples/linguistic_features/tokenization.rb +23 -0
  63. data/examples/rule_based_matching/creating_spans_from_matches.rb +16 -0
  64. data/examples/rule_based_matching/matcher.rb +19 -0
  65. data/lib/ruby-spacy.rb +567 -0
  66. data/lib/ruby-spacy/version.rb +6 -0
  67. data/ruby-spacy.gemspec +42 -0
  68. metadata +157 -0
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.libs << "lib"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ end
11
+
12
+ task default: :test
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "ruby-spacy"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/examples/get_started/lexeme.rb ADDED
@@ -0,0 +1,24 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("I love coffee")
6
+
7
+ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
8
+ rows = []
9
+
10
+ doc.each do |word|
11
+ lexeme = doc.vocab[word.text]
12
+ rows << [lexeme.text, lexeme.shape_, lexeme.prefix_, lexeme.suffix_, lexeme.is_alpha, lexeme.is_digit]
13
+ end
14
+
15
+ table = Terminal::Table.new rows: rows, headings: headings
16
+ puts table
17
+
18
+ # +--------+-------+--------+--------+----------+----------+
19
+ # | text | shape | prefix | suffix | is_alpha | is_digit |
20
+ # +--------+-------+--------+--------+----------+----------+
21
+ # | I | X | I | I | true | false |
22
+ # | love | xxxx | l | ove | true | false |
23
+ # | coffee | xxxx | c | fee | true | false |
24
+ # +--------+-------+--------+--------+----------+----------+
data/examples/get_started/linguistic_annotations.rb ADDED
@@ -0,0 +1,32 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "pos", "dep"]
8
+ rows = []
9
+
10
+ doc.each do |token|
11
+ rows << [token.text, token.pos_, token.dep_]
12
+ end
13
+
14
+ table = Terminal::Table.new rows: rows, headings: headings
15
+
16
+ puts table
17
+
18
+ # +---------+-------+----------+
19
+ # | text | pos | dep |
20
+ # +---------+-------+----------+
21
+ # | Apple | PROPN | nsubj |
22
+ # | is | AUX | aux |
23
+ # | looking | VERB | ROOT |
24
+ # | at | ADP | prep |
25
+ # | buying | VERB | pcomp |
26
+ # | U.K. | PROPN | dobj |
27
+ # | startup | NOUN | advcl |
28
+ # | for | ADP | prep |
29
+ # | $ | SYM | quantmod |
30
+ # | 1 | NUM | compound |
31
+ # | billion | NUM | pobj |
32
+ # +---------+-------+----------+
data/examples/get_started/most_similar.rb ADDED
@@ -0,0 +1,46 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_lg")
5
+
6
+ tokyo = nlp.get_lexeme("Tokyo")
7
+ japan = nlp.get_lexeme("Japan")
8
+ france = nlp.get_lexeme("France")
9
+
10
+ query = tokyo.vector - japan.vector + france.vector
11
+
12
+ headings = ["key", "text", "score"]
13
+ rows = []
14
+
15
+ results = nlp.most_similar(query, 20)
16
+ results.each do |lexeme|
17
+ rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
18
+ end
19
+
20
+ table = Terminal::Table.new rows: rows, headings: headings
21
+ puts table
22
+
23
+ # +----------------------+-------------+--------------------+
24
+ # | key | text | score |
25
+ # +----------------------+-------------+--------------------+
26
+ # | 1432967385481565694 | FRANCE | 0.8346999883651733 |
27
+ # | 6613816697677965370 | France | 0.8346999883651733 |
28
+ # | 4362406852232399325 | france | 0.8346999883651733 |
29
+ # | 1637573253267610771 | PARIS | 0.7703999876976013 |
30
+ # | 15322182186497800017 | paris | 0.7703999876976013 |
31
+ # | 10427160276079242800 | Paris | 0.7703999876976013 |
32
+ # | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
33
+ # | 7944504257273452052 | Toulouse | 0.6381999850273132 |
34
+ # | 9614730213792621885 | toulouse | 0.6381999850273132 |
35
+ # | 8515538464606421210 | marseille | 0.6370999813079834 |
36
+ # | 8215995793762630878 | Marseille | 0.6370999813079834 |
37
+ # | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
38
+ # | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
39
+ # | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
40
+ # | 13936807859007616770 | bordeaux | 0.6096000075340271 |
41
+ # | 8731576325682930212 | prague | 0.6075000166893005 |
42
+ # | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
43
+ # | 1133963107690000953 | Prague | 0.6075000166893005 |
44
+ # | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
45
+ # | 6936121537367717968 | switzerland | 0.6068000197410583 |
46
+ # +----------------------+-------------+--------------------+
data/examples/get_started/named_entities.rb ADDED
@@ -0,0 +1,24 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "start_char", "end_char", "label"]
8
+ rows = []
9
+
10
+ doc.ents.each do |ent|
11
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
12
+ end
13
+
14
+ table = Terminal::Table.new rows: rows, headings: headings
15
+
16
+ puts table
17
+
18
+ # +------------+------------+----------+-------+
19
+ # | text | start_char | end_char | label |
20
+ # +------------+------------+----------+-------+
21
+ # | Apple | 0 | 5 | ORG |
22
+ # | U.K. | 27 | 31 | GPE |
23
+ # | $1 billion | 44 | 54 | MONEY |
24
+ # +------------+------------+----------+-------+
data/examples/get_started/outputs/test_dep.svg ADDED
@@ -0,0 +1,84 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="d38e3d055efe43b381928ff23187c2d6-0" class="displacy" width="1275" height="399.5" direction="ltr" style="max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
2
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
3
+ <tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
4
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
5
+ </text>
6
+
7
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
8
+ <tspan class="displacy-word" fill="currentColor" x="225">cars</tspan>
9
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">NOUN</tspan>
10
+ </text>
11
+
12
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
13
+ <tspan class="displacy-word" fill="currentColor" x="400">shift</tspan>
14
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>
15
+ </text>
16
+
17
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
18
+ <tspan class="displacy-word" fill="currentColor" x="575">insurance</tspan>
19
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">NOUN</tspan>
20
+ </text>
21
+
22
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
23
+ <tspan class="displacy-word" fill="currentColor" x="750">liability</tspan>
24
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="750">NOUN</tspan>
25
+ </text>
26
+
27
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
28
+ <tspan class="displacy-word" fill="currentColor" x="925">toward</tspan>
29
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="925">ADP</tspan>
30
+ </text>
31
+
32
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
33
+ <tspan class="displacy-word" fill="currentColor" x="1100">manufacturers</tspan>
34
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="1100">NOUN</tspan>
35
+ </text>
36
+
37
+ <g class="displacy-arrow">
38
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-0" stroke-width="2px" d="M70,264.5 C70,177.0 215.0,177.0 215.0,264.5" fill="none" stroke="currentColor"/>
39
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
40
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">amod</textPath>
41
+ </text>
42
+ <path class="displacy-arrowhead" d="M70,266.5 L62,254.5 78,254.5" fill="currentColor"/>
43
+ </g>
44
+
45
+ <g class="displacy-arrow">
46
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-1" stroke-width="2px" d="M245,264.5 C245,177.0 390.0,177.0 390.0,264.5" fill="none" stroke="currentColor"/>
47
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
48
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
49
+ </text>
50
+ <path class="displacy-arrowhead" d="M245,266.5 L237,254.5 253,254.5" fill="currentColor"/>
51
+ </g>
52
+
53
+ <g class="displacy-arrow">
54
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-2" stroke-width="2px" d="M595,264.5 C595,177.0 740.0,177.0 740.0,264.5" fill="none" stroke="currentColor"/>
55
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
56
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
57
+ </text>
58
+ <path class="displacy-arrowhead" d="M595,266.5 L587,254.5 603,254.5" fill="currentColor"/>
59
+ </g>
60
+
61
+ <g class="displacy-arrow">
62
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-3" stroke-width="2px" d="M420,264.5 C420,89.5 745.0,89.5 745.0,264.5" fill="none" stroke="currentColor"/>
63
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
64
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
65
+ </text>
66
+ <path class="displacy-arrowhead" d="M745.0,266.5 L753.0,254.5 737.0,254.5" fill="currentColor"/>
67
+ </g>
68
+
69
+ <g class="displacy-arrow">
70
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-4" stroke-width="2px" d="M420,264.5 C420,2.0 925.0,2.0 925.0,264.5" fill="none" stroke="currentColor"/>
71
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
72
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">prep</textPath>
73
+ </text>
74
+ <path class="displacy-arrowhead" d="M925.0,266.5 L933.0,254.5 917.0,254.5" fill="currentColor"/>
75
+ </g>
76
+
77
+ <g class="displacy-arrow">
78
+ <path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-5" stroke-width="2px" d="M945,264.5 C945,177.0 1090.0,177.0 1090.0,264.5" fill="none" stroke="currentColor"/>
79
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
80
+ <textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">pobj</textPath>
81
+ </text>
82
+ <path class="displacy-arrowhead" d="M1090.0,266.5 L1098.0,254.5 1082.0,254.5" fill="currentColor"/>
83
+ </g>
84
+ </svg>
data/examples/get_started/outputs/test_dep_compact.svg ADDED
@@ -0,0 +1,84 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="2164aed0b1894f6fa4ce7be065a1063e-0" class="displacy" width="1100" height="362.0" direction="ltr" style="max-width: none; height: 362.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
2
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
3
+ <tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
4
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
5
+ </text>
6
+
7
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
8
+ <tspan class="displacy-word" fill="currentColor" x="200">cars</tspan>
9
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="200">NOUN</tspan>
10
+ </text>
11
+
12
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
13
+ <tspan class="displacy-word" fill="currentColor" x="350">shift</tspan>
14
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="350">VERB</tspan>
15
+ </text>
16
+
17
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
18
+ <tspan class="displacy-word" fill="currentColor" x="500">insurance</tspan>
19
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="500">NOUN</tspan>
20
+ </text>
21
+
22
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
23
+ <tspan class="displacy-word" fill="currentColor" x="650">liability</tspan>
24
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="650">NOUN</tspan>
25
+ </text>
26
+
27
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
28
+ <tspan class="displacy-word" fill="currentColor" x="800">toward</tspan>
29
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="800">ADP</tspan>
30
+ </text>
31
+
32
+ <text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
33
+ <tspan class="displacy-word" fill="currentColor" x="950">manufacturers</tspan>
34
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="950">NOUN</tspan>
35
+ </text>
36
+
37
+ <g class="displacy-arrow">
38
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-0" stroke-width="2px" d="M62,227.0 62,202.0 194.0,202.0 194.0,227.0" fill="none" stroke="currentColor"/>
39
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
40
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">amod</textPath>
41
+ </text>
42
+ <path class="displacy-arrowhead" d="M62,229.0 L58,221.0 66,221.0" fill="currentColor"/>
43
+ </g>
44
+
45
+ <g class="displacy-arrow">
46
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-1" stroke-width="2px" d="M212,227.0 212,202.0 344.0,202.0 344.0,227.0" fill="none" stroke="currentColor"/>
47
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
48
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
49
+ </text>
50
+ <path class="displacy-arrowhead" d="M212,229.0 L208,221.0 216,221.0" fill="currentColor"/>
51
+ </g>
52
+
53
+ <g class="displacy-arrow">
54
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-2" stroke-width="2px" d="M512,227.0 512,202.0 644.0,202.0 644.0,227.0" fill="none" stroke="currentColor"/>
55
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
56
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
57
+ </text>
58
+ <path class="displacy-arrowhead" d="M512,229.0 L508,221.0 516,221.0" fill="currentColor"/>
59
+ </g>
60
+
61
+ <g class="displacy-arrow">
62
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-3" stroke-width="2px" d="M362,227.0 362,177.0 647.0,177.0 647.0,227.0" fill="none" stroke="currentColor"/>
63
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
64
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
65
+ </text>
66
+ <path class="displacy-arrowhead" d="M647.0,229.0 L651.0,221.0 643.0,221.0" fill="currentColor"/>
67
+ </g>
68
+
69
+ <g class="displacy-arrow">
70
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-4" stroke-width="2px" d="M362,227.0 362,152.0 800.0,152.0 800.0,227.0" fill="none" stroke="currentColor"/>
71
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
72
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">prep</textPath>
73
+ </text>
74
+ <path class="displacy-arrowhead" d="M800.0,229.0 L804.0,221.0 796.0,221.0" fill="currentColor"/>
75
+ </g>
76
+
77
+ <g class="displacy-arrow">
78
+ <path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-5" stroke-width="2px" d="M812,227.0 812,202.0 944.0,202.0 944.0,227.0" fill="none" stroke="currentColor"/>
79
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
80
+ <textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">pobj</textPath>
81
+ </text>
82
+ <path class="displacy-arrowhead" d="M944.0,229.0 L948.0,221.0 940.0,221.0" fill="currentColor"/>
83
+ </g>
84
+ </svg>
data/examples/get_started/outputs/test_ent.html ADDED
@@ -0,0 +1,11 @@
1
+ <div class="entities" style="line-height: 2.5; direction: ltr">When
2
+ <mark class="entity" style="background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
3
+ Sebastian Thrun
4
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
5
+ </mark>
6
+ started working on self-driving cars at Google in
7
+ <mark class="entity" style="background: #bfe1d9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
8
+ 2007
9
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">DATE</span>
10
+ </mark>
11
+ , few people outside of the company took him seriously.</div>
data/examples/get_started/pos_tags_and_dependencies.rb ADDED
@@ -0,0 +1,31 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
+
7
+ headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
8
+ rows = []
9
+
10
+ doc.each do |token|
11
+ rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
12
+ end
13
+
14
+ table = Terminal::Table.new rows: rows, headings: headings
15
+ puts table
16
+
17
+ # +---------+---------+-------+-----+----------+-------+----------+---------+
18
+ # | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
19
+ # +---------+---------+-------+-----+----------+-------+----------+---------+
20
+ # | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
21
+ # | is | be | AUX | VBZ | aux | xx | true | true |
22
+ # | looking | look | VERB | VBG | ROOT | xxxx | true | false |
23
+ # | at | at | ADP | IN | prep | xx | true | true |
24
+ # | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
25
+ # | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
26
+ # | startup | startup | NOUN | NN | advcl | xxxx | true | false |
27
+ # | for | for | ADP | IN | prep | xxx | true | true |
28
+ # | $ | $ | SYM | $ | quantmod | $ | false | false |
29
+ # | 1 | 1 | NUM | CD | compound | d | false | false |
30
+ # | billion | billion | NUM | CD | pobj | xxxx | true | false |
31
+ # +---------+---------+-------+-----+----------+-------+----------+---------+
data/examples/get_started/similarity.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "ruby-spacy"
2
+
3
+ nlp = Spacy::Language.new("en_core_web_lg")
4
+ doc1 = nlp.read("I like salty fries and hamburgers.")
5
+ doc2 = nlp.read("Fast food tastes very good.")
6
+
7
+ puts "Doc 1: " + doc1
8
+ puts "Doc 2: " + doc2
9
+ puts "Similarity: #{doc1.similarity(doc2)}"
10
+
11
+ # Doc 1: I like salty fries and hamburgers.
12
+ # Doc 2: Fast food tastes very good.
13
+ # Similarity: 0.7687607012190486
data/examples/get_started/tokenization.rb ADDED
@@ -0,0 +1,22 @@
1
+ require "ruby-spacy"
2
+ require "terminal-table"
3
+
4
+ nlp = Spacy::Language.new("en_core_web_sm")
5
+
6
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
+
8
+ headings = [1,2,3,4,5,6,7,8,9,10,11]
9
+ row = []
10
+
11
+ doc.each do |token|
12
+ row << token.text
13
+ end
14
+
15
+ table = Terminal::Table.new rows: [row], headings: headings
16
+ puts table
17
+
18
+ # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
19
+ # | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
20
+ # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
21
+ # | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
22
+ # +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
data/examples/get_started/visualizing_dependencies.rb ADDED
@@ -0,0 +1,14 @@
1
+ require "ruby-spacy"
2
+
3
+ nlp = Spacy::Language.new("en_core_web_sm")
4
+
5
+ sentence = "Autonomous cars shift insurance liability toward manufacturers"
6
+ doc = nlp.read(sentence)
7
+
8
+ dep_svg = doc.displacy(style: "dep", compact: false)
9
+
10
+ File.open(File.join("test_dep.svg"), "w") do |file|
11
+ file.write(dep_svg)
12
+ end
13
+
14
+
data/examples/get_started/visualizing_dependencies_compact.rb ADDED
@@ -0,0 +1,12 @@
1
+ require "ruby-spacy"
2
+
3
+ nlp = Spacy::Language.new("en_core_web_sm")
4
+
5
+ sentence = "Autonomous cars shift insurance liability toward manufacturers"
6
+ doc = nlp.read(sentence)
7
+
8
+ dep_svg = doc.displacy(style: "dep", compact: true)
9
+
10
+ File.open(File.join("test_dep_compact.svg"), "w") do |file|
11
+ file.write(dep_svg)
12
+ end