ruby-spacy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +58 -0
- data/.yardopts +2 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +39 -0
- data/LICENSE.txt +21 -0
- data/README.md +498 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/examples/get_started/lexeme.rb +24 -0
- data/examples/get_started/linguistic_annotations.rb +32 -0
- data/examples/get_started/most_similar.rb +46 -0
- data/examples/get_started/named_entities.rb +24 -0
- data/examples/get_started/outputs/test_dep.svg +84 -0
- data/examples/get_started/outputs/test_dep_compact.svg +84 -0
- data/examples/get_started/outputs/test_ent.html +11 -0
- data/examples/get_started/pos_tags_and_dependencies.rb +31 -0
- data/examples/get_started/similarity.rb +13 -0
- data/examples/get_started/tokenization.rb +22 -0
- data/examples/get_started/visualizing_dependencies.rb +14 -0
- data/examples/get_started/visualizing_dependencies_compact.rb +12 -0
- data/examples/get_started/visualizing_named_entities.rb +12 -0
- data/examples/get_started/vocab.rb +10 -0
- data/examples/get_started/word_vectors.rb +24 -0
- data/examples/japanese/ancestors.rb +44 -0
- data/examples/japanese/entity_annotations_and_labels.rb +45 -0
- data/examples/japanese/information_extraction.rb +27 -0
- data/examples/japanese/lemmatization.rb +32 -0
- data/examples/japanese/most_similar.rb +46 -0
- data/examples/japanese/named_entity_recognition.rb +27 -0
- data/examples/japanese/navigating_parse_tree.rb +34 -0
- data/examples/japanese/noun_chunks.rb +23 -0
- data/examples/japanese/outputs/test_dep.svg +149 -0
- data/examples/japanese/outputs/test_ent.html +16 -0
- data/examples/japanese/pos_tagging.rb +34 -0
- data/examples/japanese/sentence_segmentation.rb +16 -0
- data/examples/japanese/similarity.rb +12 -0
- data/examples/japanese/tokenization.rb +38 -0
- data/examples/japanese/visualizing_dependencies.rb +13 -0
- data/examples/japanese/visualizing_named_entities.rb +14 -0
- data/examples/linguistic_features/ancestors.rb +41 -0
- data/examples/linguistic_features/entity_annotations_and_labels.rb +29 -0
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +20 -0
- data/examples/linguistic_features/information_extraction.rb +36 -0
- data/examples/linguistic_features/iterating_children.rb +24 -0
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +20 -0
- data/examples/linguistic_features/lemmatization.rb +31 -0
- data/examples/linguistic_features/morphology.rb +17 -0
- data/examples/linguistic_features/named_entity_recognition.rb +25 -0
- data/examples/linguistic_features/navigating_parse_tree.rb +32 -0
- data/examples/linguistic_features/noun_chunks.rb +27 -0
- data/examples/linguistic_features/outputs/test_ent.html +11 -0
- data/examples/linguistic_features/pos_tagging.rb +31 -0
- data/examples/linguistic_features/retokenize_1.rb +29 -0
- data/examples/linguistic_features/retokenize_2.rb +16 -0
- data/examples/linguistic_features/rule_based_morphology.rb +12 -0
- data/examples/linguistic_features/sentence_segmentation.rb +16 -0
- data/examples/linguistic_features/similarity.rb +14 -0
- data/examples/linguistic_features/similarity_between_spans.rb +23 -0
- data/examples/linguistic_features/special_case_tokenization_rules.rb +19 -0
- data/examples/linguistic_features/tokenization.rb +23 -0
- data/examples/rule_based_matching/creating_spans_from_matches.rb +16 -0
- data/examples/rule_based_matching/matcher.rb +19 -0
- data/lib/ruby-spacy.rb +567 -0
- data/lib/ruby-spacy/version.rb +6 -0
- data/ruby-spacy.gemspec +42 -0
- metadata +157 -0
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "ruby-spacy"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/examples/get_started/lexeme.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("I love coffee")
|
6
|
+
|
7
|
+
headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |word|
|
11
|
+
lexeme = doc.vocab[word.text]
|
12
|
+
rows << [lexeme.text, lexeme.shape_, lexeme.prefix_, lexeme.suffix_, lexeme.is_alpha, lexeme.is_digit]
|
13
|
+
end
|
14
|
+
|
15
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
16
|
+
puts table
|
17
|
+
|
18
|
+
# +--------+-------+--------+--------+----------+----------+
|
19
|
+
# | text | shape | prefix | suffix | is_alpha | is_digit |
|
20
|
+
# +--------+-------+--------+--------+----------+----------+
|
21
|
+
# | I | X | I | I | true | false |
|
22
|
+
# | love | xxxx | l | ove | true | false |
|
23
|
+
# | coffee | xxxx | c | fee | true | false |
|
24
|
+
# +--------+-------+--------+--------+----------+----------+
|
data/examples/get_started/linguistic_annotations.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
|
+
|
7
|
+
headings = ["text", "pos", "dep"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |token|
|
11
|
+
rows << [token.text, token.pos_, token.dep_]
|
12
|
+
end
|
13
|
+
|
14
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
15
|
+
|
16
|
+
puts table
|
17
|
+
|
18
|
+
# +---------+-------+----------+
|
19
|
+
# | text | pos | dep |
|
20
|
+
# +---------+-------+----------+
|
21
|
+
# | Apple | PROPN | nsubj |
|
22
|
+
# | is | AUX | aux |
|
23
|
+
# | looking | VERB | ROOT |
|
24
|
+
# | at | ADP | prep |
|
25
|
+
# | buying | VERB | pcomp |
|
26
|
+
# | U.K. | PROPN | dobj |
|
27
|
+
# | startup | NOUN | advcl |
|
28
|
+
# | for | ADP | prep |
|
29
|
+
# | $ | SYM | quantmod |
|
30
|
+
# | 1 | NUM | compound |
|
31
|
+
# | billion | NUM | pobj |
|
32
|
+
# +---------+-------+----------+
|
data/examples/get_started/most_similar.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_lg")
|
5
|
+
|
6
|
+
tokyo = nlp.get_lexeme("Tokyo")
|
7
|
+
japan = nlp.get_lexeme("Japan")
|
8
|
+
france = nlp.get_lexeme("France")
|
9
|
+
|
10
|
+
query = tokyo.vector - japan.vector + france.vector
|
11
|
+
|
12
|
+
headings = ["key", "text", "score"]
|
13
|
+
rows = []
|
14
|
+
|
15
|
+
results = nlp.most_similar(query, 20)
|
16
|
+
results.each do |lexeme|
|
17
|
+
rows << [lexeme[:key], lexeme[:text], lexeme[:score]]
|
18
|
+
end
|
19
|
+
|
20
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
21
|
+
puts table
|
22
|
+
|
23
|
+
# +----------------------+-------------+--------------------+
|
24
|
+
# | key | text | score |
|
25
|
+
# +----------------------+-------------+--------------------+
|
26
|
+
# | 1432967385481565694 | FRANCE | 0.8346999883651733 |
|
27
|
+
# | 6613816697677965370 | France | 0.8346999883651733 |
|
28
|
+
# | 4362406852232399325 | france | 0.8346999883651733 |
|
29
|
+
# | 1637573253267610771 | PARIS | 0.7703999876976013 |
|
30
|
+
# | 15322182186497800017 | paris | 0.7703999876976013 |
|
31
|
+
# | 10427160276079242800 | Paris | 0.7703999876976013 |
|
32
|
+
# | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
|
33
|
+
# | 7944504257273452052 | Toulouse | 0.6381999850273132 |
|
34
|
+
# | 9614730213792621885 | toulouse | 0.6381999850273132 |
|
35
|
+
# | 8515538464606421210 | marseille | 0.6370999813079834 |
|
36
|
+
# | 8215995793762630878 | Marseille | 0.6370999813079834 |
|
37
|
+
# | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
|
38
|
+
# | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
|
39
|
+
# | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
|
40
|
+
# | 13936807859007616770 | bordeaux | 0.6096000075340271 |
|
41
|
+
# | 8731576325682930212 | prague | 0.6075000166893005 |
|
42
|
+
# | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
|
43
|
+
# | 1133963107690000953 | Prague | 0.6075000166893005 |
|
44
|
+
# | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
|
45
|
+
# | 6936121537367717968 | switzerland | 0.6068000197410583 |
|
46
|
+
# +----------------------+-------------+--------------------+
|
data/examples/get_started/named_entities.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
|
+
|
7
|
+
headings = ["text", "start_char", "end_char", "label"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.ents.each do |ent|
|
11
|
+
rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
|
12
|
+
end
|
13
|
+
|
14
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
15
|
+
|
16
|
+
puts table
|
17
|
+
|
18
|
+
# +------------+------------+----------+-------+
|
19
|
+
# | text | start_char | end_char | label |
|
20
|
+
# +------------+------------+----------+-------+
|
21
|
+
# | Apple | 0 | 5 | ORG |
|
22
|
+
# | U.K. | 27 | 31 | GPE |
|
23
|
+
# | $1 billion | 44 | 54 | MONEY |
|
24
|
+
# +------------+------------+----------+-------+
|
data/examples/get_started/outputs/test_dep.svg
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="d38e3d055efe43b381928ff23187c2d6-0" class="displacy" width="1275" height="399.5" direction="ltr" style="max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
|
2
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
3
|
+
<tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
|
4
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
|
5
|
+
</text>
|
6
|
+
|
7
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
8
|
+
<tspan class="displacy-word" fill="currentColor" x="225">cars</tspan>
|
9
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">NOUN</tspan>
|
10
|
+
</text>
|
11
|
+
|
12
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
13
|
+
<tspan class="displacy-word" fill="currentColor" x="400">shift</tspan>
|
14
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>
|
15
|
+
</text>
|
16
|
+
|
17
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
18
|
+
<tspan class="displacy-word" fill="currentColor" x="575">insurance</tspan>
|
19
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">NOUN</tspan>
|
20
|
+
</text>
|
21
|
+
|
22
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
23
|
+
<tspan class="displacy-word" fill="currentColor" x="750">liability</tspan>
|
24
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="750">NOUN</tspan>
|
25
|
+
</text>
|
26
|
+
|
27
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
28
|
+
<tspan class="displacy-word" fill="currentColor" x="925">toward</tspan>
|
29
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="925">ADP</tspan>
|
30
|
+
</text>
|
31
|
+
|
32
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="309.5">
|
33
|
+
<tspan class="displacy-word" fill="currentColor" x="1100">manufacturers</tspan>
|
34
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="1100">NOUN</tspan>
|
35
|
+
</text>
|
36
|
+
|
37
|
+
<g class="displacy-arrow">
|
38
|
+
<path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-0" stroke-width="2px" d="M70,264.5 C70,177.0 215.0,177.0 215.0,264.5" fill="none" stroke="currentColor"/>
|
39
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
40
|
+
<textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">amod</textPath>
|
41
|
+
</text>
|
42
|
+
<path class="displacy-arrowhead" d="M70,266.5 L62,254.5 78,254.5" fill="currentColor"/>
|
43
|
+
</g>
|
44
|
+
|
45
|
+
<g class="displacy-arrow">
|
46
|
+
<path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-1" stroke-width="2px" d="M245,264.5 C245,177.0 390.0,177.0 390.0,264.5" fill="none" stroke="currentColor"/>
|
47
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
48
|
+
<textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
|
49
|
+
</text>
|
50
|
+
<path class="displacy-arrowhead" d="M245,266.5 L237,254.5 253,254.5" fill="currentColor"/>
|
51
|
+
</g>
|
52
|
+
|
53
|
+
<g class="displacy-arrow">
|
54
|
+
<path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-2" stroke-width="2px" d="M595,264.5 C595,177.0 740.0,177.0 740.0,264.5" fill="none" stroke="currentColor"/>
|
55
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
56
|
+
<textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
|
57
|
+
</text>
|
58
|
+
<path class="displacy-arrowhead" d="M595,266.5 L587,254.5 603,254.5" fill="currentColor"/>
|
59
|
+
</g>
|
60
|
+
|
61
|
+
<g class="displacy-arrow">
|
62
|
+
<path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-3" stroke-width="2px" d="M420,264.5 C420,89.5 745.0,89.5 745.0,264.5" fill="none" stroke="currentColor"/>
|
63
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
64
|
+
<textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
|
65
|
+
</text>
|
66
|
+
<path class="displacy-arrowhead" d="M745.0,266.5 L753.0,254.5 737.0,254.5" fill="currentColor"/>
|
67
|
+
</g>
|
68
|
+
|
69
|
+
<g class="displacy-arrow">
|
70
|
+
<path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-4" stroke-width="2px" d="M420,264.5 C420,2.0 925.0,2.0 925.0,264.5" fill="none" stroke="currentColor"/>
|
71
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
72
|
+
<textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">prep</textPath>
|
73
|
+
</text>
|
74
|
+
<path class="displacy-arrowhead" d="M925.0,266.5 L933.0,254.5 917.0,254.5" fill="currentColor"/>
|
75
|
+
</g>
|
76
|
+
|
77
|
+
<g class="displacy-arrow">
|
78
|
+
<path class="displacy-arc" id="arrow-d38e3d055efe43b381928ff23187c2d6-0-5" stroke-width="2px" d="M945,264.5 C945,177.0 1090.0,177.0 1090.0,264.5" fill="none" stroke="currentColor"/>
|
79
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
80
|
+
<textPath xlink:href="#arrow-d38e3d055efe43b381928ff23187c2d6-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">pobj</textPath>
|
81
|
+
</text>
|
82
|
+
<path class="displacy-arrowhead" d="M1090.0,266.5 L1098.0,254.5 1082.0,254.5" fill="currentColor"/>
|
83
|
+
</g>
|
84
|
+
</svg>
|
data/examples/get_started/outputs/test_dep_compact.svg
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="2164aed0b1894f6fa4ce7be065a1063e-0" class="displacy" width="1100" height="362.0" direction="ltr" style="max-width: none; height: 362.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">
|
2
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
3
|
+
<tspan class="displacy-word" fill="currentColor" x="50">Autonomous</tspan>
|
4
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">ADJ</tspan>
|
5
|
+
</text>
|
6
|
+
|
7
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
8
|
+
<tspan class="displacy-word" fill="currentColor" x="200">cars</tspan>
|
9
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="200">NOUN</tspan>
|
10
|
+
</text>
|
11
|
+
|
12
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
13
|
+
<tspan class="displacy-word" fill="currentColor" x="350">shift</tspan>
|
14
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="350">VERB</tspan>
|
15
|
+
</text>
|
16
|
+
|
17
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
18
|
+
<tspan class="displacy-word" fill="currentColor" x="500">insurance</tspan>
|
19
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="500">NOUN</tspan>
|
20
|
+
</text>
|
21
|
+
|
22
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
23
|
+
<tspan class="displacy-word" fill="currentColor" x="650">liability</tspan>
|
24
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="650">NOUN</tspan>
|
25
|
+
</text>
|
26
|
+
|
27
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
28
|
+
<tspan class="displacy-word" fill="currentColor" x="800">toward</tspan>
|
29
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="800">ADP</tspan>
|
30
|
+
</text>
|
31
|
+
|
32
|
+
<text class="displacy-token" fill="currentColor" text-anchor="middle" y="272.0">
|
33
|
+
<tspan class="displacy-word" fill="currentColor" x="950">manufacturers</tspan>
|
34
|
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="950">NOUN</tspan>
|
35
|
+
</text>
|
36
|
+
|
37
|
+
<g class="displacy-arrow">
|
38
|
+
<path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-0" stroke-width="2px" d="M62,227.0 62,202.0 194.0,202.0 194.0,227.0" fill="none" stroke="currentColor"/>
|
39
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
40
|
+
<textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">amod</textPath>
|
41
|
+
</text>
|
42
|
+
<path class="displacy-arrowhead" d="M62,229.0 L58,221.0 66,221.0" fill="currentColor"/>
|
43
|
+
</g>
|
44
|
+
|
45
|
+
<g class="displacy-arrow">
|
46
|
+
<path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-1" stroke-width="2px" d="M212,227.0 212,202.0 344.0,202.0 344.0,227.0" fill="none" stroke="currentColor"/>
|
47
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
48
|
+
<textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>
|
49
|
+
</text>
|
50
|
+
<path class="displacy-arrowhead" d="M212,229.0 L208,221.0 216,221.0" fill="currentColor"/>
|
51
|
+
</g>
|
52
|
+
|
53
|
+
<g class="displacy-arrow">
|
54
|
+
<path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-2" stroke-width="2px" d="M512,227.0 512,202.0 644.0,202.0 644.0,227.0" fill="none" stroke="currentColor"/>
|
55
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
56
|
+
<textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>
|
57
|
+
</text>
|
58
|
+
<path class="displacy-arrowhead" d="M512,229.0 L508,221.0 516,221.0" fill="currentColor"/>
|
59
|
+
</g>
|
60
|
+
|
61
|
+
<g class="displacy-arrow">
|
62
|
+
<path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-3" stroke-width="2px" d="M362,227.0 362,177.0 647.0,177.0 647.0,227.0" fill="none" stroke="currentColor"/>
|
63
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
64
|
+
<textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">dobj</textPath>
|
65
|
+
</text>
|
66
|
+
<path class="displacy-arrowhead" d="M647.0,229.0 L651.0,221.0 643.0,221.0" fill="currentColor"/>
|
67
|
+
</g>
|
68
|
+
|
69
|
+
<g class="displacy-arrow">
|
70
|
+
<path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-4" stroke-width="2px" d="M362,227.0 362,152.0 800.0,152.0 800.0,227.0" fill="none" stroke="currentColor"/>
|
71
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
72
|
+
<textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">prep</textPath>
|
73
|
+
</text>
|
74
|
+
<path class="displacy-arrowhead" d="M800.0,229.0 L804.0,221.0 796.0,221.0" fill="currentColor"/>
|
75
|
+
</g>
|
76
|
+
|
77
|
+
<g class="displacy-arrow">
|
78
|
+
<path class="displacy-arc" id="arrow-2164aed0b1894f6fa4ce7be065a1063e-0-5" stroke-width="2px" d="M812,227.0 812,202.0 944.0,202.0 944.0,227.0" fill="none" stroke="currentColor"/>
|
79
|
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
80
|
+
<textPath xlink:href="#arrow-2164aed0b1894f6fa4ce7be065a1063e-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">pobj</textPath>
|
81
|
+
</text>
|
82
|
+
<path class="displacy-arrowhead" d="M944.0,229.0 L948.0,221.0 940.0,221.0" fill="currentColor"/>
|
83
|
+
</g>
|
84
|
+
</svg>
|
data/examples/get_started/outputs/test_ent.html
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<div class="entities" style="line-height: 2.5; direction: ltr">When
|
2
|
+
<mark class="entity" style="background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
3
|
+
Sebastian Thrun
|
4
|
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
|
5
|
+
</mark>
|
6
|
+
started working on self-driving cars at Google in
|
7
|
+
<mark class="entity" style="background: #bfe1d9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
8
|
+
2007
|
9
|
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">DATE</span>
|
10
|
+
</mark>
|
11
|
+
, few people outside of the company took him seriously.</div>
|
data/examples/get_started/pos_tags_and_dependencies.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
|
+
|
7
|
+
headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
|
8
|
+
rows = []
|
9
|
+
|
10
|
+
doc.each do |token|
|
11
|
+
rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
|
12
|
+
end
|
13
|
+
|
14
|
+
table = Terminal::Table.new rows: rows, headings: headings
|
15
|
+
puts table
|
16
|
+
|
17
|
+
# +---------+---------+-------+-----+----------+-------+----------+---------+
|
18
|
+
# | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
|
19
|
+
# +---------+---------+-------+-----+----------+-------+----------+---------+
|
20
|
+
# | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
|
21
|
+
# | is | be | AUX | VBZ | aux | xx | true | true |
|
22
|
+
# | looking | look | VERB | VBG | ROOT | xxxx | true | false |
|
23
|
+
# | at | at | ADP | IN | prep | xx | true | true |
|
24
|
+
# | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
|
25
|
+
# | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
|
26
|
+
# | startup | startup | NOUN | NN | advcl | xxxx | true | false |
|
27
|
+
# | for | for | ADP | IN | prep | xxx | true | true |
|
28
|
+
# | $ | $ | SYM | $ | quantmod | $ | false | false |
|
29
|
+
# | 1 | 1 | NUM | CD | compound | d | false | false |
|
30
|
+
# | billion | billion | NUM | CD | pobj | xxxx | true | false |
|
31
|
+
# +---------+---------+-------+-----+----------+-------+----------+---------+
|
data/examples/get_started/similarity.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
|
3
|
+
nlp = Spacy::Language.new("en_core_web_lg")
|
4
|
+
doc1 = nlp.read("I like salty fries and hamburgers.")
|
5
|
+
doc2 = nlp.read("Fast food tastes very good.")
|
6
|
+
|
7
|
+
puts "Doc 1: #{doc1.text}"
|
8
|
+
puts "Doc 2: #{doc2.text}"
|
9
|
+
puts "Similarity: #{doc1.similarity(doc2)}"
|
10
|
+
|
11
|
+
# Doc 1: I like salty fries and hamburgers.
|
12
|
+
# Doc 2: Fast food tastes very good.
|
13
|
+
# Similarity: 0.7687607012190486
|
data/examples/get_started/tokenization.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
require "terminal-table"
|
3
|
+
|
4
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
+
|
6
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
|
+
|
8
|
+
headings = [1,2,3,4,5,6,7,8,9,10,11]
|
9
|
+
row = []
|
10
|
+
|
11
|
+
doc.each do |token|
|
12
|
+
row << token.text
|
13
|
+
end
|
14
|
+
|
15
|
+
table = Terminal::Table.new rows: [row], headings: headings
|
16
|
+
puts table
|
17
|
+
|
18
|
+
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
19
|
+
# | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
|
20
|
+
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
21
|
+
# | Apple | is | looking | at | buying | U.K. | startup | for | $ | 1 | billion |
|
22
|
+
# +-------+----+---------+----+--------+------+---------+-----+---+----+---------+
|
data/examples/get_started/visualizing_dependencies.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
|
3
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
4
|
+
|
5
|
+
sentence = "Autonomous cars shift insurance liability toward manufacturers"
|
6
|
+
doc = nlp.read(sentence)
|
7
|
+
|
8
|
+
dep_svg = doc.displacy(style: "dep", compact: false)
|
9
|
+
|
10
|
+
File.open(File.join("test_dep.svg"), "w") do |file|
|
11
|
+
file.write(dep_svg)
|
12
|
+
end
|
13
|
+
|
14
|
+
|
data/examples/get_started/visualizing_dependencies_compact.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require "ruby-spacy"
|
2
|
+
|
3
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
4
|
+
|
5
|
+
sentence = "Autonomous cars shift insurance liability toward manufacturers"
|
6
|
+
doc = nlp.read(sentence)
|
7
|
+
|
8
|
+
dep_svg = doc.displacy(style: "dep", compact: true)
|
9
|
+
|
10
|
+
File.open(File.join("test_dep_compact.svg"), "w") do |file|
|
11
|
+
file.write(dep_svg)
|
12
|
+
end
|