ruby-spacy 0.1.4.1 → 0.1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/Gemfile +7 -7
- data/Gemfile.lock +88 -9
- data/README.md +7 -10
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +3 -1
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +3 -1
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +147 -142
- data/ruby-spacy.gemspec +15 -17
- metadata +68 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06dd0ed2027c0d0c2e610141ef375d483734adfe7855a2306b9b23a00a743b73
|
4
|
+
data.tar.gz: fbe23e5e67a9502d2b6bd439608a6f1d43c82c0b05437386c7f65a69326b2cf0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da5fd99c782737cea2e1fa125b66de561522ac541e7a3c15f6ebbda7c6eae1e55925b043cd3421463d95c8dda450ed6d1c65704b88a92acff4b9c716d38a96d4
|
7
|
+
data.tar.gz: db33fe51cbe8d6613afaed4e648928b95e4d78481831c0f998cca443d181903af444719d90275035b1b582d4bd281b6dc3e8624e9177506b8ad2ef836b6d82e6
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
AllCops:
|
2
|
+
NewCops: disable
|
3
|
+
SuggestExtensions: false
|
4
|
+
TargetRubyVersion: 2.6
|
5
|
+
|
6
|
+
Documentation:
|
7
|
+
Enabled: false
|
8
|
+
|
9
|
+
Naming/VariableNumber:
|
10
|
+
Enabled: false
|
11
|
+
|
12
|
+
Naming/FileName:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Style/StringLiterals:
|
16
|
+
Enabled: true
|
17
|
+
EnforcedStyle: double_quotes
|
18
|
+
|
19
|
+
Style/StringLiteralsInInterpolation:
|
20
|
+
Enabled: true
|
21
|
+
EnforcedStyle: double_quotes
|
22
|
+
|
23
|
+
Layout/LineLength:
|
24
|
+
Max: 400
|
25
|
+
|
26
|
+
Metrics/MethodLength:
|
27
|
+
Max: 80
|
28
|
+
|
29
|
+
Metrics/BlockLength:
|
30
|
+
Max: 60
|
31
|
+
|
32
|
+
Metrics/AbcSize:
|
33
|
+
Max: 60
|
34
|
+
|
35
|
+
Metrics/PerceivedComplexity:
|
36
|
+
Max: 10
|
37
|
+
|
38
|
+
Metrics/ClassLength:
|
39
|
+
Max: 400
|
40
|
+
|
41
|
+
Metrics/CyclomaticComplexity:
|
42
|
+
Max: 20
|
43
|
+
|
44
|
+
Metrics/ParameterLists:
|
45
|
+
Max: 8
|
46
|
+
|
47
|
+
Metrics/ModuleLength:
|
48
|
+
Max: 200
|
data/.solargraph.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
---
|
2
|
+
include:
|
3
|
+
- "**/*.rb"
|
4
|
+
exclude:
|
5
|
+
- spec/**/*
|
6
|
+
- test/**/*
|
7
|
+
- vendor/**/*
|
8
|
+
- ".bundle/**/*"
|
9
|
+
require: []
|
10
|
+
domains: []
|
11
|
+
reporters:
|
12
|
+
- rubocop
|
13
|
+
# - require_not_found
|
14
|
+
formatter:
|
15
|
+
rubocop:
|
16
|
+
cops: safe
|
17
|
+
except: []
|
18
|
+
only: []
|
19
|
+
extra_args: []
|
20
|
+
require_paths: []
|
21
|
+
plugins: []
|
22
|
+
max_files: 5000
|
data/Gemfile
CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
|
|
5
5
|
# Specify your gem's dependencies in ruby-spacy.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
8
|
+
gem "numpy"
|
9
|
+
gem "pycall"
|
10
|
+
gem "terminal-table"
|
11
11
|
|
12
12
|
group :development do
|
13
|
-
gem "
|
13
|
+
gem "github-markup"
|
14
14
|
gem "minitest", "~> 5.0"
|
15
|
-
gem
|
16
|
-
gem
|
17
|
-
gem
|
15
|
+
gem "rake", "~> 13.0"
|
16
|
+
gem "redcarpet"
|
17
|
+
gem "yard"
|
18
18
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
ruby-spacy (0.1.
|
4
|
+
ruby-spacy (0.1.5.0)
|
5
5
|
numpy (~> 0.4.0)
|
6
6
|
pycall (~> 1.4.0)
|
7
7
|
terminal-table (~> 3.0.1)
|
@@ -9,33 +9,112 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
|
13
|
-
|
12
|
+
ast (2.4.2)
|
13
|
+
backport (1.2.0)
|
14
|
+
benchmark (0.2.1)
|
15
|
+
diff-lcs (1.5.0)
|
16
|
+
e2mmap (0.1.0)
|
17
|
+
github-markup (4.0.1)
|
18
|
+
jaro_winkler (1.5.4)
|
19
|
+
json (2.6.3)
|
20
|
+
kramdown (2.4.0)
|
21
|
+
rexml
|
22
|
+
kramdown-parser-gfm (1.1.0)
|
23
|
+
kramdown (~> 2.0)
|
24
|
+
mini_portile2 (2.8.1)
|
25
|
+
minitest (5.17.0)
|
26
|
+
nokogiri (1.14.0)
|
27
|
+
mini_portile2 (~> 2.8.0)
|
28
|
+
racc (~> 1.4)
|
29
|
+
nokogiri (1.14.0-arm64-darwin)
|
30
|
+
racc (~> 1.4)
|
31
|
+
nokogiri (1.14.0-x86_64-darwin)
|
32
|
+
racc (~> 1.4)
|
33
|
+
nokogiri (1.14.0-x86_64-linux)
|
34
|
+
racc (~> 1.4)
|
14
35
|
numpy (0.4.0)
|
15
36
|
pycall (>= 1.2.0.beta1)
|
16
|
-
|
17
|
-
|
37
|
+
parallel (1.22.1)
|
38
|
+
parser (3.2.0.0)
|
39
|
+
ast (~> 2.4.1)
|
40
|
+
pycall (1.4.2)
|
41
|
+
racc (1.6.2)
|
42
|
+
rainbow (3.1.1)
|
43
|
+
rake (13.0.6)
|
18
44
|
redcarpet (3.5.1)
|
19
|
-
|
45
|
+
regexp_parser (2.6.2)
|
46
|
+
reverse_markdown (2.1.1)
|
47
|
+
nokogiri
|
48
|
+
rexml (3.2.5)
|
49
|
+
rspec (3.12.0)
|
50
|
+
rspec-core (~> 3.12.0)
|
51
|
+
rspec-expectations (~> 3.12.0)
|
52
|
+
rspec-mocks (~> 3.12.0)
|
53
|
+
rspec-core (3.12.0)
|
54
|
+
rspec-support (~> 3.12.0)
|
55
|
+
rspec-expectations (3.12.2)
|
56
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
57
|
+
rspec-support (~> 3.12.0)
|
58
|
+
rspec-mocks (3.12.3)
|
59
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
60
|
+
rspec-support (~> 3.12.0)
|
61
|
+
rspec-support (3.12.0)
|
62
|
+
rubocop (1.43.0)
|
63
|
+
json (~> 2.3)
|
64
|
+
parallel (~> 1.10)
|
65
|
+
parser (>= 3.2.0.0)
|
66
|
+
rainbow (>= 2.2.2, < 4.0)
|
67
|
+
regexp_parser (>= 1.8, < 3.0)
|
68
|
+
rexml (>= 3.2.5, < 4.0)
|
69
|
+
rubocop-ast (>= 1.24.1, < 2.0)
|
70
|
+
ruby-progressbar (~> 1.7)
|
71
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
72
|
+
rubocop-ast (1.24.1)
|
73
|
+
parser (>= 3.1.1.0)
|
74
|
+
ruby-progressbar (1.11.0)
|
75
|
+
solargraph (0.48.0)
|
76
|
+
backport (~> 1.2)
|
77
|
+
benchmark
|
78
|
+
bundler (>= 1.17.2)
|
79
|
+
diff-lcs (~> 1.4)
|
80
|
+
e2mmap
|
81
|
+
jaro_winkler (~> 1.5)
|
82
|
+
kramdown (~> 2.3)
|
83
|
+
kramdown-parser-gfm (~> 1.1)
|
84
|
+
parser (~> 3.0)
|
85
|
+
reverse_markdown (>= 1.0.5, < 3)
|
86
|
+
rubocop (>= 0.52)
|
87
|
+
thor (~> 1.0)
|
88
|
+
tilt (~> 2.0)
|
89
|
+
yard (~> 0.9, >= 0.9.24)
|
90
|
+
terminal-table (3.0.2)
|
20
91
|
unicode-display_width (>= 1.1.1, < 3)
|
21
|
-
|
22
|
-
|
92
|
+
thor (1.2.1)
|
93
|
+
tilt (2.0.11)
|
94
|
+
unicode-display_width (2.4.2)
|
95
|
+
webrick (1.7.0)
|
96
|
+
yard (0.9.28)
|
97
|
+
webrick (~> 1.7.0)
|
23
98
|
|
24
99
|
PLATFORMS
|
25
100
|
arm64-darwin-20
|
101
|
+
ruby
|
26
102
|
x86_64-darwin-20
|
27
103
|
x86_64-linux
|
28
104
|
|
29
105
|
DEPENDENCIES
|
106
|
+
bundler
|
30
107
|
github-markup
|
31
108
|
minitest (~> 5.0)
|
32
109
|
numpy
|
33
110
|
pycall
|
34
111
|
rake (~> 13.0)
|
35
112
|
redcarpet
|
113
|
+
rspec
|
36
114
|
ruby-spacy!
|
115
|
+
solargraph
|
37
116
|
terminal-table
|
38
117
|
yard
|
39
118
|
|
40
119
|
BUNDLED WITH
|
41
|
-
2.2
|
120
|
+
2.4.2
|
data/README.md
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
# ruby-spacy
|
2
|
-
|
3
|
-
⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
|
1
|
+
# 💎 ruby-spacy
|
4
2
|
|
5
3
|
## Overview
|
6
4
|
|
@@ -16,22 +14,22 @@
|
|
16
14
|
|
17
15
|
## Installation of prerequisites
|
18
16
|
|
19
|
-
Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.
|
17
|
+
**IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
|
20
18
|
|
21
19
|
```shell
|
22
|
-
$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.
|
20
|
+
$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
|
23
21
|
```
|
24
22
|
|
25
23
|
Don't forget to make it accessible from your working directory.
|
26
24
|
|
27
25
|
```shell
|
28
|
-
$ pyenv local 3.
|
26
|
+
$ pyenv local 3.10.6
|
29
27
|
```
|
30
28
|
|
31
29
|
Or alternatively:
|
32
30
|
|
33
31
|
```shell
|
34
|
-
$ pyenv global 3.
|
32
|
+
$ pyenv global 3.10.6
|
35
33
|
```
|
36
34
|
|
37
35
|
Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
|
@@ -451,7 +449,7 @@ query = tokyo.vector - japan.vector + france.vector
|
|
451
449
|
headings = ["rank", "text", "score"]
|
452
450
|
rows = []
|
453
451
|
|
454
|
-
results = nlp.most_similar(query,
|
452
|
+
results = nlp.most_similar(query, 10)
|
455
453
|
results.each_with_index do |lexeme, i|
|
456
454
|
index = (i + 1).to_s
|
457
455
|
rows << [index, lexeme.text, lexeme.score]
|
@@ -501,7 +499,7 @@ query = tokyo.vector - japan.vector + france.vector
|
|
501
499
|
headings = ["rank", "text", "score"]
|
502
500
|
rows = []
|
503
501
|
|
504
|
-
results = nlp.most_similar(query,
|
502
|
+
results = nlp.most_similar(query, 10)
|
505
503
|
results.each_with_index do |lexeme, i|
|
506
504
|
index = (i + 1).to_s
|
507
505
|
rows << [index, lexeme.text, lexeme.score]
|
@@ -541,4 +539,3 @@ I would like to thank the following open source projects and their creators for
|
|
541
539
|
## License
|
542
540
|
|
543
541
|
This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
544
|
-
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("I love coffee")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text shape prefix suffix is_alpha is_digit]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |word|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text pos dep]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text shape is_alpha is_stop morphology]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("France")
|
|
9
11
|
|
10
12
|
query = tokyo.vector - japan.vector + france.vector
|
11
13
|
|
12
|
-
headings = [
|
14
|
+
headings = %w[rank text score]
|
13
15
|
rows = []
|
14
16
|
|
15
17
|
results = nlp.most_similar(query, 20)
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
-
doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text start_char end_char label]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.ents.each do |ent|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_lg")
|
4
6
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
5
7
|
doc2 = nlp.read("Fast food tastes very good.")
|
6
8
|
|
7
|
-
puts "Doc 1:
|
8
|
-
puts "Doc 2:
|
9
|
+
puts "Doc 1: #{doc1.text}"
|
10
|
+
puts "Doc 2: #{doc2.text}"
|
9
11
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
10
12
|
|
11
13
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
9
|
|
8
|
-
headings = [1,2,3,4,5,6,7,8,9,10,11]
|
10
|
+
headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
9
11
|
row = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
|
|
10
12
|
File.open(File.join("test_dep.svg"), "w") do |file|
|
11
13
|
file.write(dep_svg)
|
12
14
|
end
|
13
|
-
|
14
|
-
|
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
|
-
sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
|
7
|
+
sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
|
6
8
|
doc = nlp.read(sentence)
|
7
9
|
|
8
|
-
ent_html = doc.displacy(style:
|
10
|
+
ent_html = doc.displacy(style: "ent")
|
9
11
|
|
10
12
|
File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
|
11
13
|
file.write(ent_html)
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
doc = nlp.read("I love coffee")
|
5
7
|
|
6
8
|
pp doc.vocab.strings["coffee"]
|
7
|
-
pp doc.vocab.strings[
|
9
|
+
pp doc.vocab.strings[3_197_928_453_018_144_401]
|
8
10
|
|
9
11
|
# 3197928453018144401
|
10
12
|
# "coffee"
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_lg")
|
5
7
|
doc = nlp.read("dog cat banana afskfsd")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text has_vector vector_norm is_oov]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
|
|
6
8
|
sentence = "私の父は寿司が好きだ。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text dep n_lefts n_rights ancestors]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
root = doc.tokens.select do |t|
|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
|
|
14
16
|
t.i == t.head.i
|
15
17
|
end.first
|
16
18
|
|
17
|
-
puts "The sentence: "
|
19
|
+
puts "The sentence: #{sentence}"
|
18
20
|
|
19
21
|
# subject = Spacy::Token.new(root.lefts[0])
|
20
22
|
subject = Spacy::Token.new(root.lefts[0])
|
21
23
|
|
22
|
-
puts "The root of the sentence is:
|
23
|
-
puts "The subject of the sentence is:
|
24
|
+
puts "The root of the sentence is: #{root.text}"
|
25
|
+
puts "The subject of the sentence is: #{subject.text}"
|
24
26
|
|
25
27
|
subject.subtree.each do |descendant|
|
26
28
|
rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
7
|
|
6
|
-
sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
|
8
|
+
sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text ent_iob ent_iob_ ent_type_]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.each do |ent|
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require("ruby-spacy")
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
|
|
7
9
|
nlp.add_pipe("merge_noun_chunks")
|
8
10
|
|
9
11
|
texts = [
|
10
|
-
|
11
|
-
|
12
|
+
"アメリカ合衆国の国土面積は日本の約25倍あります。",
|
13
|
+
"現在1ドルは日本円で110円です。"
|
12
14
|
]
|
13
15
|
|
14
16
|
texts.each do |text|
|
15
17
|
doc = nlp.read(text)
|
16
18
|
doc.each do |token|
|
17
|
-
if token.dep_ == "case"
|
18
|
-
puts token.head.text + " --> " + token.text
|
19
|
-
end
|
19
|
+
puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("私は論文を読んでいるところだった。")
|
7
9
|
|
8
|
-
headings = [
|
10
|
+
headings = %w[text lemma]
|
9
11
|
rows = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("フランス")
|
|
9
11
|
|
10
12
|
query = tokyo.vector - japan.vector + france.vector
|
11
13
|
|
12
|
-
headings = [
|
14
|
+
headings = %w[rank text score]
|
13
15
|
rows = []
|
14
16
|
|
15
17
|
results = nlp.most_similar(query, 20)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
|
|
6
8
|
sentence = "任天堂は1983年にファミコンを14,800円で発売した。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text start end label]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.ents.each do |ent|
|
@@ -24,4 +26,3 @@ puts table
|
|
24
26
|
# | ファミコン | 10 | 15 | PRODUCT |
|
25
27
|
# | 14,800円 | 16 | 23 | MONEY |
|
26
28
|
# +------------+-------+-----+---------+
|
27
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -15,20 +17,20 @@ end
|
|
15
17
|
table = Terminal::Table.new rows: rows, headings: headings
|
16
18
|
puts table
|
17
19
|
|
18
|
-
+------+----------+-----------+----------+------------------------+
|
19
|
-
| text | dep | head text | head pos | children |
|
20
|
-
+------+----------+-----------+----------+------------------------+
|
21
|
-
| 自動 | compound | 車 | 92 | |
|
22
|
-
| 運転 | compound | 車 | 92 | |
|
23
|
-
| 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
|
24
|
-
| は | case | 車 | 92 | |
|
25
|
-
| 保険 | compound | 責任 | 92 | |
|
26
|
-
| 責任 | obj | 転嫁 | 100 | 保険, を |
|
27
|
-
| を | case | 責任 | 92 | |
|
28
|
-
| 製造 | compound | 者 | 92 | |
|
29
|
-
| 者 | obl | 転嫁 | 100 | 製造, に |
|
30
|
-
| に | case | 者 | 92 | |
|
31
|
-
| 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
|
32
|
-
| する | aux | 転嫁 | 100 | |
|
33
|
-
| 。 | punct | 転嫁 | 100 | |
|
34
|
-
+------+----------+-----------+----------+------------------------+
|
20
|
+
# +------+----------+-----------+----------+------------------------+
|
21
|
+
# | text | dep | head text | head pos | children |
|
22
|
+
# +------+----------+-----------+----------+------------------------+
|
23
|
+
# | 自動 | compound | 車 | 92 | |
|
24
|
+
# | 運転 | compound | 車 | 92 | |
|
25
|
+
# | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
|
26
|
+
# | は | case | 車 | 92 | |
|
27
|
+
# | 保険 | compound | 責任 | 92 | |
|
28
|
+
# | 責任 | obj | 転嫁 | 100 | 保険, を |
|
29
|
+
# | を | case | 責任 | 92 | |
|
30
|
+
# | 製造 | compound | 者 | 92 | |
|
31
|
+
# | 者 | obl | 転嫁 | 100 | 製造, に |
|
32
|
+
# | に | case | 者 | 92 | |
|
33
|
+
# | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
|
34
|
+
# | する | aux | 転嫁 | 100 | |
|
35
|
+
# | 。 | punct | 転嫁 | 100 | |
|
36
|
+
# +------+----------+-----------+----------+------------------------+
|