ruby-spacy 0.1.4 → 0.1.5.0
- checksums.yaml +4 -4
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/CHANGELOG.md +5 -1
- data/Gemfile +7 -7
- data/Gemfile.lock +3 -3
- data/README.md +40 -39
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +30 -27
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +30 -27
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +142 -136
- data/ruby-spacy.gemspec +15 -17
- data/tags +132 -0
- metadata +69 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4cd52dfe6ab652bcefacd7401deef42ccccb5e711d418ca127776e66673b87f0
+  data.tar.gz: 5b2bd6ac16341c09e53673a31b60cb31a9c07d89344d35b7a7d9c01fe629881a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3af0557f6a33c0a4bfbf6a65e1e8922e14dc3f9df70fbc7bbe271212134b3d8b27e908aa79fd172fedf9e8daa4b39c6d1967eb1e2972d186c9ddb0a0bd6685c5
+  data.tar.gz: c893e49c75fb0ddb861c052ca5415df0235ef6d1d15960f6b2e5c4b815f1f6018607a8fff6b56bd1cbf5514c9762f8e39c9ce731f8ae713d148eb17eb7d9531b
data/.rubocop.yml
ADDED
@@ -0,0 +1,48 @@
+AllCops:
+  NewCops: disable
+  SuggestExtensions: false
+  TargetRubyVersion: 2.6
+
+Documentation:
+  Enabled: false
+
+Naming/VariableNumber:
+  Enabled: false
+
+Naming/FileName:
+  Enabled: false
+
+Style/StringLiterals:
+  Enabled: true
+  EnforcedStyle: double_quotes
+
+Style/StringLiteralsInInterpolation:
+  Enabled: true
+  EnforcedStyle: double_quotes
+
+Layout/LineLength:
+  Max: 400
+
+Metrics/MethodLength:
+  Max: 80
+
+Metrics/BlockLength:
+  Max: 60
+
+Metrics/AbcSize:
+  Max: 60
+
+Metrics/PerceivedComplexity:
+  Max: 10
+
+Metrics/ClassLength:
+  Max: 400
+
+Metrics/CyclomaticComplexity:
+  Max: 20
+
+Metrics/ParameterLists:
+  Max: 8
+
+Metrics/ModuleLength:
+  Max: 200
data/.solargraph.yml
ADDED
@@ -0,0 +1,22 @@
+---
+include:
+- "**/*.rb"
+exclude:
+- spec/**/*
+- test/**/*
+- vendor/**/*
+- ".bundle/**/*"
+require: []
+domains: []
+reporters:
+- rubocop
+# - require_not_found
+formatter:
+  rubocop:
+    cops: safe
+    except: []
+    only: []
+    extra_args: []
+require_paths: []
+plugins: []
+max_files: 5000
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
 # Specify your gem's dependencies in ruby-spacy.gemspec
 gemspec
 
-gem
-gem
-gem
+gem "numpy"
+gem "pycall"
+gem "terminal-table"
 
 group :development do
-  gem "
+  gem "github-markup"
   gem "minitest", "~> 5.0"
-  gem
-  gem
-  gem
+  gem "rake", "~> 13.0"
+  gem "redcarpet"
+  gem "yard"
 end
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ruby-spacy (0.1.4)
+    ruby-spacy (0.1.4.1)
       numpy (~> 0.4.0)
       pycall (~> 1.4.0)
       terminal-table (~> 3.0.1)
@@ -13,8 +13,8 @@ GEM
     minitest (5.14.4)
     numpy (0.4.0)
       pycall (>= 1.2.0.beta1)
-    pycall (1.4.
-    rake (13.0.
+    pycall (1.4.1)
+    rake (13.0.6)
     redcarpet (3.5.1)
     terminal-table (3.0.1)
       unicode-display_width (>= 1.1.1, < 3)
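
The runtime dependencies are now listed without version constraints in the Gemfile because, as its comment notes, the constraints live in `ruby-spacy.gemspec`; the PATH section of the Gemfile.lock above reflects them. A hedged sketch of what the corresponding gemspec declarations presumably look like (the constraints are copied from the lock file; the real gemspec, which also changes in this release, is not shown in this excerpt):

```ruby
# Hypothetical reconstruction for illustration only: the version constraints
# are copied from the Gemfile.lock PATH section above, not from the real
# ruby-spacy.gemspec (which also changes in this release but is not shown here).
Gem::Specification.new do |spec|
  spec.name    = "ruby-spacy"
  spec.version = "0.1.4.1"

  # Runtime dependencies are pinned here rather than in the Gemfile.
  spec.add_dependency "numpy", "~> 0.4.0"
  spec.add_dependency "pycall", "~> 1.4.0"
  spec.add_dependency "terminal-table", "~> 3.0.1"
end
```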
data/README.md
CHANGED
@@ -1,6 +1,4 @@
-# ruby-spacy
-
-⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
+# 💎 ruby-spacy
 
 ## Overview
 
@@ -16,22 +14,22 @@
 
 ## Installation of prerequisites
 
-Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.
+**IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
 
 ```shell
-$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.
+$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
 ```
 
 Don't forget to make it accessible from your working directory.
 
 ```shell
-$ pyenv local 3.
+$ pyenv local 3.10.6
 ```
 
 Or alternatively:
 
 ```shell
-$ pyenv global 3.
+$ pyenv global 3.10.6
 ```
 
 Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
@@ -448,32 +446,36 @@ france = nlp.get_lexeme("France")
 
 query = tokyo.vector - japan.vector + france.vector
 
+headings = ["rank", "text", "score"]
 rows = []
 
 results = nlp.most_similar(query, 10)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end
 
-headings = ["key", "text", "score"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```
 
 Output:
 
+| rank | text | score |
+|:-----|:------------|:-------------------|
+| 1 | FRANCE | 0.8346999883651733 |
+| 2 | France | 0.8346999883651733 |
+| 3 | france | 0.8346999883651733 |
+| 4 | PARIS | 0.7703999876976013 |
+| 5 | paris | 0.7703999876976013 |
+| 6 | Paris | 0.7703999876976013 |
+| 7 | TOULOUSE | 0.6381999850273132 |
+| 8 | Toulouse | 0.6381999850273132 |
+| 9 | toulouse | 0.6381999850273132 |
+| 10 | marseille | 0.6370999813079834 |
 
 
 ### Word vector calculation (Japanese)
@@ -494,33 +496,33 @@ france = nlp.get_lexeme("フランス")
 
 query = tokyo.vector - japan.vector + france.vector
 
+headings = ["rank", "text", "score"]
 rows = []
 
 results = nlp.most_similar(query, 10)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end
 
-headings = ["key", "text", "score"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```
 
 Output:
 
+| rank | text | score |
+|:-----|:---------------|:-------------------|
+| 1 | パリ | 0.7376999855041504 |
+| 2 | フランス | 0.7221999764442444 |
+| 3 | 東京 | 0.6697999835014343 |
+| 4 | ストラスブール | 0.631600022315979 |
+| 5 | リヨン | 0.5939000248908997 |
+| 6 | Paris | 0.574400007724762 |
+| 7 | ベルギー | 0.5683000087738037 |
+| 8 | ニース | 0.5679000020027161 |
+| 9 | アルザス | 0.5644999742507935 |
+| 10 | 南仏 | 0.5547999739646912 |
 
 ## Author
 
@@ -537,4 +539,3 @@ I would like to thank the following open source projects and their creators for
 ## License
 
 This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
-
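
The word-vector sections of the README now rank `most_similar` results instead of keying them by lexeme ID. Assembled from the fragments in the diff above, a minimal runnable sketch of the updated usage might look like this (the `en_core_web_lg` model is an assumption taken from the gem's other word-vector examples, and spaCy plus that model must already be installed):

```ruby
# frozen_string_literal: true

# Minimal sketch of the updated most_similar usage shown in the README diff.
# Assumes the en_core_web_lg model (needed for word vectors) is installed.
require "ruby-spacy"
require "terminal-table"

nlp = Spacy::Language.new("en_core_web_lg")

tokyo = nlp.get_lexeme("Tokyo")
japan = nlp.get_lexeme("Japan")
france = nlp.get_lexeme("France")

# Analogy query: Tokyo - Japan + France should land near French city names.
query = tokyo.vector - japan.vector + france.vector

headings = ["rank", "text", "score"]
rows = []

nlp.most_similar(query, 10).each_with_index do |lexeme, i|
  rows << [(i + 1).to_s, lexeme.text, lexeme.score]
end

puts Terminal::Table.new(rows: rows, headings: headings)
```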
data/examples/get_started/lexeme.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("I love coffee")
 
-headings = [
+headings = %w[text shape prefix suffix is_alpha is_digit]
 rows = []
 
 doc.each do |word|
data/examples/get_started/linguistic_annotations.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 
-headings = [
+headings = %w[text pos dep]
 rows = []
 
 doc.each do |token|
data/examples/get_started/morphology.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 
-headings = [
+headings = %w[text shape is_alpha is_stop morphology]
 rows = []
 
 doc.each do |token|
data/examples/get_started/most_similar.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
@@ -9,38 +11,39 @@ france = nlp.get_lexeme("France")
 
 query = tokyo.vector - japan.vector + france.vector
 
-headings = [
+headings = %w[rank text score]
 rows = []
 
 results = nlp.most_similar(query, 20)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 
+# +------+-------------+--------------------+
+# | rank | text        | score              |
+# +------+-------------+--------------------+
+# | 1    | FRANCE      | 0.8346999883651733 |
+# | 2    | France      | 0.8346999883651733 |
+# | 3    | france      | 0.8346999883651733 |
+# | 4    | PARIS       | 0.7703999876976013 |
+# | 5    | paris       | 0.7703999876976013 |
+# | 6    | Paris       | 0.7703999876976013 |
+# | 7    | TOULOUSE    | 0.6381999850273132 |
+# | 8    | Toulouse    | 0.6381999850273132 |
+# | 9    | toulouse    | 0.6381999850273132 |
+# | 10   | marseille   | 0.6370999813079834 |
+# | 11   | Marseille   | 0.6370999813079834 |
+# | 12   | MARSEILLE   | 0.6370999813079834 |
+# | 13   | Bordeaux    | 0.6096000075340271 |
+# | 14   | BORDEAUX    | 0.6096000075340271 |
+# | 15   | bordeaux    | 0.6096000075340271 |
+# | 16   | prague      | 0.6075000166893005 |
+# | 17   | PRAGUE      | 0.6075000166893005 |
+# | 18   | Prague      | 0.6075000166893005 |
+# | 19   | SWITZERLAND | 0.6068000197410583 |
+# | 20   | switzerland | 0.6068000197410583 |
+# +------+-------------+--------------------+
data/examples/get_started/named_entities.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("en_core_web_sm")
-doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
+doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 
-headings = [
+headings = %w[text start_char end_char label]
 rows = []
 
 doc.ents.each do |ent|
data/examples/get_started/pos_tags_and_dependencies.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
 
-headings = [
+headings = %w[text lemma pos tag dep]
 rows = []
 
 doc.each do |token|
data/examples/get_started/similarity.rb
CHANGED
@@ -1,11 +1,13 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 
 nlp = Spacy::Language.new("en_core_web_lg")
 doc1 = nlp.read("I like salty fries and hamburgers.")
 doc2 = nlp.read("Fast food tastes very good.")
 
-puts "Doc 1:
-puts "Doc 2:
+puts "Doc 1: #{doc1.text}"
+puts "Doc 2: #{doc2.text}"
 puts "Similarity: #{doc1.similarity(doc2)}"
 
 # Doc 1: I like salty fries and hamburgers.
data/examples/get_started/tokenization.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
 
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 
-headings = [1,2,3,4,5,6,7,8,9,10,11]
+headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
 row = []
 
 doc.each do |token|
data/examples/get_started/visualizing_dependencies.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 
 nlp = Spacy::Language.new("en_core_web_sm")
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
 File.open(File.join("test_dep.svg"), "w") do |file|
   file.write(dep_svg)
 end
-
-
data/examples/get_started/visualizing_named_entities.rb
CHANGED
@@ -1,11 +1,13 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 
 nlp = Spacy::Language.new("en_core_web_sm")
 
-sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
+sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
 doc = nlp.read(sentence)
 
-ent_html = doc.displacy(style:
+ent_html = doc.displacy(style: "ent")
 
 File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
   file.write(ent_html)
data/examples/get_started/vocab.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 
 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("I love coffee")
 
 pp doc.vocab.strings["coffee"]
-pp doc.vocab.strings[
+pp doc.vocab.strings[3_197_928_453_018_144_401]
 
 # 3197928453018144401
 # "coffee"
data/examples/get_started/word_vectors.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("en_core_web_lg")
 doc = nlp.read("dog cat banana afskfsd")
 
-headings = [
+headings = %w[text has_vector vector_norm is_oov]
 rows = []
 
 doc.each do |token|
data/examples/japanese/ancestors.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
 sentence = "私の父は寿司が好きだ。"
 doc = nlp.read(sentence)
 
-headings = [
+headings = %w[text dep n_lefts n_rights ancestors]
 rows = []
 
 root = doc.tokens.select do |t|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
   t.i == t.head.i
 end.first
 
-puts "The sentence: "
+puts "The sentence: #{sentence}"
 
 # subject = Spacy::Token.new(root.lefts[0])
 subject = Spacy::Token.new(root.lefts[0])
 
-puts "The root of the sentence is:
-puts "The subject of the sentence is:
+puts "The root of the sentence is: #{root.text}"
+puts "The subject of the sentence is: #{subject.text}"
 
 subject.subtree.each do |descendant|
   rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
data/examples/japanese/entity_annotations_and_labels.rb
CHANGED
@@ -1,12 +1,14 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
 nlp = Spacy::Language.new("ja_core_news_lg")
 
-sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
+sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
 doc = nlp.read(sentence)
 
-headings = [
+headings = %w[text ent_iob ent_iob_ ent_type_]
 rows = []
 
 doc.each do |ent|
data/examples/japanese/information_extraction.rb
CHANGED
@@ -1,4 +1,6 @@
-
+# frozen_string_literal: true
+
+require("ruby-spacy")
 require "terminal-table"
 
 nlp = Spacy::Language.new("ja_core_news_lg")
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
 nlp.add_pipe("merge_noun_chunks")
 
 texts = [
-
-
+  "アメリカ合衆国の国土面積は日本の約25倍あります。",
+  "現在1ドルは日本円で110円です。"
 ]
 
 texts.each do |text|
   doc = nlp.read(text)
   doc.each do |token|
-    if token.dep_ == "case"
-      puts token.head.text + " --> " + token.text
-    end
+    puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
   end
 end
 
data/examples/japanese/lemmatization.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
 
 doc = nlp.read("私は論文を読んでいるところだった。")
 
-headings = [
+headings = %w[text lemma]
 rows = []
 
 doc.each do |token|