ruby-spacy 0.1.4 → 0.1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/CHANGELOG.md +5 -1
- data/Gemfile +7 -7
- data/Gemfile.lock +3 -3
- data/README.md +40 -39
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +30 -27
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +30 -27
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +142 -136
- data/ruby-spacy.gemspec +15 -17
- data/tags +132 -0
- metadata +69 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4cd52dfe6ab652bcefacd7401deef42ccccb5e711d418ca127776e66673b87f0
+  data.tar.gz: 5b2bd6ac16341c09e53673a31b60cb31a9c07d89344d35b7a7d9c01fe629881a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3af0557f6a33c0a4bfbf6a65e1e8922e14dc3f9df70fbc7bbe271212134b3d8b27e908aa79fd172fedf9e8daa4b39c6d1967eb1e2972d186c9ddb0a0bd6685c5
+  data.tar.gz: c893e49c75fb0ddb861c052ca5415df0235ef6d1d15960f6b2e5c4b815f1f6018607a8fff6b56bd1cbf5514c9762f8e39c9ce731f8ae713d148eb17eb7d9531b
data/.rubocop.yml
ADDED
@@ -0,0 +1,48 @@
+AllCops:
+  NewCops: disable
+  SuggestExtensions: false
+  TargetRubyVersion: 2.6
+
+Documentation:
+  Enabled: false
+
+Naming/VariableNumber:
+  Enabled: false
+
+Naming/FileName:
+  Enabled: false
+
+Style/StringLiterals:
+  Enabled: true
+  EnforcedStyle: double_quotes
+
+Style/StringLiteralsInInterpolation:
+  Enabled: true
+  EnforcedStyle: double_quotes
+
+Layout/LineLength:
+  Max: 400
+
+Metrics/MethodLength:
+  Max: 80
+
+Metrics/BlockLength:
+  Max: 60
+
+Metrics/AbcSize:
+  Max: 60
+
+Metrics/PerceivedComplexity:
+  Max: 10
+
+Metrics/ClassLength:
+  Max: 400
+
+Metrics/CyclomaticComplexity:
+  Max: 20
+
+Metrics/ParameterLists:
+  Max: 8
+
+Metrics/ModuleLength:
+  Max: 200
data/.solargraph.yml
ADDED
@@ -0,0 +1,22 @@
+---
+include:
+- "**/*.rb"
+exclude:
+- spec/**/*
+- test/**/*
+- vendor/**/*
+- ".bundle/**/*"
+require: []
+domains: []
+reporters:
+- rubocop
+# - require_not_found
+formatter:
+  rubocop:
+    cops: safe
+    except: []
+    only: []
+    extra_args: []
+require_paths: []
+plugins: []
+max_files: 5000
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
 # Specify your gem's dependencies in ruby-spacy.gemspec
 gemspec

-gem
-gem
-gem
+gem "numpy"
+gem "pycall"
+gem "terminal-table"

 group :development do
-gem "
+  gem "github-markup"
   gem "minitest", "~> 5.0"
-gem
-gem
-gem
+  gem "rake", "~> 13.0"
+  gem "redcarpet"
+  gem "yard"
 end
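Assembled from the added lines and hunk context above, the new Gemfile reads roughly as follows (a sketch: only the lines visible in this diff are included; nothing beyond them is implied):

```ruby
# Sketch of the updated Gemfile, assembled from the + lines and context shown in the diff.
source "https://rubygems.org"

# Specify your gem's dependencies in ruby-spacy.gemspec
gemspec

gem "numpy"
gem "pycall"
gem "terminal-table"

group :development do
  gem "github-markup"
  gem "minitest", "~> 5.0"
  gem "rake", "~> 13.0"
  gem "redcarpet"
  gem "yard"
end
```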
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ruby-spacy (0.1.4)
+    ruby-spacy (0.1.4.1)
       numpy (~> 0.4.0)
       pycall (~> 1.4.0)
       terminal-table (~> 3.0.1)
@@ -13,8 +13,8 @@ GEM
     minitest (5.14.4)
     numpy (0.4.0)
       pycall (>= 1.2.0.beta1)
-    pycall (1.4.
-    rake (13.0.
+    pycall (1.4.1)
+    rake (13.0.6)
     redcarpet (3.5.1)
     terminal-table (3.0.1)
       unicode-display_width (>= 1.1.1, < 3)
data/README.md
CHANGED
@@ -1,6 +1,4 @@
-# ruby-spacy
-
-⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
+# 💎 ruby-spacy

 ## Overview

@@ -16,22 +14,22 @@

 ## Installation of prerequisites

-Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.
+**IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:

 ```shell
-$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.
+$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
 ```

 Don't forget to make it accessible from your working directory.

 ```shell
-$ pyenv local 3.
+$ pyenv local 3.10.6
 ```

 Or alternatively:

 ```shell
-$ pyenv global 3.
+$ pyenv global 3.10.6
 ```

 Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
@@ -448,32 +446,36 @@ france = nlp.get_lexeme("France")

 query = tokyo.vector - japan.vector + france.vector

+headings = ["rank", "text", "score"]
 rows = []

 results = nlp.most_similar(query, 10)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end

-headings = ["key", "text", "score"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```

 Output:

-
-
-
-
-
-
-
-
-
-
-
-
+| rank | text | score |
+|:-----|:------------|:-------------------|
+| 1 | FRANCE | 0.8346999883651733 |
+| 2 | France | 0.8346999883651733 |
+| 3 | france | 0.8346999883651733 |
+| 4 | PARIS | 0.7703999876976013 |
+| 5 | paris | 0.7703999876976013 |
+| 6 | Paris | 0.7703999876976013 |
+| 7 | TOULOUSE | 0.6381999850273132 |
+| 8 | Toulouse | 0.6381999850273132 |
+| 9 | toulouse | 0.6381999850273132 |
+| 10 | marseille | 0.6370999813079834 |
+
+
+


 ### Word vector calculation (Japanese)
@@ -494,33 +496,33 @@ france = nlp.get_lexeme("フランス")

 query = tokyo.vector - japan.vector + france.vector

+headings = ["rank", "text", "score"]
 rows = []

 results = nlp.most_similar(query, 10)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end

-headings = ["key", "text", "score"]
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 ```

 Output:

-
-
-
-
-
-
-
-
-
-
-
-
-
+| rank | text | score |
+|:-----|:---------------|:-------------------|
+| 1 | パリ | 0.7376999855041504 |
+| 2 | フランス | 0.7221999764442444 |
+| 3 | 東京 | 0.6697999835014343 |
+| 4 | ストラスブール | 0.631600022315979 |
+| 5 | リヨン | 0.5939000248908997 |
+| 6 | Paris | 0.574400007724762 |
+| 7 | ベルギー | 0.5683000087738037 |
+| 8 | ニース | 0.5679000020027161 |
+| 9 | アルザス | 0.5644999742507935 |
+| 10 | 南仏 | 0.5547999739646912 |

 ## Author

@@ -537,4 +539,3 @@ I would like to thank the following open source projects and their creators for
 ## License

 This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
-
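For readers who want to try the updated word-vector example without reading it through diff markers, here is the snippet assembled from the added lines above (a sketch: the model name `en_core_web_lg` and the `tokyo`/`japan` `get_lexeme` calls are assumptions based on the surrounding README section, which this hunk only partially shows; the `france` line and everything below it are taken from the diff):

```ruby
# Sketch of the updated README word-vector example.
# Assumed: en_core_web_lg model and the tokyo/japan get_lexeme calls (context not shown in the hunk).
require "ruby-spacy"
require "terminal-table"

nlp = Spacy::Language.new("en_core_web_lg")

tokyo = nlp.get_lexeme("Tokyo")
japan = nlp.get_lexeme("Japan")
france = nlp.get_lexeme("France")

# Analogy query: Tokyo - Japan + France ≈ a French city
query = tokyo.vector - japan.vector + france.vector

headings = ["rank", "text", "score"]
rows = []

results = nlp.most_similar(query, 10)
results.each_with_index do |lexeme, i|
  index = (i + 1).to_s
  rows << [index, lexeme.text, lexeme.score]
end

table = Terminal::Table.new rows: rows, headings: headings
puts table
```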
data/examples/get_started/lexeme.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("I love coffee")

-headings = [
+headings = %w[text shape prefix suffix is_alpha is_digit]
 rows = []

 doc.each do |word|
data/examples/get_started/linguistic_annotations.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")

-headings = [
+headings = %w[text pos dep]
 rows = []

 doc.each do |token|
data/examples/get_started/morphology.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")

-headings = [
+headings = %w[text shape is_alpha is_stop morphology]
 rows = []

 doc.each do |token|
data/examples/get_started/most_similar.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

@@ -9,38 +11,39 @@ france = nlp.get_lexeme("France")

 query = tokyo.vector - japan.vector + france.vector

-headings = [
+headings = %w[rank text score]
 rows = []

 results = nlp.most_similar(query, 20)
-results.
-
+results.each_with_index do |lexeme, i|
+  index = (i + 1).to_s
+  rows << [index, lexeme.text, lexeme.score]
 end

 table = Terminal::Table.new rows: rows, headings: headings
 puts table

-#
-# |
-#
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-# |
-#
+# +------+-------------+--------------------+
+# | rank | text        | score              |
+# +------+-------------+--------------------+
+# | 1    | FRANCE      | 0.8346999883651733 |
+# | 2    | France      | 0.8346999883651733 |
+# | 3    | france      | 0.8346999883651733 |
+# | 4    | PARIS       | 0.7703999876976013 |
+# | 5    | paris       | 0.7703999876976013 |
+# | 6    | Paris       | 0.7703999876976013 |
+# | 7    | TOULOUSE    | 0.6381999850273132 |
+# | 8    | Toulouse    | 0.6381999850273132 |
+# | 9    | toulouse    | 0.6381999850273132 |
+# | 10   | marseille   | 0.6370999813079834 |
+# | 11   | Marseille   | 0.6370999813079834 |
+# | 12   | MARSEILLE   | 0.6370999813079834 |
+# | 13   | Bordeaux    | 0.6096000075340271 |
+# | 14   | BORDEAUX    | 0.6096000075340271 |
+# | 15   | bordeaux    | 0.6096000075340271 |
+# | 16   | prague      | 0.6075000166893005 |
+# | 17   | PRAGUE      | 0.6075000166893005 |
+# | 18   | Prague      | 0.6075000166893005 |
+# | 19   | SWITZERLAND | 0.6068000197410583 |
+# | 20   | switzerland | 0.6068000197410583 |
+# +------+-------------+--------------------+
data/examples/get_started/named_entities.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("en_core_web_sm")
-doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
+doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")

-headings = [
+headings = %w[text start_char end_char label]
 rows = []

 doc.ents.each do |ent|
data/examples/get_started/pos_tags_and_dependencies.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")

-headings = [
+headings = %w[text lemma pos tag dep]
 rows = []

 doc.each do |token|
data/examples/get_started/similarity.rb
CHANGED
@@ -1,11 +1,13 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"

 nlp = Spacy::Language.new("en_core_web_lg")
 doc1 = nlp.read("I like salty fries and hamburgers.")
 doc2 = nlp.read("Fast food tastes very good.")

-puts "Doc 1:
-puts "Doc 2:
+puts "Doc 1: #{doc1.text}"
+puts "Doc 2: #{doc2.text}"
 puts "Similarity: #{doc1.similarity(doc2)}"

 # Doc 1: I like salty fries and hamburgers.
data/examples/get_started/tokenization.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")

 doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")

-headings = [1,2,3,4,5,6,7,8,9,10,11]
+headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
 row = []

 doc.each do |token|
data/examples/get_started/visualizing_dependencies.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"

 nlp = Spacy::Language.new("en_core_web_sm")
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
 File.open(File.join("test_dep.svg"), "w") do |file|
   file.write(dep_svg)
 end
-
-
data/examples/get_started/visualizing_named_entities.rb
CHANGED
@@ -1,11 +1,13 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"

 nlp = Spacy::Language.new("en_core_web_sm")

-sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
+sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
 doc = nlp.read(sentence)

-ent_html = doc.displacy(style:
+ent_html = doc.displacy(style: "ent")

 File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
   file.write(ent_html)
data/examples/get_started/vocab.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"

 nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("I love coffee")

 pp doc.vocab.strings["coffee"]
-pp doc.vocab.strings[
+pp doc.vocab.strings[3_197_928_453_018_144_401]

 # 3197928453018144401
 # "coffee"
data/examples/get_started/word_vectors.rb
CHANGED
@@ -1,10 +1,12 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("en_core_web_lg")
 doc = nlp.read("dog cat banana afskfsd")

-headings = [
+headings = %w[text has_vector vector_norm is_oov]
 rows = []

 doc.each do |token|
data/examples/japanese/ancestors.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
 sentence = "私の父は寿司が好きだ。"
 doc = nlp.read(sentence)

-headings = [
+headings = %w[text dep n_lefts n_rights ancestors]
 rows = []

 root = doc.tokens.select do |t|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
   t.i == t.head.i
 end.first

-puts "The sentence: "
+puts "The sentence: #{sentence}"

 # subject = Spacy::Token.new(root.lefts[0])
 subject = Spacy::Token.new(root.lefts[0])

-puts "The root of the sentence is:
-puts "The subject of the sentence is:
+puts "The root of the sentence is: #{root.text}"
+puts "The subject of the sentence is: #{subject.text}"

 subject.subtree.each do |descendant|
   rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
data/examples/japanese/entity_annotations_and_labels.rb
CHANGED
@@ -1,12 +1,14 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

 nlp = Spacy::Language.new("ja_core_news_lg")

-sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
+sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
 doc = nlp.read(sentence)

-headings = [
+headings = %w[text ent_iob ent_iob_ ent_type_]
 rows = []

 doc.each do |ent|
data/examples/japanese/information_extraction.rb
CHANGED
@@ -1,4 +1,6 @@
-
+# frozen_string_literal: true
+
+require("ruby-spacy")
 require "terminal-table"

 nlp = Spacy::Language.new("ja_core_news_lg")
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
 nlp.add_pipe("merge_noun_chunks")

 texts = [
-
-
+  "アメリカ合衆国の国土面積は日本の約25倍あります。",
+  "現在1ドルは日本円で110円です。"
 ]

 texts.each do |text|
   doc = nlp.read(text)
   doc.each do |token|
-    if token.dep_ == "case"
-      puts token.head.text + " --> " + token.text
-    end
+    puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
   end
 end

data/examples/japanese/lemmatization.rb
CHANGED
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require "ruby-spacy"
 require "terminal-table"

@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")

 doc = nlp.read("私は論文を読んでいるところだった。")

-headings = [
+headings = %w[text lemma]
 rows = []

 doc.each do |token|