ruby-spacy 0.1.4.1 → 0.1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +48 -0
- data/.solargraph.yml +22 -0
- data/Gemfile +7 -7
- data/Gemfile.lock +2 -2
- data/README.md +7 -10
- data/examples/get_started/lexeme.rb +3 -1
- data/examples/get_started/linguistic_annotations.rb +3 -1
- data/examples/get_started/morphology.rb +3 -1
- data/examples/get_started/most_similar.rb +3 -1
- data/examples/get_started/named_entities.rb +4 -2
- data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
- data/examples/get_started/similarity.rb +4 -2
- data/examples/get_started/tokenization.rb +3 -1
- data/examples/get_started/visualizing_dependencies.rb +2 -2
- data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
- data/examples/get_started/visualizing_named_entities.rb +4 -2
- data/examples/get_started/vocab.rb +3 -1
- data/examples/get_started/word_vectors.rb +3 -1
- data/examples/japanese/ancestors.rb +6 -4
- data/examples/japanese/entity_annotations_and_labels.rb +4 -2
- data/examples/japanese/information_extraction.rb +6 -6
- data/examples/japanese/lemmatization.rb +3 -1
- data/examples/japanese/most_similar.rb +3 -1
- data/examples/japanese/named_entity_recognition.rb +3 -2
- data/examples/japanese/navigating_parse_tree.rb +19 -17
- data/examples/japanese/noun_chunks.rb +2 -0
- data/examples/japanese/pos_tagging.rb +3 -1
- data/examples/japanese/sentence_segmentation.rb +3 -2
- data/examples/japanese/similarity.rb +2 -0
- data/examples/japanese/tokenization.rb +2 -0
- data/examples/japanese/visualizing_dependencies.rb +3 -1
- data/examples/japanese/visualizing_named_entities.rb +4 -2
- data/examples/linguistic_features/ancestors.rb +7 -5
- data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
- data/examples/linguistic_features/information_extraction.rb +9 -9
- data/examples/linguistic_features/iterating_children.rb +6 -8
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
- data/examples/linguistic_features/lemmatization.rb +3 -1
- data/examples/linguistic_features/named_entity_recognition.rb +3 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
- data/examples/linguistic_features/noun_chunks.rb +3 -1
- data/examples/linguistic_features/pos_tagging.rb +3 -1
- data/examples/linguistic_features/retokenize_1.rb +2 -0
- data/examples/linguistic_features/retokenize_2.rb +4 -2
- data/examples/linguistic_features/rule_based_morphology.rb +4 -2
- data/examples/linguistic_features/sentence_segmentation.rb +3 -2
- data/examples/linguistic_features/similarity.rb +4 -2
- data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
- data/examples/linguistic_features/similarity_between_spans.rb +7 -5
- data/examples/linguistic_features/tokenization.rb +3 -2
- data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
- data/examples/rule_based_matching/matcher.rb +4 -2
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +139 -141
- data/ruby-spacy.gemspec +15 -17
- data/tags +132 -0
- metadata +69 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cd52dfe6ab652bcefacd7401deef42ccccb5e711d418ca127776e66673b87f0
|
4
|
+
data.tar.gz: 5b2bd6ac16341c09e53673a31b60cb31a9c07d89344d35b7a7d9c01fe629881a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3af0557f6a33c0a4bfbf6a65e1e8922e14dc3f9df70fbc7bbe271212134b3d8b27e908aa79fd172fedf9e8daa4b39c6d1967eb1e2972d186c9ddb0a0bd6685c5
|
7
|
+
data.tar.gz: c893e49c75fb0ddb861c052ca5415df0235ef6d1d15960f6b2e5c4b815f1f6018607a8fff6b56bd1cbf5514c9762f8e39c9ce731f8ae713d148eb17eb7d9531b
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
AllCops:
|
2
|
+
NewCops: disable
|
3
|
+
SuggestExtensions: false
|
4
|
+
TargetRubyVersion: 2.6
|
5
|
+
|
6
|
+
Documentation:
|
7
|
+
Enabled: false
|
8
|
+
|
9
|
+
Naming/VariableNumber:
|
10
|
+
Enabled: false
|
11
|
+
|
12
|
+
Naming/FileName:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Style/StringLiterals:
|
16
|
+
Enabled: true
|
17
|
+
EnforcedStyle: double_quotes
|
18
|
+
|
19
|
+
Style/StringLiteralsInInterpolation:
|
20
|
+
Enabled: true
|
21
|
+
EnforcedStyle: double_quotes
|
22
|
+
|
23
|
+
Layout/LineLength:
|
24
|
+
Max: 400
|
25
|
+
|
26
|
+
Metrics/MethodLength:
|
27
|
+
Max: 80
|
28
|
+
|
29
|
+
Metrics/BlockLength:
|
30
|
+
Max: 60
|
31
|
+
|
32
|
+
Metrics/AbcSize:
|
33
|
+
Max: 60
|
34
|
+
|
35
|
+
Metrics/PerceivedComplexity:
|
36
|
+
Max: 10
|
37
|
+
|
38
|
+
Metrics/ClassLength:
|
39
|
+
Max: 400
|
40
|
+
|
41
|
+
Metrics/CyclomaticComplexity:
|
42
|
+
Max: 20
|
43
|
+
|
44
|
+
Metrics/ParameterLists:
|
45
|
+
Max: 8
|
46
|
+
|
47
|
+
Metrics/ModuleLength:
|
48
|
+
Max: 200
|
data/.solargraph.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
---
|
2
|
+
include:
|
3
|
+
- "**/*.rb"
|
4
|
+
exclude:
|
5
|
+
- spec/**/*
|
6
|
+
- test/**/*
|
7
|
+
- vendor/**/*
|
8
|
+
- ".bundle/**/*"
|
9
|
+
require: []
|
10
|
+
domains: []
|
11
|
+
reporters:
|
12
|
+
- rubocop
|
13
|
+
# - require_not_found
|
14
|
+
formatter:
|
15
|
+
rubocop:
|
16
|
+
cops: safe
|
17
|
+
except: []
|
18
|
+
only: []
|
19
|
+
extra_args: []
|
20
|
+
require_paths: []
|
21
|
+
plugins: []
|
22
|
+
max_files: 5000
|
data/Gemfile
CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
|
|
5
5
|
# Specify your gem's dependencies in ruby-spacy.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
8
|
+
gem "numpy"
|
9
|
+
gem "pycall"
|
10
|
+
gem "terminal-table"
|
11
11
|
|
12
12
|
group :development do
|
13
|
-
gem "
|
13
|
+
gem "github-markup"
|
14
14
|
gem "minitest", "~> 5.0"
|
15
|
-
gem
|
16
|
-
gem
|
17
|
-
gem
|
15
|
+
gem "rake", "~> 13.0"
|
16
|
+
gem "redcarpet"
|
17
|
+
gem "yard"
|
18
18
|
end
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
# ruby-spacy
|
2
|
-
|
3
|
-
⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
|
1
|
+
# 💎 ruby-spacy
|
4
2
|
|
5
3
|
## Overview
|
6
4
|
|
@@ -16,22 +14,22 @@
|
|
16
14
|
|
17
15
|
## Installation of prerequisites
|
18
16
|
|
19
|
-
Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.
|
17
|
+
**IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
|
20
18
|
|
21
19
|
```shell
|
22
|
-
$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.
|
20
|
+
$ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
|
23
21
|
```
|
24
22
|
|
25
23
|
Don't forget to make it accessible from your working directory.
|
26
24
|
|
27
25
|
```shell
|
28
|
-
$ pyenv local 3.
|
26
|
+
$ pyenv local 3.10.6
|
29
27
|
```
|
30
28
|
|
31
29
|
Or alternatively:
|
32
30
|
|
33
31
|
```shell
|
34
|
-
$ pyenv global 3.
|
32
|
+
$ pyenv global 3.10.6
|
35
33
|
```
|
36
34
|
|
37
35
|
Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
|
@@ -451,7 +449,7 @@ query = tokyo.vector - japan.vector + france.vector
|
|
451
449
|
headings = ["rank", "text", "score"]
|
452
450
|
rows = []
|
453
451
|
|
454
|
-
results = nlp.most_similar(query,
|
452
|
+
results = nlp.most_similar(query, 10)
|
455
453
|
results.each_with_index do |lexeme, i|
|
456
454
|
index = (i + 1).to_s
|
457
455
|
rows << [index, lexeme.text, lexeme.score]
|
@@ -501,7 +499,7 @@ query = tokyo.vector - japan.vector + france.vector
|
|
501
499
|
headings = ["rank", "text", "score"]
|
502
500
|
rows = []
|
503
501
|
|
504
|
-
results = nlp.most_similar(query,
|
502
|
+
results = nlp.most_similar(query, 10)
|
505
503
|
results.each_with_index do |lexeme, i|
|
506
504
|
index = (i + 1).to_s
|
507
505
|
rows << [index, lexeme.text, lexeme.score]
|
@@ -541,4 +539,3 @@ I would like to thank the following open source projects and their creators for
|
|
541
539
|
## License
|
542
540
|
|
543
541
|
This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
544
|
-
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("I love coffee")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text shape prefix suffix is_alpha is_digit]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |word|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text pos dep]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text shape is_alpha is_stop morphology]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("France")
|
|
9
11
|
|
10
12
|
query = tokyo.vector - japan.vector + france.vector
|
11
13
|
|
12
|
-
headings = [
|
14
|
+
headings = %w[rank text score]
|
13
15
|
rows = []
|
14
16
|
|
15
17
|
results = nlp.most_similar(query, 20)
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
|
-
doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
|
+
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text start_char end_char label]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.ents.each do |ent|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_sm")
|
5
7
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_lg")
|
4
6
|
doc1 = nlp.read("I like salty fries and hamburgers.")
|
5
7
|
doc2 = nlp.read("Fast food tastes very good.")
|
6
8
|
|
7
|
-
puts "Doc 1:
|
8
|
-
puts "Doc 2:
|
9
|
+
puts "Doc 1: #{doc1.text}"
|
10
|
+
puts "Doc 2: #{doc2.text}"
|
9
11
|
puts "Similarity: #{doc1.similarity(doc2)}"
|
10
12
|
|
11
13
|
# Doc 1: I like salty fries and hamburgers.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
|
7
9
|
|
8
|
-
headings = [1,2,3,4,5,6,7,8,9,10,11]
|
10
|
+
headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
9
11
|
row = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
|
|
10
12
|
File.open(File.join("test_dep.svg"), "w") do |file|
|
11
13
|
file.write(dep_svg)
|
12
14
|
end
|
13
|
-
|
14
|
-
|
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
|
5
|
-
sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
|
7
|
+
sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
|
6
8
|
doc = nlp.read(sentence)
|
7
9
|
|
8
|
-
ent_html = doc.displacy(style:
|
10
|
+
ent_html = doc.displacy(style: "ent")
|
9
11
|
|
10
12
|
File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
|
11
13
|
file.write(ent_html)
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("en_core_web_sm")
|
4
6
|
doc = nlp.read("I love coffee")
|
5
7
|
|
6
8
|
pp doc.vocab.strings["coffee"]
|
7
|
-
pp doc.vocab.strings[
|
9
|
+
pp doc.vocab.strings[3_197_928_453_018_144_401]
|
8
10
|
|
9
11
|
# 3197928453018144401
|
10
12
|
# "coffee"
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("en_core_web_lg")
|
5
7
|
doc = nlp.read("dog cat banana afskfsd")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text has_vector vector_norm is_oov]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
|
|
6
8
|
sentence = "私の父は寿司が好きだ。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text dep n_lefts n_rights ancestors]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
root = doc.tokens.select do |t|
|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
|
|
14
16
|
t.i == t.head.i
|
15
17
|
end.first
|
16
18
|
|
17
|
-
puts "The sentence: "
|
19
|
+
puts "The sentence: #{sentence}"
|
18
20
|
|
19
21
|
# subject = Spacy::Token.new(root.lefts[0])
|
20
22
|
subject = Spacy::Token.new(root.lefts[0])
|
21
23
|
|
22
|
-
puts "The root of the sentence is:
|
23
|
-
puts "The subject of the sentence is:
|
24
|
+
puts "The root of the sentence is: #{root.text}"
|
25
|
+
puts "The subject of the sentence is: #{subject.text}"
|
24
26
|
|
25
27
|
subject.subtree.each do |descendant|
|
26
28
|
rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
7
|
|
6
|
-
sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
|
8
|
+
sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text ent_iob ent_iob_ ent_type_]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.each do |ent|
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require("ruby-spacy")
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
|
|
7
9
|
nlp.add_pipe("merge_noun_chunks")
|
8
10
|
|
9
11
|
texts = [
|
10
|
-
|
11
|
-
|
12
|
+
"アメリカ合衆国の国土面積は日本の約25倍あります。",
|
13
|
+
"現在1ドルは日本円で110円です。"
|
12
14
|
]
|
13
15
|
|
14
16
|
texts.each do |text|
|
15
17
|
doc = nlp.read(text)
|
16
18
|
doc.each do |token|
|
17
|
-
if token.dep_ == "case"
|
18
|
-
puts token.head.text + " --> " + token.text
|
19
|
-
end
|
19
|
+
puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
|
|
5
7
|
|
6
8
|
doc = nlp.read("私は論文を読んでいるところだった。")
|
7
9
|
|
8
|
-
headings = [
|
10
|
+
headings = %w[text lemma]
|
9
11
|
rows = []
|
10
12
|
|
11
13
|
doc.each do |token|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("フランス")
|
|
9
11
|
|
10
12
|
query = tokyo.vector - japan.vector + france.vector
|
11
13
|
|
12
|
-
headings = [
|
14
|
+
headings = %w[rank text score]
|
13
15
|
rows = []
|
14
16
|
|
15
17
|
results = nlp.most_similar(query, 20)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
|
|
6
8
|
sentence = "任天堂は1983年にファミコンを14,800円で発売した。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
headings = [
|
11
|
+
headings = %w[text start end label]
|
10
12
|
rows = []
|
11
13
|
|
12
14
|
doc.ents.each do |ent|
|
@@ -24,4 +26,3 @@ puts table
|
|
24
26
|
# | ファミコン | 10 | 15 | PRODUCT |
|
25
27
|
# | 14,800円 | 16 | 23 | MONEY |
|
26
28
|
# +------------+-------+-----+---------+
|
27
|
-
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -15,20 +17,20 @@ end
|
|
15
17
|
table = Terminal::Table.new rows: rows, headings: headings
|
16
18
|
puts table
|
17
19
|
|
18
|
-
+------+----------+-----------+----------+------------------------+
|
19
|
-
| text | dep | head text | head pos | children |
|
20
|
-
+------+----------+-----------+----------+------------------------+
|
21
|
-
| 自動 | compound | 車 | 92 | |
|
22
|
-
| 運転 | compound | 車 | 92 | |
|
23
|
-
| 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
|
24
|
-
| は | case | 車 | 92 | |
|
25
|
-
| 保険 | compound | 責任 | 92 | |
|
26
|
-
| 責任 | obj | 転嫁 | 100 | 保険, を |
|
27
|
-
| を | case | 責任 | 92 | |
|
28
|
-
| 製造 | compound | 者 | 92 | |
|
29
|
-
| 者 | obl | 転嫁 | 100 | 製造, に |
|
30
|
-
| に | case | 者 | 92 | |
|
31
|
-
| 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
|
32
|
-
| する | aux | 転嫁 | 100 | |
|
33
|
-
| 。 | punct | 転嫁 | 100 | |
|
34
|
-
+------+----------+-----------+----------+------------------------+
|
20
|
+
# +------+----------+-----------+----------+------------------------+
|
21
|
+
# | text | dep | head text | head pos | children |
|
22
|
+
# +------+----------+-----------+----------+------------------------+
|
23
|
+
# | 自動 | compound | 車 | 92 | |
|
24
|
+
# | 運転 | compound | 車 | 92 | |
|
25
|
+
# | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
|
26
|
+
# | は | case | 車 | 92 | |
|
27
|
+
# | 保険 | compound | 責任 | 92 | |
|
28
|
+
# | 責任 | obj | 転嫁 | 100 | 保険, を |
|
29
|
+
# | を | case | 責任 | 92 | |
|
30
|
+
# | 製造 | compound | 者 | 92 | |
|
31
|
+
# | 者 | obl | 転嫁 | 100 | 製造, に |
|
32
|
+
# | に | case | 者 | 92 | |
|
33
|
+
# | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
|
34
|
+
# | する | aux | 転嫁 | 100 | |
|
35
|
+
# | 。 | punct | 転嫁 | 100 | |
|
36
|
+
# +------+----------+-----------+----------+------------------------+
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
7
|
doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
|
6
8
|
|
7
|
-
headings = [
|
9
|
+
headings = %w[text lemma pos tag dep]
|
8
10
|
rows = []
|
9
11
|
|
10
12
|
doc.each do |token|
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
|
3
5
|
nlp = Spacy::Language.new("ja_core_news_sm")
|
4
6
|
|
5
7
|
doc = nlp.read("これは文です。今私は「これは文です」と言いました。")
|
6
8
|
|
7
|
-
|
8
|
-
puts "doc has annotation SENT_START: " + doc.has_annotation("SENT_START").to_s
|
9
|
+
puts "doc has annotation SENT_START: #{doc.has_annotation("SENT_START")}"
|
9
10
|
|
10
11
|
doc.sents.each do |sent|
|
11
12
|
puts sent.text
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
|
|
6
8
|
sentence = "自動運転車は保険責任を製造者に転嫁する。"
|
7
9
|
doc = nlp.read(sentence)
|
8
10
|
|
9
|
-
dep_svg = doc.displacy(style:
|
11
|
+
dep_svg = doc.displacy(style: "dep", compact: false)
|
10
12
|
|
11
13
|
File.open(File.join(File.dirname(__FILE__), "test_dep.svg"), "w") do |file|
|
12
14
|
file.write(dep_svg)
|
@@ -1,13 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "ruby-spacy"
|
2
4
|
require "terminal-table"
|
3
5
|
|
4
6
|
nlp = Spacy::Language.new("ja_core_news_lg")
|
5
7
|
|
6
|
-
sentence ="セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
|
8
|
+
sentence = "セバスチアン・スランが2007年にグーグルで自動運転車に取り組み始めたとき、社外の人間で彼のことを真剣に捉えている者はほとんどいなかった。"
|
7
9
|
|
8
10
|
doc = nlp.read(sentence)
|
9
11
|
|
10
|
-
ent_html = doc.displacy(style:
|
12
|
+
ent_html = doc.displacy(style: "ent")
|
11
13
|
|
12
14
|
File.open(File.join(File.dirname(__FILE__), "outputs/test_ent.html"), "w") do |file|
|
13
15
|
file.write(ent_html)
|