ruby-spacy 0.1.4.1 → 0.1.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +48 -0
  4. data/.solargraph.yml +22 -0
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +88 -9
  7. data/README.md +7 -10
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +3 -1
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +3 -1
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +147 -142
  58. data/ruby-spacy.gemspec +15 -17
  59. metadata +68 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fc769c4257e78333c3d6dc114d76b39c31b57365d032d7b741358f34b37099e
4
- data.tar.gz: 281a9997a325d16819574c96a0696eeedb59af0709d8f25814e6fa0d39646757
3
+ metadata.gz: 06dd0ed2027c0d0c2e610141ef375d483734adfe7855a2306b9b23a00a743b73
4
+ data.tar.gz: fbe23e5e67a9502d2b6bd439608a6f1d43c82c0b05437386c7f65a69326b2cf0
5
5
  SHA512:
6
- metadata.gz: 8b387962ee82b60499208225ab7cfca631a55a6eb305212f3b14a2c802f67cfa685b23e762a3877e4ba5ae01308c4909eb26286bcde2fc9683dedeee9059db88
7
- data.tar.gz: d94e788a1458f6be22db486e43180f7cbcce516ad053baa8724ce6eacd7869c3123c9f292845fe7e400aa2115786617f5ad69bda5c71b403221c98c084dc9900
6
+ metadata.gz: da5fd99c782737cea2e1fa125b66de561522ac541e7a3c15f6ebbda7c6eae1e55925b043cd3421463d95c8dda450ed6d1c65704b88a92acff4b9c716d38a96d4
7
+ data.tar.gz: db33fe51cbe8d6613afaed4e648928b95e4d78481831c0f998cca443d181903af444719d90275035b1b582d4bd281b6dc3e8624e9177506b8ad2ef836b6d82e6
data/.gitignore CHANGED
@@ -56,3 +56,4 @@ build-iPhoneSimulator/
56
56
  # .rubocop-https?--*
57
57
 
58
58
  .DS_Store
59
+ tags
data/.rubocop.yml ADDED
@@ -0,0 +1,48 @@
1
+ AllCops:
2
+ NewCops: disable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 2.6
5
+
6
+ Documentation:
7
+ Enabled: false
8
+
9
+ Naming/VariableNumber:
10
+ Enabled: false
11
+
12
+ Naming/FileName:
13
+ Enabled: false
14
+
15
+ Style/StringLiterals:
16
+ Enabled: true
17
+ EnforcedStyle: double_quotes
18
+
19
+ Style/StringLiteralsInInterpolation:
20
+ Enabled: true
21
+ EnforcedStyle: double_quotes
22
+
23
+ Layout/LineLength:
24
+ Max: 400
25
+
26
+ Metrics/MethodLength:
27
+ Max: 80
28
+
29
+ Metrics/BlockLength:
30
+ Max: 60
31
+
32
+ Metrics/AbcSize:
33
+ Max: 60
34
+
35
+ Metrics/PerceivedComplexity:
36
+ Max: 10
37
+
38
+ Metrics/ClassLength:
39
+ Max: 400
40
+
41
+ Metrics/CyclomaticComplexity:
42
+ Max: 20
43
+
44
+ Metrics/ParameterLists:
45
+ Max: 8
46
+
47
+ Metrics/ModuleLength:
48
+ Max: 200
data/.solargraph.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ include:
3
+ - "**/*.rb"
4
+ exclude:
5
+ - spec/**/*
6
+ - test/**/*
7
+ - vendor/**/*
8
+ - ".bundle/**/*"
9
+ require: []
10
+ domains: []
11
+ reporters:
12
+ - rubocop
13
+ # - require_not_found
14
+ formatter:
15
+ rubocop:
16
+ cops: safe
17
+ except: []
18
+ only: []
19
+ extra_args: []
20
+ require_paths: []
21
+ plugins: []
22
+ max_files: 5000
data/Gemfile CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in ruby-spacy.gemspec
6
6
  gemspec
7
7
 
8
- gem 'pycall'
9
- gem 'numpy'
10
- gem 'terminal-table'
8
+ gem "numpy"
9
+ gem "pycall"
10
+ gem "terminal-table"
11
11
 
12
12
  group :development do
13
- gem "rake", "~> 13.0"
13
+ gem "github-markup"
14
14
  gem "minitest", "~> 5.0"
15
- gem 'yard'
16
- gem 'redcarpet'
17
- gem 'github-markup'
15
+ gem "rake", "~> 13.0"
16
+ gem "redcarpet"
17
+ gem "yard"
18
18
  end
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-spacy (0.1.4.1)
4
+ ruby-spacy (0.1.5.0)
5
5
  numpy (~> 0.4.0)
6
6
  pycall (~> 1.4.0)
7
7
  terminal-table (~> 3.0.1)
@@ -9,33 +9,112 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- github-markup (4.0.0)
13
- minitest (5.14.4)
12
+ ast (2.4.2)
13
+ backport (1.2.0)
14
+ benchmark (0.2.1)
15
+ diff-lcs (1.5.0)
16
+ e2mmap (0.1.0)
17
+ github-markup (4.0.1)
18
+ jaro_winkler (1.5.4)
19
+ json (2.6.3)
20
+ kramdown (2.4.0)
21
+ rexml
22
+ kramdown-parser-gfm (1.1.0)
23
+ kramdown (~> 2.0)
24
+ mini_portile2 (2.8.1)
25
+ minitest (5.17.0)
26
+ nokogiri (1.14.0)
27
+ mini_portile2 (~> 2.8.0)
28
+ racc (~> 1.4)
29
+ nokogiri (1.14.0-arm64-darwin)
30
+ racc (~> 1.4)
31
+ nokogiri (1.14.0-x86_64-darwin)
32
+ racc (~> 1.4)
33
+ nokogiri (1.14.0-x86_64-linux)
34
+ racc (~> 1.4)
14
35
  numpy (0.4.0)
15
36
  pycall (>= 1.2.0.beta1)
16
- pycall (1.4.0)
17
- rake (13.0.3)
37
+ parallel (1.22.1)
38
+ parser (3.2.0.0)
39
+ ast (~> 2.4.1)
40
+ pycall (1.4.2)
41
+ racc (1.6.2)
42
+ rainbow (3.1.1)
43
+ rake (13.0.6)
18
44
  redcarpet (3.5.1)
19
- terminal-table (3.0.1)
45
+ regexp_parser (2.6.2)
46
+ reverse_markdown (2.1.1)
47
+ nokogiri
48
+ rexml (3.2.5)
49
+ rspec (3.12.0)
50
+ rspec-core (~> 3.12.0)
51
+ rspec-expectations (~> 3.12.0)
52
+ rspec-mocks (~> 3.12.0)
53
+ rspec-core (3.12.0)
54
+ rspec-support (~> 3.12.0)
55
+ rspec-expectations (3.12.2)
56
+ diff-lcs (>= 1.2.0, < 2.0)
57
+ rspec-support (~> 3.12.0)
58
+ rspec-mocks (3.12.3)
59
+ diff-lcs (>= 1.2.0, < 2.0)
60
+ rspec-support (~> 3.12.0)
61
+ rspec-support (3.12.0)
62
+ rubocop (1.43.0)
63
+ json (~> 2.3)
64
+ parallel (~> 1.10)
65
+ parser (>= 3.2.0.0)
66
+ rainbow (>= 2.2.2, < 4.0)
67
+ regexp_parser (>= 1.8, < 3.0)
68
+ rexml (>= 3.2.5, < 4.0)
69
+ rubocop-ast (>= 1.24.1, < 2.0)
70
+ ruby-progressbar (~> 1.7)
71
+ unicode-display_width (>= 2.4.0, < 3.0)
72
+ rubocop-ast (1.24.1)
73
+ parser (>= 3.1.1.0)
74
+ ruby-progressbar (1.11.0)
75
+ solargraph (0.48.0)
76
+ backport (~> 1.2)
77
+ benchmark
78
+ bundler (>= 1.17.2)
79
+ diff-lcs (~> 1.4)
80
+ e2mmap
81
+ jaro_winkler (~> 1.5)
82
+ kramdown (~> 2.3)
83
+ kramdown-parser-gfm (~> 1.1)
84
+ parser (~> 3.0)
85
+ reverse_markdown (>= 1.0.5, < 3)
86
+ rubocop (>= 0.52)
87
+ thor (~> 1.0)
88
+ tilt (~> 2.0)
89
+ yard (~> 0.9, >= 0.9.24)
90
+ terminal-table (3.0.2)
20
91
  unicode-display_width (>= 1.1.1, < 3)
21
- unicode-display_width (2.0.0)
22
- yard (0.9.26)
92
+ thor (1.2.1)
93
+ tilt (2.0.11)
94
+ unicode-display_width (2.4.2)
95
+ webrick (1.7.0)
96
+ yard (0.9.28)
97
+ webrick (~> 1.7.0)
23
98
 
24
99
  PLATFORMS
25
100
  arm64-darwin-20
101
+ ruby
26
102
  x86_64-darwin-20
27
103
  x86_64-linux
28
104
 
29
105
  DEPENDENCIES
106
+ bundler
30
107
  github-markup
31
108
  minitest (~> 5.0)
32
109
  numpy
33
110
  pycall
34
111
  rake (~> 13.0)
35
112
  redcarpet
113
+ rspec
36
114
  ruby-spacy!
115
+ solargraph
37
116
  terminal-table
38
117
  yard
39
118
 
40
119
  BUNDLED WITH
41
- 2.2.21
120
+ 2.4.2
data/README.md CHANGED
@@ -1,6 +1,4 @@
1
- # ruby-spacy
2
-
3
- ⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
1
+ # 💎 ruby-spacy
4
2
 
5
3
  ## Overview
6
4
 
@@ -16,22 +14,22 @@
16
14
 
17
15
  ## Installation of prerequisites
18
16
 
19
- Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.8.5, for instance, using pyenv with `enable-shared` as follows:
17
+ **IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
20
18
 
21
19
  ```shell
22
- $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.8.5
20
+ $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
23
21
  ```
24
22
 
25
23
  Don't forget to make it accessible from your working directory.
26
24
 
27
25
  ```shell
28
- $ pyenv local 3.8.5
26
+ $ pyenv local 3.10.6
29
27
  ```
30
28
 
31
29
  Or alternatively:
32
30
 
33
31
  ```shell
34
- $ pyenv global 3.8.5
32
+ $ pyenv global 3.10.6
35
33
  ```
36
34
 
37
35
  Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
@@ -451,7 +449,7 @@ query = tokyo.vector - japan.vector + france.vector
451
449
  headings = ["rank", "text", "score"]
452
450
  rows = []
453
451
 
454
- results = nlp.most_similar(query, 20)
452
+ results = nlp.most_similar(query, 10)
455
453
  results.each_with_index do |lexeme, i|
456
454
  index = (i + 1).to_s
457
455
  rows << [index, lexeme.text, lexeme.score]
@@ -501,7 +499,7 @@ query = tokyo.vector - japan.vector + france.vector
501
499
  headings = ["rank", "text", "score"]
502
500
  rows = []
503
501
 
504
- results = nlp.most_similar(query, 20)
502
+ results = nlp.most_similar(query, 10)
505
503
  results.each_with_index do |lexeme, i|
506
504
  index = (i + 1).to_s
507
505
  rows << [index, lexeme.text, lexeme.score]
@@ -541,4 +539,3 @@ I would like to thank the following open source projects and their creators for
541
539
  ## License
542
540
 
543
541
  This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
544
-
data/examples/get_started/lexeme.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("I love coffee")
6
8
 
7
- headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
9
+ headings = %w[text shape prefix suffix is_alpha is_digit]
8
10
  rows = []
9
11
 
10
12
  doc.each do |word|
data/examples/get_started/linguistic_annotations.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "pos", "dep"]
9
+ headings = %w[text pos dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/get_started/morphology.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
9
+ headings = %w[text shape is_alpha is_stop morphology]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/get_started/most_similar.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("France")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["rank", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
data/examples/get_started/named_entities.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
- doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "start_char", "end_char", "label"]
9
+ headings = %w[text start_char end_char label]
8
10
  rows = []
9
11
 
10
12
  doc.ents.each do |ent|
data/examples/get_started/pos_tags_and_dependencies.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep"]
9
+ headings = %w[text lemma pos tag dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/get_started/similarity.rb CHANGED
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_lg")
4
6
  doc1 = nlp.read("I like salty fries and hamburgers.")
5
7
  doc2 = nlp.read("Fast food tastes very good.")
6
8
 
7
- puts "Doc 1: " + doc1.text
8
- puts "Doc 2: " + doc2.text
9
+ puts "Doc 1: #{doc1.text}"
10
+ puts "Doc 2: #{doc2.text}"
9
11
  puts "Similarity: #{doc1.similarity(doc2)}"
10
12
 
11
13
  # Doc 1: I like salty fries and hamburgers.
data/examples/get_started/tokenization.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
data/examples/get_started/visualizing_dependencies.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
10
12
  File.open(File.join("test_dep.svg"), "w") do |file|
11
13
  file.write(dep_svg)
12
14
  end
13
-
14
-
data/examples/get_started/visualizing_dependencies_compact.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
data/examples/get_started/visualizing_named_entities.rb CHANGED
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
7
+ sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
6
8
  doc = nlp.read(sentence)
7
9
 
8
- ent_html = doc.displacy(style: 'ent')
10
+ ent_html = doc.displacy(style: "ent")
9
11
 
10
12
  File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
11
13
  file.write(ent_html)
data/examples/get_started/vocab.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
  doc = nlp.read("I love coffee")
5
7
 
6
8
  pp doc.vocab.strings["coffee"]
7
- pp doc.vocab.strings[3197928453018144401]
9
+ pp doc.vocab.strings[3_197_928_453_018_144_401]
8
10
 
9
11
  # 3197928453018144401
10
12
  # "coffee"
data/examples/get_started/word_vectors.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc = nlp.read("dog cat banana afskfsd")
6
8
 
7
- headings = ["text", "has_vector", "vector_norm", "is_oov"]
9
+ headings = %w[text has_vector vector_norm is_oov]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/japanese/ancestors.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "私の父は寿司が好きだ。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  # subject = Spacy::Token.new(root.lefts[0])
20
22
  subject = Spacy::Token.new(root.lefts[0])
21
23
 
22
- puts "The root of the sentence is: " + root.text
23
- puts "The subject of the sentence is: " + subject.text
24
+ puts "The root of the sentence is: #{root.text}"
25
+ puts "The subject of the sentence is: #{subject.text}"
24
26
 
25
27
  subject.subtree.each do |descendant|
26
28
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
data/examples/japanese/entity_annotations_and_labels.rb CHANGED
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
5
7
 
6
- sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
8
+ sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
data/examples/japanese/information_extraction.rb CHANGED
@@ -1,4 +1,6 @@
1
- require( "ruby-spacy")
1
+ # frozen_string_literal: true
2
+
3
+ require("ruby-spacy")
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
7
9
  nlp.add_pipe("merge_noun_chunks")
8
10
 
9
11
  texts = [
10
- "アメリカ合衆国の国土面積は日本の約25倍あります。",
11
- "現在1ドルは日本円で110円です。",
12
+ "アメリカ合衆国の国土面積は日本の約25倍あります。",
13
+ "現在1ドルは日本円で110円です。"
12
14
  ]
13
15
 
14
16
  texts.each do |text|
15
17
  doc = nlp.read(text)
16
18
  doc.each do |token|
17
- if token.dep_ == "case"
18
- puts token.head.text + " --> " + token.text
19
- end
19
+ puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
20
20
  end
21
21
  end
22
22
 
data/examples/japanese/lemmatization.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
5
7
 
6
8
  doc = nlp.read("私は論文を読んでいるところだった。")
7
9
 
8
- headings = ["text", "lemma"]
10
+ headings = %w[text lemma]
9
11
  rows = []
10
12
 
11
13
  doc.each do |token|
data/examples/japanese/most_similar.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("フランス")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["rank", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
data/examples/japanese/named_entity_recognition.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "任天堂は1983年にファミコンを14,800円で発売した。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "start", "end", "label"]
11
+ headings = %w[text start end label]
10
12
  rows = []
11
13
 
12
14
  doc.ents.each do |ent|
@@ -24,4 +26,3 @@ puts table
24
26
  # | ファミコン | 10 | 15 | PRODUCT |
25
27
  # | 14,800円 | 16 | 23 | MONEY |
26
28
  # +------------+-------+-----+---------+
27
-
data/examples/japanese/navigating_parse_tree.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -15,20 +17,20 @@ end
15
17
  table = Terminal::Table.new rows: rows, headings: headings
16
18
  puts table
17
19
 
18
- +------+----------+-----------+----------+------------------------+
19
- | text | dep | head text | head pos | children |
20
- +------+----------+-----------+----------+------------------------+
21
- | 自動 | compound | 車 | 92 | |
22
- | 運転 | compound | 車 | 92 | |
23
- | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
24
- | は | case | 車 | 92 | |
25
- | 保険 | compound | 責任 | 92 | |
26
- | 責任 | obj | 転嫁 | 100 | 保険, を |
27
- | を | case | 責任 | 92 | |
28
- | 製造 | compound | 者 | 92 | |
29
- | 者 | obl | 転嫁 | 100 | 製造, に |
30
- | に | case | 者 | 92 | |
31
- | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
32
- | する | aux | 転嫁 | 100 | |
33
- | 。 | punct | 転嫁 | 100 | |
34
- +------+----------+-----------+----------+------------------------+
20
+ # +------+----------+-----------+----------+------------------------+
21
+ # | text | dep | head text | head pos | children |
22
+ # +------+----------+-----------+----------+------------------------+
23
+ # | 自動 | compound | 車 | 92 | |
24
+ # | 運転 | compound | 車 | 92 | |
25
+ # | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
26
+ # | は | case | 車 | 92 | |
27
+ # | 保険 | compound | 責任 | 92 | |
28
+ # | 責任 | obj | 転嫁 | 100 | 保険, を |
29
+ # | を | case | 責任 | 92 | |
30
+ # | 製造 | compound | 者 | 92 | |
31
+ # | 者 | obl | 転嫁 | 100 | 製造, に |
32
+ # | に | case | 者 | 92 | |
33
+ # | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
34
+ # | する | aux | 転嫁 | 100 | |
35
+ # | 。 | punct | 転嫁 | 100 | |
36
+ # +------+----------+-----------+----------+------------------------+