ruby-spacy 0.1.4.1 → 0.1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +48 -0
  4. data/.solargraph.yml +22 -0
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +88 -9
  7. data/README.md +7 -10
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +3 -1
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +3 -1
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +147 -142
  58. data/ruby-spacy.gemspec +15 -17
  59. metadata +68 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fc769c4257e78333c3d6dc114d76b39c31b57365d032d7b741358f34b37099e
4
- data.tar.gz: 281a9997a325d16819574c96a0696eeedb59af0709d8f25814e6fa0d39646757
3
+ metadata.gz: 06dd0ed2027c0d0c2e610141ef375d483734adfe7855a2306b9b23a00a743b73
4
+ data.tar.gz: fbe23e5e67a9502d2b6bd439608a6f1d43c82c0b05437386c7f65a69326b2cf0
5
5
  SHA512:
6
- metadata.gz: 8b387962ee82b60499208225ab7cfca631a55a6eb305212f3b14a2c802f67cfa685b23e762a3877e4ba5ae01308c4909eb26286bcde2fc9683dedeee9059db88
7
- data.tar.gz: d94e788a1458f6be22db486e43180f7cbcce516ad053baa8724ce6eacd7869c3123c9f292845fe7e400aa2115786617f5ad69bda5c71b403221c98c084dc9900
6
+ metadata.gz: da5fd99c782737cea2e1fa125b66de561522ac541e7a3c15f6ebbda7c6eae1e55925b043cd3421463d95c8dda450ed6d1c65704b88a92acff4b9c716d38a96d4
7
+ data.tar.gz: db33fe51cbe8d6613afaed4e648928b95e4d78481831c0f998cca443d181903af444719d90275035b1b582d4bd281b6dc3e8624e9177506b8ad2ef836b6d82e6
data/.gitignore CHANGED
@@ -56,3 +56,4 @@ build-iPhoneSimulator/
56
56
  # .rubocop-https?--*
57
57
 
58
58
  .DS_Store
59
+ tags
data/.rubocop.yml ADDED
@@ -0,0 +1,48 @@
1
+ AllCops:
2
+ NewCops: disable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 2.6
5
+
6
+ Documentation:
7
+ Enabled: false
8
+
9
+ Naming/VariableNumber:
10
+ Enabled: false
11
+
12
+ Naming/FileName:
13
+ Enabled: false
14
+
15
+ Style/StringLiterals:
16
+ Enabled: true
17
+ EnforcedStyle: double_quotes
18
+
19
+ Style/StringLiteralsInInterpolation:
20
+ Enabled: true
21
+ EnforcedStyle: double_quotes
22
+
23
+ Layout/LineLength:
24
+ Max: 400
25
+
26
+ Metrics/MethodLength:
27
+ Max: 80
28
+
29
+ Metrics/BlockLength:
30
+ Max: 60
31
+
32
+ Metrics/AbcSize:
33
+ Max: 60
34
+
35
+ Metrics/PerceivedComplexity:
36
+ Max: 10
37
+
38
+ Metrics/ClassLength:
39
+ Max: 400
40
+
41
+ Metrics/CyclomaticComplexity:
42
+ Max: 20
43
+
44
+ Metrics/ParameterLists:
45
+ Max: 8
46
+
47
+ Metrics/ModuleLength:
48
+ Max: 200
data/.solargraph.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ include:
3
+ - "**/*.rb"
4
+ exclude:
5
+ - spec/**/*
6
+ - test/**/*
7
+ - vendor/**/*
8
+ - ".bundle/**/*"
9
+ require: []
10
+ domains: []
11
+ reporters:
12
+ - rubocop
13
+ # - require_not_found
14
+ formatter:
15
+ rubocop:
16
+ cops: safe
17
+ except: []
18
+ only: []
19
+ extra_args: []
20
+ require_paths: []
21
+ plugins: []
22
+ max_files: 5000
data/Gemfile CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in ruby-spacy.gemspec
6
6
  gemspec
7
7
 
8
- gem 'pycall'
9
- gem 'numpy'
10
- gem 'terminal-table'
8
+ gem "numpy"
9
+ gem "pycall"
10
+ gem "terminal-table"
11
11
 
12
12
  group :development do
13
- gem "rake", "~> 13.0"
13
+ gem "github-markup"
14
14
  gem "minitest", "~> 5.0"
15
- gem 'yard'
16
- gem 'redcarpet'
17
- gem 'github-markup'
15
+ gem "rake", "~> 13.0"
16
+ gem "redcarpet"
17
+ gem "yard"
18
18
  end
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-spacy (0.1.4.1)
4
+ ruby-spacy (0.1.5.0)
5
5
  numpy (~> 0.4.0)
6
6
  pycall (~> 1.4.0)
7
7
  terminal-table (~> 3.0.1)
@@ -9,33 +9,112 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- github-markup (4.0.0)
13
- minitest (5.14.4)
12
+ ast (2.4.2)
13
+ backport (1.2.0)
14
+ benchmark (0.2.1)
15
+ diff-lcs (1.5.0)
16
+ e2mmap (0.1.0)
17
+ github-markup (4.0.1)
18
+ jaro_winkler (1.5.4)
19
+ json (2.6.3)
20
+ kramdown (2.4.0)
21
+ rexml
22
+ kramdown-parser-gfm (1.1.0)
23
+ kramdown (~> 2.0)
24
+ mini_portile2 (2.8.1)
25
+ minitest (5.17.0)
26
+ nokogiri (1.14.0)
27
+ mini_portile2 (~> 2.8.0)
28
+ racc (~> 1.4)
29
+ nokogiri (1.14.0-arm64-darwin)
30
+ racc (~> 1.4)
31
+ nokogiri (1.14.0-x86_64-darwin)
32
+ racc (~> 1.4)
33
+ nokogiri (1.14.0-x86_64-linux)
34
+ racc (~> 1.4)
14
35
  numpy (0.4.0)
15
36
  pycall (>= 1.2.0.beta1)
16
- pycall (1.4.0)
17
- rake (13.0.3)
37
+ parallel (1.22.1)
38
+ parser (3.2.0.0)
39
+ ast (~> 2.4.1)
40
+ pycall (1.4.2)
41
+ racc (1.6.2)
42
+ rainbow (3.1.1)
43
+ rake (13.0.6)
18
44
  redcarpet (3.5.1)
19
- terminal-table (3.0.1)
45
+ regexp_parser (2.6.2)
46
+ reverse_markdown (2.1.1)
47
+ nokogiri
48
+ rexml (3.2.5)
49
+ rspec (3.12.0)
50
+ rspec-core (~> 3.12.0)
51
+ rspec-expectations (~> 3.12.0)
52
+ rspec-mocks (~> 3.12.0)
53
+ rspec-core (3.12.0)
54
+ rspec-support (~> 3.12.0)
55
+ rspec-expectations (3.12.2)
56
+ diff-lcs (>= 1.2.0, < 2.0)
57
+ rspec-support (~> 3.12.0)
58
+ rspec-mocks (3.12.3)
59
+ diff-lcs (>= 1.2.0, < 2.0)
60
+ rspec-support (~> 3.12.0)
61
+ rspec-support (3.12.0)
62
+ rubocop (1.43.0)
63
+ json (~> 2.3)
64
+ parallel (~> 1.10)
65
+ parser (>= 3.2.0.0)
66
+ rainbow (>= 2.2.2, < 4.0)
67
+ regexp_parser (>= 1.8, < 3.0)
68
+ rexml (>= 3.2.5, < 4.0)
69
+ rubocop-ast (>= 1.24.1, < 2.0)
70
+ ruby-progressbar (~> 1.7)
71
+ unicode-display_width (>= 2.4.0, < 3.0)
72
+ rubocop-ast (1.24.1)
73
+ parser (>= 3.1.1.0)
74
+ ruby-progressbar (1.11.0)
75
+ solargraph (0.48.0)
76
+ backport (~> 1.2)
77
+ benchmark
78
+ bundler (>= 1.17.2)
79
+ diff-lcs (~> 1.4)
80
+ e2mmap
81
+ jaro_winkler (~> 1.5)
82
+ kramdown (~> 2.3)
83
+ kramdown-parser-gfm (~> 1.1)
84
+ parser (~> 3.0)
85
+ reverse_markdown (>= 1.0.5, < 3)
86
+ rubocop (>= 0.52)
87
+ thor (~> 1.0)
88
+ tilt (~> 2.0)
89
+ yard (~> 0.9, >= 0.9.24)
90
+ terminal-table (3.0.2)
20
91
  unicode-display_width (>= 1.1.1, < 3)
21
- unicode-display_width (2.0.0)
22
- yard (0.9.26)
92
+ thor (1.2.1)
93
+ tilt (2.0.11)
94
+ unicode-display_width (2.4.2)
95
+ webrick (1.7.0)
96
+ yard (0.9.28)
97
+ webrick (~> 1.7.0)
23
98
 
24
99
  PLATFORMS
25
100
  arm64-darwin-20
101
+ ruby
26
102
  x86_64-darwin-20
27
103
  x86_64-linux
28
104
 
29
105
  DEPENDENCIES
106
+ bundler
30
107
  github-markup
31
108
  minitest (~> 5.0)
32
109
  numpy
33
110
  pycall
34
111
  rake (~> 13.0)
35
112
  redcarpet
113
+ rspec
36
114
  ruby-spacy!
115
+ solargraph
37
116
  terminal-table
38
117
  yard
39
118
 
40
119
  BUNDLED WITH
41
- 2.2.21
120
+ 2.4.2
data/README.md CHANGED
@@ -1,6 +1,4 @@
1
- # ruby-spacy
2
-
3
- ⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
1
+ # 💎 ruby-spacy
4
2
 
5
3
  ## Overview
6
4
 
@@ -16,22 +14,22 @@
16
14
 
17
15
  ## Installation of prerequisites
18
16
 
19
- Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.8.5, for instance, using pyenv with `enable-shared` as follows:
17
+ **IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
20
18
 
21
19
  ```shell
22
- $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.8.5
20
+ $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
23
21
  ```
24
22
 
25
23
  Don't forget to make it accessible from your working directory.
26
24
 
27
25
  ```shell
28
- $ pyenv local 3.8.5
26
+ $ pyenv local 3.10.6
29
27
  ```
30
28
 
31
29
  Or alternatively:
32
30
 
33
31
  ```shell
34
- $ pyenv global 3.8.5
32
+ $ pyenv global 3.10.6
35
33
  ```
36
34
 
37
35
  Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
@@ -451,7 +449,7 @@ query = tokyo.vector - japan.vector + france.vector
451
449
  headings = ["rank", "text", "score"]
452
450
  rows = []
453
451
 
454
- results = nlp.most_similar(query, 20)
452
+ results = nlp.most_similar(query, 10)
455
453
  results.each_with_index do |lexeme, i|
456
454
  index = (i + 1).to_s
457
455
  rows << [index, lexeme.text, lexeme.score]
@@ -501,7 +499,7 @@ query = tokyo.vector - japan.vector + france.vector
501
499
  headings = ["rank", "text", "score"]
502
500
  rows = []
503
501
 
504
- results = nlp.most_similar(query, 20)
502
+ results = nlp.most_similar(query, 10)
505
503
  results.each_with_index do |lexeme, i|
506
504
  index = (i + 1).to_s
507
505
  rows << [index, lexeme.text, lexeme.score]
@@ -541,4 +539,3 @@ I would like to thank the following open source projects and their creators for
541
539
  ## License
542
540
 
543
541
  This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
544
-
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("I love coffee")
6
8
 
7
- headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
9
+ headings = %w[text shape prefix suffix is_alpha is_digit]
8
10
  rows = []
9
11
 
10
12
  doc.each do |word|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "pos", "dep"]
9
+ headings = %w[text pos dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
9
+ headings = %w[text shape is_alpha is_stop morphology]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("France")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["rank", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
- doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "start_char", "end_char", "label"]
9
+ headings = %w[text start_char end_char label]
8
10
  rows = []
9
11
 
10
12
  doc.ents.each do |ent|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep"]
9
+ headings = %w[text lemma pos tag dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_lg")
4
6
  doc1 = nlp.read("I like salty fries and hamburgers.")
5
7
  doc2 = nlp.read("Fast food tastes very good.")
6
8
 
7
- puts "Doc 1: " + doc1.text
8
- puts "Doc 2: " + doc2.text
9
+ puts "Doc 1: #{doc1.text}"
10
+ puts "Doc 2: #{doc2.text}"
9
11
  puts "Similarity: #{doc1.similarity(doc2)}"
10
12
 
11
13
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
10
12
  File.open(File.join("test_dep.svg"), "w") do |file|
11
13
  file.write(dep_svg)
12
14
  end
13
-
14
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
7
+ sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
6
8
  doc = nlp.read(sentence)
7
9
 
8
- ent_html = doc.displacy(style: 'ent')
10
+ ent_html = doc.displacy(style: "ent")
9
11
 
10
12
  File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
11
13
  file.write(ent_html)
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
  doc = nlp.read("I love coffee")
5
7
 
6
8
  pp doc.vocab.strings["coffee"]
7
- pp doc.vocab.strings[3197928453018144401]
9
+ pp doc.vocab.strings[3_197_928_453_018_144_401]
8
10
 
9
11
  # 3197928453018144401
10
12
  # "coffee"
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc = nlp.read("dog cat banana afskfsd")
6
8
 
7
- headings = ["text", "has_vector", "vector_norm", "is_oov"]
9
+ headings = %w[text has_vector vector_norm is_oov]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "私の父は寿司が好きだ。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  # subject = Spacy::Token.new(root.lefts[0])
20
22
  subject = Spacy::Token.new(root.lefts[0])
21
23
 
22
- puts "The root of the sentence is: " + root.text
23
- puts "The subject of the sentence is: " + subject.text
24
+ puts "The root of the sentence is: #{root.text}"
25
+ puts "The subject of the sentence is: #{subject.text}"
24
26
 
25
27
  subject.subtree.each do |descendant|
26
28
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
5
7
 
6
- sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
8
+ sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
@@ -1,4 +1,6 @@
1
- require( "ruby-spacy")
1
+ # frozen_string_literal: true
2
+
3
+ require("ruby-spacy")
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
7
9
  nlp.add_pipe("merge_noun_chunks")
8
10
 
9
11
  texts = [
10
- "アメリカ合衆国の国土面積は日本の約25倍あります。",
11
- "現在1ドルは日本円で110円です。",
12
+ "アメリカ合衆国の国土面積は日本の約25倍あります。",
13
+ "現在1ドルは日本円で110円です。"
12
14
  ]
13
15
 
14
16
  texts.each do |text|
15
17
  doc = nlp.read(text)
16
18
  doc.each do |token|
17
- if token.dep_ == "case"
18
- puts token.head.text + " --> " + token.text
19
- end
19
+ puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
20
20
  end
21
21
  end
22
22
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
5
7
 
6
8
  doc = nlp.read("私は論文を読んでいるところだった。")
7
9
 
8
- headings = ["text", "lemma"]
10
+ headings = %w[text lemma]
9
11
  rows = []
10
12
 
11
13
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,7 +11,7 @@ france = nlp.get_lexeme("フランス")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["rank", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "任天堂は1983年にファミコンを14,800円で発売した。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "start", "end", "label"]
11
+ headings = %w[text start end label]
10
12
  rows = []
11
13
 
12
14
  doc.ents.each do |ent|
@@ -24,4 +26,3 @@ puts table
24
26
  # | ファミコン | 10 | 15 | PRODUCT |
25
27
  # | 14,800円 | 16 | 23 | MONEY |
26
28
  # +------------+-------+-----+---------+
27
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -15,20 +17,20 @@ end
15
17
  table = Terminal::Table.new rows: rows, headings: headings
16
18
  puts table
17
19
 
18
- +------+----------+-----------+----------+------------------------+
19
- | text | dep | head text | head pos | children |
20
- +------+----------+-----------+----------+------------------------+
21
- | 自動 | compound | 車 | 92 | |
22
- | 運転 | compound | 車 | 92 | |
23
- | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
24
- | は | case | 車 | 92 | |
25
- | 保険 | compound | 責任 | 92 | |
26
- | 責任 | obj | 転嫁 | 100 | 保険, を |
27
- | を | case | 責任 | 92 | |
28
- | 製造 | compound | 者 | 92 | |
29
- | 者 | obl | 転嫁 | 100 | 製造, に |
30
- | に | case | 者 | 92 | |
31
- | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
32
- | する | aux | 転嫁 | 100 | |
33
- | 。 | punct | 転嫁 | 100 | |
34
- +------+----------+-----------+----------+------------------------+
20
+ # +------+----------+-----------+----------+------------------------+
21
+ # | text | dep | head text | head pos | children |
22
+ # +------+----------+-----------+----------+------------------------+
23
+ # | 自動 | compound | 車 | 92 | |
24
+ # | 運転 | compound | 車 | 92 | |
25
+ # | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
26
+ # | は | case | 車 | 92 | |
27
+ # | 保険 | compound | 責任 | 92 | |
28
+ # | 責任 | obj | 転嫁 | 100 | 保険, を |
29
+ # | を | case | 責任 | 92 | |
30
+ # | 製造 | compound | 者 | 92 | |
31
+ # | 者 | obl | 転嫁 | 100 | 製造, に |
32
+ # | に | case | 者 | 92 | |
33
+ # | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
34
+ # | する | aux | 転嫁 | 100 | |
35
+ # | 。 | punct | 転嫁 | 100 | |
36
+ # +------+----------+-----------+----------+------------------------+