ruby-spacy 0.1.4 → 0.1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/CHANGELOG.md +5 -1
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +3 -3
  7. data/README.md +40 -39
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +30 -27
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +30 -27
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +142 -136
  58. data/ruby-spacy.gemspec +15 -17
  59. data/tags +132 -0
  60. metadata +69 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd5a1c905e5aed7553ac5b1927a6b9cdecaf887c505ea3e38f806e886adeb60c
4
- data.tar.gz: 6d3f3fd22e9d927d430d2b9e48dcd018da6eb601813192e6ea14e094cf51e331
3
+ metadata.gz: 4cd52dfe6ab652bcefacd7401deef42ccccb5e711d418ca127776e66673b87f0
4
+ data.tar.gz: 5b2bd6ac16341c09e53673a31b60cb31a9c07d89344d35b7a7d9c01fe629881a
5
5
  SHA512:
6
- metadata.gz: b5419fb75109b837465c64da1ace956b91d0a0ab589cdb71ace9a308ce1af263edc0e2f206a80ab71a3ab17e86e6520ab432b657c5f60548c696a36049773c60
7
- data.tar.gz: 385606212f290b701458bd1a555e553417ed20be2d1e2008107396a9adc224590c76317c52d30d7c97435c0650ef8c1a15a43fe4b92c797188944a302da51612
6
+ metadata.gz: 3af0557f6a33c0a4bfbf6a65e1e8922e14dc3f9df70fbc7bbe271212134b3d8b27e908aa79fd172fedf9e8daa4b39c6d1967eb1e2972d186c9ddb0a0bd6685c5
7
+ data.tar.gz: c893e49c75fb0ddb861c052ca5415df0235ef6d1d15960f6b2e5c4b815f1f6018607a8fff6b56bd1cbf5514c9762f8e39c9ce731f8ae713d148eb17eb7d9531b
data/.rubocop.yml ADDED
@@ -0,0 +1,48 @@
1
+ AllCops:
2
+ NewCops: disable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 2.6
5
+
6
+ Documentation:
7
+ Enabled: false
8
+
9
+ Naming/VariableNumber:
10
+ Enabled: false
11
+
12
+ Naming/FileName:
13
+ Enabled: false
14
+
15
+ Style/StringLiterals:
16
+ Enabled: true
17
+ EnforcedStyle: double_quotes
18
+
19
+ Style/StringLiteralsInInterpolation:
20
+ Enabled: true
21
+ EnforcedStyle: double_quotes
22
+
23
+ Layout/LineLength:
24
+ Max: 400
25
+
26
+ Metrics/MethodLength:
27
+ Max: 80
28
+
29
+ Metrics/BlockLength:
30
+ Max: 60
31
+
32
+ Metrics/AbcSize:
33
+ Max: 60
34
+
35
+ Metrics/PerceivedComplexity:
36
+ Max: 10
37
+
38
+ Metrics/ClassLength:
39
+ Max: 400
40
+
41
+ Metrics/CyclomaticComplexity:
42
+ Max: 20
43
+
44
+ Metrics/ParameterLists:
45
+ Max: 8
46
+
47
+ Metrics/ModuleLength:
48
+ Max: 200
data/.solargraph.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ include:
3
+ - "**/*.rb"
4
+ exclude:
5
+ - spec/**/*
6
+ - test/**/*
7
+ - vendor/**/*
8
+ - ".bundle/**/*"
9
+ require: []
10
+ domains: []
11
+ reporters:
12
+ - rubocop
13
+ # - require_not_found
14
+ formatter:
15
+ rubocop:
16
+ cops: safe
17
+ except: []
18
+ only: []
19
+ extra_args: []
20
+ require_paths: []
21
+ plugins: []
22
+ max_files: 5000
data/CHANGELOG.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # Change Log
2
2
 
3
- ## 0.1.2 - 2021-06-26
3
+ ## 0.1.4.1 - 2021-07-06
4
+ - Test code refined
5
+ - `Spacy::Language::most_similar` returns an array of hash-based objects that accepts method calls
6
+
7
+ ## 0.1.4 - 2021-06-26
4
8
  ### Added
5
9
  - `Spacy::Lexeme` class
6
10
 
data/Gemfile CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in ruby-spacy.gemspec
6
6
  gemspec
7
7
 
8
- gem 'pycall'
9
- gem 'numpy'
10
- gem 'terminal-table'
8
+ gem "numpy"
9
+ gem "pycall"
10
+ gem "terminal-table"
11
11
 
12
12
  group :development do
13
- gem "rake", "~> 13.0"
13
+ gem "github-markup"
14
14
  gem "minitest", "~> 5.0"
15
- gem 'yard'
16
- gem 'redcarpet'
17
- gem 'github-markup'
15
+ gem "rake", "~> 13.0"
16
+ gem "redcarpet"
17
+ gem "yard"
18
18
  end
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-spacy (0.1.4)
4
+ ruby-spacy (0.1.4.1)
5
5
  numpy (~> 0.4.0)
6
6
  pycall (~> 1.4.0)
7
7
  terminal-table (~> 3.0.1)
@@ -13,8 +13,8 @@ GEM
13
13
  minitest (5.14.4)
14
14
  numpy (0.4.0)
15
15
  pycall (>= 1.2.0.beta1)
16
- pycall (1.4.0)
17
- rake (13.0.3)
16
+ pycall (1.4.1)
17
+ rake (13.0.6)
18
18
  redcarpet (3.5.1)
19
19
  terminal-table (3.0.1)
20
20
  unicode-display_width (>= 1.1.1, < 3)
data/README.md CHANGED
@@ -1,6 +1,4 @@
1
- # ruby-spacy
2
-
3
- ⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
1
+ # 💎 ruby-spacy
4
2
 
5
3
  ## Overview
6
4
 
@@ -16,22 +14,22 @@
16
14
 
17
15
  ## Installation of prerequisites
18
16
 
19
- Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.8.5, for instance, using pyenv with `enable-shared` as follows:
17
+ **IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
20
18
 
21
19
  ```shell
22
- $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.8.5
20
+ $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
23
21
  ```
24
22
 
25
23
  Don't forget to make it accessible from your working directory.
26
24
 
27
25
  ```shell
28
- $ pyenv local 3.8.5
26
+ $ pyenv local 3.10.6
29
27
  ```
30
28
 
31
29
  Or alternatively:
32
30
 
33
31
  ```shell
34
- $ pyenv global 3.8.5
32
+ $ pyenv global 3.10.6
35
33
  ```
36
34
 
37
35
  Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
@@ -448,32 +446,36 @@ france = nlp.get_lexeme("France")
448
446
 
449
447
  query = tokyo.vector - japan.vector + france.vector
450
448
 
449
+ headings = ["rank", "text", "score"]
451
450
  rows = []
452
451
 
453
452
  results = nlp.most_similar(query, 10)
454
- results.each do |lexeme|
455
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
453
+ results.each_with_index do |lexeme, i|
454
+ index = (i + 1).to_s
455
+ rows << [index, lexeme.text, lexeme.score]
456
456
  end
457
457
 
458
- headings = ["key", "text", "score"]
459
458
  table = Terminal::Table.new rows: rows, headings: headings
460
459
  puts table
461
460
  ```
462
461
 
463
462
  Output:
464
463
 
465
- | key | text | score |
466
- |:---------------------|:------------|:-------------------|
467
- | 1432967385481565694 | FRANCE | 0.8346999883651733 |
468
- | 6613816697677965370 | France | 0.8346999883651733 |
469
- | 4362406852232399325 | france | 0.8346999883651733 |
470
- | 1637573253267610771 | PARIS | 0.7703999876976013 |
471
- | 15322182186497800017 | paris | 0.7703999876976013 |
472
- | 10427160276079242800 | Paris | 0.7703999876976013 |
473
- | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
474
- | 7944504257273452052 | Toulouse | 0.6381999850273132 |
475
- | 9614730213792621885 | toulouse | 0.6381999850273132 |
476
- | 8515538464606421210 | marseille | 0.6370999813079834 |
464
+ | rank | text | score |
465
+ |:-----|:------------|:-------------------|
466
+ | 1 | FRANCE | 0.8346999883651733 |
467
+ | 2 | France | 0.8346999883651733 |
468
+ | 3 | france | 0.8346999883651733 |
469
+ | 4 | PARIS | 0.7703999876976013 |
470
+ | 5 | paris | 0.7703999876976013 |
471
+ | 6 | Paris | 0.7703999876976013 |
472
+ | 7 | TOULOUSE | 0.6381999850273132 |
473
+ | 8 | Toulouse | 0.6381999850273132 |
474
+ | 9 | toulouse | 0.6381999850273132 |
475
+ | 10 | marseille | 0.6370999813079834 |
476
+
477
+
478
+
477
479
 
478
480
 
479
481
  ### Word vector calculation (Japanese)
@@ -494,33 +496,33 @@ france = nlp.get_lexeme("フランス")
494
496
 
495
497
  query = tokyo.vector - japan.vector + france.vector
496
498
 
499
+ headings = ["rank", "text", "score"]
497
500
  rows = []
498
501
 
499
502
  results = nlp.most_similar(query, 10)
500
- results.each do |lexeme|
501
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
503
+ results.each_with_index do |lexeme, i|
504
+ index = (i + 1).to_s
505
+ rows << [index, lexeme.text, lexeme.score]
502
506
  end
503
507
 
504
- headings = ["key", "text", "score"]
505
508
  table = Terminal::Table.new rows: rows, headings: headings
506
509
  puts table
507
510
  ```
508
511
 
509
512
  Output:
510
513
 
511
- | key | text | score |
512
- |:---------------------|:---------------|:-------------------|
513
- | 12090003238699662352 | パリ | 0.7376999855041504 |
514
- | 18290786970454458111 | フランス | 0.7221999764442444 |
515
- | 9360021637096476946 | 東京 | 0.6697999835014343 |
516
- | 2437546359230213520 | ストラスブール | 0.631600022315979 |
517
- | 13988178952745813186 | リヨン | 0.5939000248908997 |
518
- | 10427160276079242800 | Paris | 0.574400007724762 |
519
- | 5562396768860926997 | ベルギー | 0.5683000087738037 |
520
- | 15029176915627965481 | ニース | 0.5679000020027161 |
521
- | 9750625950625019690 | アルザス | 0.5644999742507935 |
522
- | 2381640614569534741 | 南仏 | 0.5547999739646912 |
523
-
514
+ | rank | text | score |
515
+ |:-----|:---------------|:-------------------|
516
+ | 1 | パリ | 0.7376999855041504 |
517
+ | 2 | フランス | 0.7221999764442444 |
518
+ | 3 | 東京 | 0.6697999835014343 |
519
+ | 4 | ストラスブール | 0.631600022315979 |
520
+ | 5 | リヨン | 0.5939000248908997 |
521
+ | 6 | Paris | 0.574400007724762 |
522
+ | 7 | ベルギー | 0.5683000087738037 |
523
+ | 8 | ニース | 0.5679000020027161 |
524
+ | 9 | アルザス | 0.5644999742507935 |
525
+ | 10 | 南仏 | 0.5547999739646912 |
524
526
 
525
527
  ## Author
526
528
 
@@ -537,4 +539,3 @@ I would like to thank the following open source projects and their creators for
537
539
  ## License
538
540
 
539
541
  This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
540
-
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("I love coffee")
6
8
 
7
- headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
9
+ headings = %w[text shape prefix suffix is_alpha is_digit]
8
10
  rows = []
9
11
 
10
12
  doc.each do |word|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "pos", "dep"]
9
+ headings = %w[text pos dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
9
+ headings = %w[text shape is_alpha is_stop morphology]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,38 +11,39 @@ france = nlp.get_lexeme("France")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["key", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
18
+ results.each_with_index do |lexeme, i|
19
+ index = (i + 1).to_s
20
+ rows << [index, lexeme.text, lexeme.score]
18
21
  end
19
22
 
20
23
  table = Terminal::Table.new rows: rows, headings: headings
21
24
  puts table
22
25
 
23
- # +----------------------+-------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+-------------+--------------------+
26
- # | 1432967385481565694 | FRANCE | 0.8346999883651733 |
27
- # | 6613816697677965370 | France | 0.8346999883651733 |
28
- # | 4362406852232399325 | france | 0.8346999883651733 |
29
- # | 1637573253267610771 | PARIS | 0.7703999876976013 |
30
- # | 15322182186497800017 | paris | 0.7703999876976013 |
31
- # | 10427160276079242800 | Paris | 0.7703999876976013 |
32
- # | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
33
- # | 7944504257273452052 | Toulouse | 0.6381999850273132 |
34
- # | 9614730213792621885 | toulouse | 0.6381999850273132 |
35
- # | 8515538464606421210 | marseille | 0.6370999813079834 |
36
- # | 8215995793762630878 | Marseille | 0.6370999813079834 |
37
- # | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
38
- # | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
39
- # | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
40
- # | 13936807859007616770 | bordeaux | 0.6096000075340271 |
41
- # | 8731576325682930212 | prague | 0.6075000166893005 |
42
- # | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
43
- # | 1133963107690000953 | Prague | 0.6075000166893005 |
44
- # | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
45
- # | 6936121537367717968 | switzerland | 0.6068000197410583 |
46
- # +----------------------+-------------+--------------------+
26
+ # +------+-------------+--------------------+
27
+ # | rank | text | score |
28
+ # +------+-------------+--------------------+
29
+ # | 1 | FRANCE | 0.8346999883651733 |
30
+ # | 2 | France | 0.8346999883651733 |
31
+ # | 3 | france | 0.8346999883651733 |
32
+ # | 4 | PARIS | 0.7703999876976013 |
33
+ # | 5 | paris | 0.7703999876976013 |
34
+ # | 6 | Paris | 0.7703999876976013 |
35
+ # | 7 | TOULOUSE | 0.6381999850273132 |
36
+ # | 8 | Toulouse | 0.6381999850273132 |
37
+ # | 9 | toulouse | 0.6381999850273132 |
38
+ # | 10 | marseille | 0.6370999813079834 |
39
+ # | 11 | Marseille | 0.6370999813079834 |
40
+ # | 12 | MARSEILLE | 0.6370999813079834 |
41
+ # | 13 | Bordeaux | 0.6096000075340271 |
42
+ # | 14 | BORDEAUX | 0.6096000075340271 |
43
+ # | 15 | bordeaux | 0.6096000075340271 |
44
+ # | 16 | prague | 0.6075000166893005 |
45
+ # | 17 | PRAGUE | 0.6075000166893005 |
46
+ # | 18 | Prague | 0.6075000166893005 |
47
+ # | 19 | SWITZERLAND | 0.6068000197410583 |
48
+ # | 20 | switzerland | 0.6068000197410583 |
49
+ # +------+-------------+--------------------+
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
- doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "start_char", "end_char", "label"]
9
+ headings = %w[text start_char end_char label]
8
10
  rows = []
9
11
 
10
12
  doc.ents.each do |ent|
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep"]
9
+ headings = %w[text lemma pos tag dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_lg")
4
6
  doc1 = nlp.read("I like salty fries and hamburgers.")
5
7
  doc2 = nlp.read("Fast food tastes very good.")
6
8
 
7
- puts "Doc 1: " + doc1.text
8
- puts "Doc 2: " + doc2.text
9
+ puts "Doc 1: #{doc1.text}"
10
+ puts "Doc 2: #{doc2.text}"
9
11
  puts "Similarity: #{doc1.similarity(doc2)}"
10
12
 
11
13
  # Doc 1: I like salty fries and hamburgers.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
10
12
  File.open(File.join("test_dep.svg"), "w") do |file|
11
13
  file.write(dep_svg)
12
14
  end
13
-
14
-
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
7
+ sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
6
8
  doc = nlp.read(sentence)
7
9
 
8
- ent_html = doc.displacy(style: 'ent')
10
+ ent_html = doc.displacy(style: "ent")
9
11
 
10
12
  File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
11
13
  file.write(ent_html)
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
  doc = nlp.read("I love coffee")
5
7
 
6
8
  pp doc.vocab.strings["coffee"]
7
- pp doc.vocab.strings[3197928453018144401]
9
+ pp doc.vocab.strings[3_197_928_453_018_144_401]
8
10
 
9
11
  # 3197928453018144401
10
12
  # "coffee"
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc = nlp.read("dog cat banana afskfsd")
6
8
 
7
- headings = ["text", "has_vector", "vector_norm", "is_oov"]
9
+ headings = %w[text has_vector vector_norm is_oov]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "私の父は寿司が好きだ。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  # subject = Spacy::Token.new(root.lefts[0])
20
22
  subject = Spacy::Token.new(root.lefts[0])
21
23
 
22
- puts "The root of the sentence is: " + root.text
23
- puts "The subject of the sentence is: " + subject.text
24
+ puts "The root of the sentence is: #{root.text}"
25
+ puts "The subject of the sentence is: #{subject.text}"
24
26
 
25
27
  subject.subtree.each do |descendant|
26
28
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
5
7
 
6
- sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
8
+ sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
@@ -1,4 +1,6 @@
1
- require( "ruby-spacy")
1
+ # frozen_string_literal: true
2
+
3
+ require("ruby-spacy")
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
7
9
  nlp.add_pipe("merge_noun_chunks")
8
10
 
9
11
  texts = [
10
- "アメリカ合衆国の国土面積は日本の約25倍あります。",
11
- "現在1ドルは日本円で110円です。",
12
+ "アメリカ合衆国の国土面積は日本の約25倍あります。",
13
+ "現在1ドルは日本円で110円です。"
12
14
  ]
13
15
 
14
16
  texts.each do |text|
15
17
  doc = nlp.read(text)
16
18
  doc.each do |token|
17
- if token.dep_ == "case"
18
- puts token.head.text + " --> " + token.text
19
- end
19
+ puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
20
20
  end
21
21
  end
22
22
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
5
7
 
6
8
  doc = nlp.read("私は論文を読んでいるところだった。")
7
9
 
8
- headings = ["text", "lemma"]
10
+ headings = %w[text lemma]
9
11
  rows = []
10
12
 
11
13
  doc.each do |token|