ruby-spacy 0.1.4 → 0.1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +48 -0
  3. data/.solargraph.yml +22 -0
  4. data/CHANGELOG.md +5 -1
  5. data/Gemfile +7 -7
  6. data/Gemfile.lock +3 -3
  7. data/README.md +40 -39
  8. data/examples/get_started/lexeme.rb +3 -1
  9. data/examples/get_started/linguistic_annotations.rb +3 -1
  10. data/examples/get_started/morphology.rb +3 -1
  11. data/examples/get_started/most_similar.rb +30 -27
  12. data/examples/get_started/named_entities.rb +4 -2
  13. data/examples/get_started/pos_tags_and_dependencies.rb +3 -1
  14. data/examples/get_started/similarity.rb +4 -2
  15. data/examples/get_started/tokenization.rb +3 -1
  16. data/examples/get_started/visualizing_dependencies.rb +2 -2
  17. data/examples/get_started/visualizing_dependencies_compact.rb +2 -0
  18. data/examples/get_started/visualizing_named_entities.rb +4 -2
  19. data/examples/get_started/vocab.rb +3 -1
  20. data/examples/get_started/word_vectors.rb +3 -1
  21. data/examples/japanese/ancestors.rb +6 -4
  22. data/examples/japanese/entity_annotations_and_labels.rb +4 -2
  23. data/examples/japanese/information_extraction.rb +6 -6
  24. data/examples/japanese/lemmatization.rb +3 -1
  25. data/examples/japanese/most_similar.rb +30 -27
  26. data/examples/japanese/named_entity_recognition.rb +3 -2
  27. data/examples/japanese/navigating_parse_tree.rb +19 -17
  28. data/examples/japanese/noun_chunks.rb +2 -0
  29. data/examples/japanese/pos_tagging.rb +3 -1
  30. data/examples/japanese/sentence_segmentation.rb +3 -2
  31. data/examples/japanese/similarity.rb +2 -0
  32. data/examples/japanese/tokenization.rb +2 -0
  33. data/examples/japanese/visualizing_dependencies.rb +3 -1
  34. data/examples/japanese/visualizing_named_entities.rb +4 -2
  35. data/examples/linguistic_features/ancestors.rb +7 -5
  36. data/examples/linguistic_features/entity_annotations_and_labels.rb +4 -2
  37. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +3 -5
  38. data/examples/linguistic_features/information_extraction.rb +9 -9
  39. data/examples/linguistic_features/iterating_children.rb +6 -8
  40. data/examples/linguistic_features/iterating_lefts_and_rights.rb +7 -5
  41. data/examples/linguistic_features/lemmatization.rb +3 -1
  42. data/examples/linguistic_features/named_entity_recognition.rb +3 -1
  43. data/examples/linguistic_features/navigating_parse_tree.rb +3 -1
  44. data/examples/linguistic_features/noun_chunks.rb +3 -1
  45. data/examples/linguistic_features/pos_tagging.rb +3 -1
  46. data/examples/linguistic_features/retokenize_1.rb +2 -0
  47. data/examples/linguistic_features/retokenize_2.rb +4 -2
  48. data/examples/linguistic_features/rule_based_morphology.rb +4 -2
  49. data/examples/linguistic_features/sentence_segmentation.rb +3 -2
  50. data/examples/linguistic_features/similarity.rb +4 -2
  51. data/examples/linguistic_features/similarity_between_lexemes.rb +2 -0
  52. data/examples/linguistic_features/similarity_between_spans.rb +7 -5
  53. data/examples/linguistic_features/tokenization.rb +3 -2
  54. data/examples/rule_based_matching/creating_spans_from_matches.rb +5 -3
  55. data/examples/rule_based_matching/matcher.rb +4 -2
  56. data/lib/ruby-spacy/version.rb +1 -1
  57. data/lib/ruby-spacy.rb +142 -136
  58. data/ruby-spacy.gemspec +15 -17
  59. data/tags +132 -0
  60. metadata +69 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd5a1c905e5aed7553ac5b1927a6b9cdecaf887c505ea3e38f806e886adeb60c
4
- data.tar.gz: 6d3f3fd22e9d927d430d2b9e48dcd018da6eb601813192e6ea14e094cf51e331
3
+ metadata.gz: 4cd52dfe6ab652bcefacd7401deef42ccccb5e711d418ca127776e66673b87f0
4
+ data.tar.gz: 5b2bd6ac16341c09e53673a31b60cb31a9c07d89344d35b7a7d9c01fe629881a
5
5
  SHA512:
6
- metadata.gz: b5419fb75109b837465c64da1ace956b91d0a0ab589cdb71ace9a308ce1af263edc0e2f206a80ab71a3ab17e86e6520ab432b657c5f60548c696a36049773c60
7
- data.tar.gz: 385606212f290b701458bd1a555e553417ed20be2d1e2008107396a9adc224590c76317c52d30d7c97435c0650ef8c1a15a43fe4b92c797188944a302da51612
6
+ metadata.gz: 3af0557f6a33c0a4bfbf6a65e1e8922e14dc3f9df70fbc7bbe271212134b3d8b27e908aa79fd172fedf9e8daa4b39c6d1967eb1e2972d186c9ddb0a0bd6685c5
7
+ data.tar.gz: c893e49c75fb0ddb861c052ca5415df0235ef6d1d15960f6b2e5c4b815f1f6018607a8fff6b56bd1cbf5514c9762f8e39c9ce731f8ae713d148eb17eb7d9531b
data/.rubocop.yml ADDED
@@ -0,0 +1,48 @@
1
+ AllCops:
2
+ NewCops: disable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 2.6
5
+
6
+ Documentation:
7
+ Enabled: false
8
+
9
+ Naming/VariableNumber:
10
+ Enabled: false
11
+
12
+ Naming/FileName:
13
+ Enabled: false
14
+
15
+ Style/StringLiterals:
16
+ Enabled: true
17
+ EnforcedStyle: double_quotes
18
+
19
+ Style/StringLiteralsInInterpolation:
20
+ Enabled: true
21
+ EnforcedStyle: double_quotes
22
+
23
+ Layout/LineLength:
24
+ Max: 400
25
+
26
+ Metrics/MethodLength:
27
+ Max: 80
28
+
29
+ Metrics/BlockLength:
30
+ Max: 60
31
+
32
+ Metrics/AbcSize:
33
+ Max: 60
34
+
35
+ Metrics/PerceivedComplexity:
36
+ Max: 10
37
+
38
+ Metrics/ClassLength:
39
+ Max: 400
40
+
41
+ Metrics/CyclomaticComplexity:
42
+ Max: 20
43
+
44
+ Metrics/ParameterLists:
45
+ Max: 8
46
+
47
+ Metrics/ModuleLength:
48
+ Max: 200
data/.solargraph.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ include:
3
+ - "**/*.rb"
4
+ exclude:
5
+ - spec/**/*
6
+ - test/**/*
7
+ - vendor/**/*
8
+ - ".bundle/**/*"
9
+ require: []
10
+ domains: []
11
+ reporters:
12
+ - rubocop
13
+ # - require_not_found
14
+ formatter:
15
+ rubocop:
16
+ cops: safe
17
+ except: []
18
+ only: []
19
+ extra_args: []
20
+ require_paths: []
21
+ plugins: []
22
+ max_files: 5000
data/CHANGELOG.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # Change Log
2
2
 
3
- ## 0.1.2 - 2021-06-26
3
+ ## 0.1.4.1 - 2021-07-06
4
+ - Test code refined
5
+ - `Spacy::Language::most_similar` returns an array of hash-based objects that accepts method calls
6
+
7
+ ## 0.1.4 - 2021-06-26
4
8
  ### Added
5
9
  - `Spacy::Lexeme` class
6
10
 
data/Gemfile CHANGED
@@ -5,14 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in ruby-spacy.gemspec
6
6
  gemspec
7
7
 
8
- gem 'pycall'
9
- gem 'numpy'
10
- gem 'terminal-table'
8
+ gem "numpy"
9
+ gem "pycall"
10
+ gem "terminal-table"
11
11
 
12
12
  group :development do
13
- gem "rake", "~> 13.0"
13
+ gem "github-markup"
14
14
  gem "minitest", "~> 5.0"
15
- gem 'yard'
16
- gem 'redcarpet'
17
- gem 'github-markup'
15
+ gem "rake", "~> 13.0"
16
+ gem "redcarpet"
17
+ gem "yard"
18
18
  end
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-spacy (0.1.4)
4
+ ruby-spacy (0.1.4.1)
5
5
  numpy (~> 0.4.0)
6
6
  pycall (~> 1.4.0)
7
7
  terminal-table (~> 3.0.1)
@@ -13,8 +13,8 @@ GEM
13
13
  minitest (5.14.4)
14
14
  numpy (0.4.0)
15
15
  pycall (>= 1.2.0.beta1)
16
- pycall (1.4.0)
17
- rake (13.0.3)
16
+ pycall (1.4.1)
17
+ rake (13.0.6)
18
18
  redcarpet (3.5.1)
19
19
  terminal-table (3.0.1)
20
20
  unicode-display_width (>= 1.1.1, < 3)
data/README.md CHANGED
@@ -1,6 +1,4 @@
1
- # ruby-spacy
2
-
3
- ⚠️ This project is **work-in-progress** and is provided as-is. There may be breaking changes committed to this repository without notice.
1
+ # 💎 ruby-spacy
4
2
 
5
3
  ## Overview
6
4
 
@@ -16,22 +14,22 @@
16
14
 
17
15
  ## Installation of prerequisites
18
16
 
19
- Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.8.5, for instance, using pyenv with `enable-shared` as follows:
17
+ **IMPORTANT**: Make sure that the `enable-shared` option is enabled in your Python installation. You can use [pyenv](https://github.com/pyenv/pyenv) to install any version of Python you like. Install Python 3.10.6, for instance, using pyenv with `enable-shared` as follows:
20
18
 
21
19
  ```shell
22
- $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.8.5
20
+ $ env CONFIGURE_OPTS="--enable-shared" pyenv install 3.10.6
23
21
  ```
24
22
 
25
23
  Don't forget to make it accessible from your working directory.
26
24
 
27
25
  ```shell
28
- $ pyenv local 3.8.5
26
+ $ pyenv local 3.10.6
29
27
  ```
30
28
 
31
29
  Or alternatively:
32
30
 
33
31
  ```shell
34
- $ pyenv global 3.8.5
32
+ $ pyenv global 3.10.6
35
33
  ```
36
34
 
37
35
  Then, install [spaCy](https://spacy.io/). If you use `pip`, the following command will do:
@@ -448,32 +446,36 @@ france = nlp.get_lexeme("France")
448
446
 
449
447
  query = tokyo.vector - japan.vector + france.vector
450
448
 
449
+ headings = ["rank", "text", "score"]
451
450
  rows = []
452
451
 
453
452
  results = nlp.most_similar(query, 10)
454
- results.each do |lexeme|
455
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
453
+ results.each_with_index do |lexeme, i|
454
+ index = (i + 1).to_s
455
+ rows << [index, lexeme.text, lexeme.score]
456
456
  end
457
457
 
458
- headings = ["key", "text", "score"]
459
458
  table = Terminal::Table.new rows: rows, headings: headings
460
459
  puts table
461
460
  ```
462
461
 
463
462
  Output:
464
463
 
465
- | key | text | score |
466
- |:---------------------|:------------|:-------------------|
467
- | 1432967385481565694 | FRANCE | 0.8346999883651733 |
468
- | 6613816697677965370 | France | 0.8346999883651733 |
469
- | 4362406852232399325 | france | 0.8346999883651733 |
470
- | 1637573253267610771 | PARIS | 0.7703999876976013 |
471
- | 15322182186497800017 | paris | 0.7703999876976013 |
472
- | 10427160276079242800 | Paris | 0.7703999876976013 |
473
- | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
474
- | 7944504257273452052 | Toulouse | 0.6381999850273132 |
475
- | 9614730213792621885 | toulouse | 0.6381999850273132 |
476
- | 8515538464606421210 | marseille | 0.6370999813079834 |
464
+ | rank | text | score |
465
+ |:-----|:------------|:-------------------|
466
+ | 1 | FRANCE | 0.8346999883651733 |
467
+ | 2 | France | 0.8346999883651733 |
468
+ | 3 | france | 0.8346999883651733 |
469
+ | 4 | PARIS | 0.7703999876976013 |
470
+ | 5 | paris | 0.7703999876976013 |
471
+ | 6 | Paris | 0.7703999876976013 |
472
+ | 7 | TOULOUSE | 0.6381999850273132 |
473
+ | 8 | Toulouse | 0.6381999850273132 |
474
+ | 9 | toulouse | 0.6381999850273132 |
475
+ | 10 | marseille | 0.6370999813079834 |
476
+
477
+
478
+
477
479
 
478
480
 
479
481
  ### Word vector calculation (Japanese)
@@ -494,33 +496,33 @@ france = nlp.get_lexeme("フランス")
494
496
 
495
497
  query = tokyo.vector - japan.vector + france.vector
496
498
 
499
+ headings = ["rank", "text", "score"]
497
500
  rows = []
498
501
 
499
502
  results = nlp.most_similar(query, 10)
500
- results.each do |lexeme|
501
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
503
+ results.each_with_index do |lexeme, i|
504
+ index = (i + 1).to_s
505
+ rows << [index, lexeme.text, lexeme.score]
502
506
  end
503
507
 
504
- headings = ["key", "text", "score"]
505
508
  table = Terminal::Table.new rows: rows, headings: headings
506
509
  puts table
507
510
  ```
508
511
 
509
512
  Output:
510
513
 
511
- | key | text | score |
512
- |:---------------------|:---------------|:-------------------|
513
- | 12090003238699662352 | パリ | 0.7376999855041504 |
514
- | 18290786970454458111 | フランス | 0.7221999764442444 |
515
- | 9360021637096476946 | 東京 | 0.6697999835014343 |
516
- | 2437546359230213520 | ストラスブール | 0.631600022315979 |
517
- | 13988178952745813186 | リヨン | 0.5939000248908997 |
518
- | 10427160276079242800 | Paris | 0.574400007724762 |
519
- | 5562396768860926997 | ベルギー | 0.5683000087738037 |
520
- | 15029176915627965481 | ニース | 0.5679000020027161 |
521
- | 9750625950625019690 | アルザス | 0.5644999742507935 |
522
- | 2381640614569534741 | 南仏 | 0.5547999739646912 |
523
-
514
+ | rank | text | score |
515
+ |:-----|:---------------|:-------------------|
516
+ | 1 | パリ | 0.7376999855041504 |
517
+ | 2 | フランス | 0.7221999764442444 |
518
+ | 3 | 東京 | 0.6697999835014343 |
519
+ | 4 | ストラスブール | 0.631600022315979 |
520
+ | 5 | リヨン | 0.5939000248908997 |
521
+ | 6 | Paris | 0.574400007724762 |
522
+ | 7 | ベルギー | 0.5683000087738037 |
523
+ | 8 | ニース | 0.5679000020027161 |
524
+ | 9 | アルザス | 0.5644999742507935 |
525
+ | 10 | 南仏 | 0.5547999739646912 |
524
526
 
525
527
  ## Author
526
528
 
@@ -537,4 +539,3 @@ I would like to thank the following open source projects and their creators for
537
539
  ## License
538
540
 
539
541
  This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
540
-
data/examples/get_started/lexeme.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("I love coffee")
6
8
 
7
- headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
9
+ headings = %w[text shape prefix suffix is_alpha is_digit]
8
10
  rows = []
9
11
 
10
12
  doc.each do |word|
data/examples/get_started/linguistic_annotations.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "pos", "dep"]
9
+ headings = %w[text pos dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/get_started/morphology.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "shape", "is_alpha", "is_stop", "morphology"]
9
+ headings = %w[text shape is_alpha is_stop morphology]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/get_started/most_similar.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -9,38 +11,39 @@ france = nlp.get_lexeme("France")
9
11
 
10
12
  query = tokyo.vector - japan.vector + france.vector
11
13
 
12
- headings = ["key", "text", "score"]
14
+ headings = %w[rank text score]
13
15
  rows = []
14
16
 
15
17
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
18
+ results.each_with_index do |lexeme, i|
19
+ index = (i + 1).to_s
20
+ rows << [index, lexeme.text, lexeme.score]
18
21
  end
19
22
 
20
23
  table = Terminal::Table.new rows: rows, headings: headings
21
24
  puts table
22
25
 
23
- # +----------------------+-------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+-------------+--------------------+
26
- # | 1432967385481565694 | FRANCE | 0.8346999883651733 |
27
- # | 6613816697677965370 | France | 0.8346999883651733 |
28
- # | 4362406852232399325 | france | 0.8346999883651733 |
29
- # | 1637573253267610771 | PARIS | 0.7703999876976013 |
30
- # | 15322182186497800017 | paris | 0.7703999876976013 |
31
- # | 10427160276079242800 | Paris | 0.7703999876976013 |
32
- # | 975948890941980630 | TOULOUSE | 0.6381999850273132 |
33
- # | 7944504257273452052 | Toulouse | 0.6381999850273132 |
34
- # | 9614730213792621885 | toulouse | 0.6381999850273132 |
35
- # | 8515538464606421210 | marseille | 0.6370999813079834 |
36
- # | 8215995793762630878 | Marseille | 0.6370999813079834 |
37
- # | 12360854743603227406 | MARSEILLE | 0.6370999813079834 |
38
- # | 8339539946446536307 | Bordeaux | 0.6096000075340271 |
39
- # | 17690237501437860177 | BORDEAUX | 0.6096000075340271 |
40
- # | 13936807859007616770 | bordeaux | 0.6096000075340271 |
41
- # | 8731576325682930212 | prague | 0.6075000166893005 |
42
- # | 11722746441803481839 | PRAGUE | 0.6075000166893005 |
43
- # | 1133963107690000953 | Prague | 0.6075000166893005 |
44
- # | 16693216792428069950 | SWITZERLAND | 0.6068000197410583 |
45
- # | 6936121537367717968 | switzerland | 0.6068000197410583 |
46
- # +----------------------+-------------+--------------------+
26
+ # +------+-------------+--------------------+
27
+ # | rank | text | score |
28
+ # +------+-------------+--------------------+
29
+ # | 1 | FRANCE | 0.8346999883651733 |
30
+ # | 2 | France | 0.8346999883651733 |
31
+ # | 3 | france | 0.8346999883651733 |
32
+ # | 4 | PARIS | 0.7703999876976013 |
33
+ # | 5 | paris | 0.7703999876976013 |
34
+ # | 6 | Paris | 0.7703999876976013 |
35
+ # | 7 | TOULOUSE | 0.6381999850273132 |
36
+ # | 8 | Toulouse | 0.6381999850273132 |
37
+ # | 9 | toulouse | 0.6381999850273132 |
38
+ # | 10 | marseille | 0.6370999813079834 |
39
+ # | 11 | Marseille | 0.6370999813079834 |
40
+ # | 12 | MARSEILLE | 0.6370999813079834 |
41
+ # | 13 | Bordeaux | 0.6096000075340271 |
42
+ # | 14 | BORDEAUX | 0.6096000075340271 |
43
+ # | 15 | bordeaux | 0.6096000075340271 |
44
+ # | 16 | prague | 0.6075000166893005 |
45
+ # | 17 | PRAGUE | 0.6075000166893005 |
46
+ # | 18 | Prague | 0.6075000166893005 |
47
+ # | 19 | SWITZERLAND | 0.6068000197410583 |
48
+ # | 20 | switzerland | 0.6068000197410583 |
49
+ # +------+-------------+--------------------+
data/examples/get_started/named_entities.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
- doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
6
8
 
7
- headings = ["text", "start_char", "end_char", "label"]
9
+ headings = %w[text start_char end_char label]
8
10
  rows = []
9
11
 
10
12
  doc.ents.each do |ent|
data/examples/get_started/pos_tags_and_dependencies.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_sm")
5
7
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
6
8
 
7
- headings = ["text", "lemma", "pos", "tag", "dep"]
9
+ headings = %w[text lemma pos tag dep]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/get_started/similarity.rb CHANGED
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_lg")
4
6
  doc1 = nlp.read("I like salty fries and hamburgers.")
5
7
  doc2 = nlp.read("Fast food tastes very good.")
6
8
 
7
- puts "Doc 1: " + doc1.text
8
- puts "Doc 2: " + doc2.text
9
+ puts "Doc 1: #{doc1.text}"
10
+ puts "Doc 2: #{doc2.text}"
9
11
  puts "Similarity: #{doc1.similarity(doc2)}"
10
12
 
11
13
  # Doc 1: I like salty fries and hamburgers.
data/examples/get_started/tokenization.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
7
 
6
8
  doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
7
9
 
8
- headings = [1,2,3,4,5,6,7,8,9,10,11]
10
+ headings = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
9
11
  row = []
10
12
 
11
13
  doc.each do |token|
data/examples/get_started/visualizing_dependencies.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
@@ -10,5 +12,3 @@ dep_svg = doc.displacy(style: "dep", compact: false)
10
12
  File.open(File.join("test_dep.svg"), "w") do |file|
11
13
  file.write(dep_svg)
12
14
  end
13
-
14
-
data/examples/get_started/visualizing_dependencies_compact.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
data/examples/get_started/visualizing_named_entities.rb CHANGED
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
 
5
- sentence ="When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
7
+ sentence = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
6
8
  doc = nlp.read(sentence)
7
9
 
8
- ent_html = doc.displacy(style: 'ent')
10
+ ent_html = doc.displacy(style: "ent")
9
11
 
10
12
  File.open(File.join(File.dirname(__FILE__), "test_ent.html"), "w") do |file|
11
13
  file.write(ent_html)
data/examples/get_started/vocab.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
 
3
5
  nlp = Spacy::Language.new("en_core_web_sm")
4
6
  doc = nlp.read("I love coffee")
5
7
 
6
8
  pp doc.vocab.strings["coffee"]
7
- pp doc.vocab.strings[3197928453018144401]
9
+ pp doc.vocab.strings[3_197_928_453_018_144_401]
8
10
 
9
11
  # 3197928453018144401
10
12
  # "coffee"
data/examples/get_started/word_vectors.rb CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("en_core_web_lg")
5
7
  doc = nlp.read("dog cat banana afskfsd")
6
8
 
7
- headings = ["text", "has_vector", "vector_norm", "is_oov"]
9
+ headings = %w[text has_vector vector_norm is_oov]
8
10
  rows = []
9
11
 
10
12
  doc.each do |token|
data/examples/japanese/ancestors.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -6,7 +8,7 @@ nlp = Spacy::Language.new("ja_core_news_lg")
6
8
  sentence = "私の父は寿司が好きだ。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "dep", "n_lefts", "n_rights", "ancestors"]
11
+ headings = %w[text dep n_lefts n_rights ancestors]
10
12
  rows = []
11
13
 
12
14
  root = doc.tokens.select do |t|
@@ -14,13 +16,13 @@ root = doc.tokens.select do |t|
14
16
  t.i == t.head.i
15
17
  end.first
16
18
 
17
- puts "The sentence: " + sentence
19
+ puts "The sentence: #{sentence}"
18
20
 
19
21
  # subject = Spacy::Token.new(root.lefts[0])
20
22
  subject = Spacy::Token.new(root.lefts[0])
21
23
 
22
- puts "The root of the sentence is: " + root.text
23
- puts "The subject of the sentence is: " + subject.text
24
+ puts "The root of the sentence is: #{root.text}"
25
+ puts "The subject of the sentence is: #{subject.text}"
24
26
 
25
27
  subject.subtree.each do |descendant|
26
28
  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
data/examples/japanese/entity_annotations_and_labels.rb CHANGED
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
5
7
 
6
- sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
8
+ sentence = "同志社大学は日本の京都にある私立大学で、新島襄という人物が創立しました。"
7
9
  doc = nlp.read(sentence)
8
10
 
9
- headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
11
+ headings = %w[text ent_iob ent_iob_ ent_type_]
10
12
  rows = []
11
13
 
12
14
  doc.each do |ent|
data/examples/japanese/information_extraction.rb CHANGED
@@ -1,4 +1,6 @@
1
- require( "ruby-spacy")
1
+ # frozen_string_literal: true
2
+
3
+ require("ruby-spacy")
2
4
  require "terminal-table"
3
5
 
4
6
  nlp = Spacy::Language.new("ja_core_news_lg")
@@ -7,16 +9,14 @@ nlp.add_pipe("merge_entities")
7
9
  nlp.add_pipe("merge_noun_chunks")
8
10
 
9
11
  texts = [
10
- "アメリカ合衆国の国土面積は日本の約25倍あります。",
11
- "現在1ドルは日本円で110円です。",
12
+ "アメリカ合衆国の国土面積は日本の約25倍あります。",
13
+ "現在1ドルは日本円で110円です。"
12
14
  ]
13
15
 
14
16
  texts.each do |text|
15
17
  doc = nlp.read(text)
16
18
  doc.each do |token|
17
- if token.dep_ == "case"
18
- puts token.head.text + " --> " + token.text
19
- end
19
+ puts "#{token.head.text} --> #{token.text}" if token.dep_ == "case"
20
20
  end
21
21
  end
22
22
 
data/examples/japanese/lemmatization.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "ruby-spacy"
2
4
  require "terminal-table"
3
5
 
@@ -5,7 +7,7 @@ nlp = Spacy::Language.new("ja_core_news_sm")
5
7
 
6
8
  doc = nlp.read("私は論文を読んでいるところだった。")
7
9
 
8
- headings = ["text", "lemma"]
10
+ headings = %w[text lemma]
9
11
  rows = []
10
12
 
11
13
  doc.each do |token|