engtagger 0.3.2 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6aa6da6cfb58bffd900843f62675d5895e80428be7295ae056ed73327286233d
4
- data.tar.gz: dd412266b905ba4d378521540247a368bc4f73dfa89e8d6e58c220625c46e40d
3
+ metadata.gz: fe357706e69ed72bec9569babe91cc8531e2c1d0eac71ac8d248bdd74b97ba98
4
+ data.tar.gz: 02e6bb2ba29ecabf8e5087c5a2dc92ccad57ef3578fbd9c844f93188a4d39ced
5
5
  SHA512:
6
- metadata.gz: de1aa006ea943270e4dcea78690e8a10551c42819abbf3c27b6d2629d600745124ec5cfa6a6104d3cb4c87dbfc14d09e643e7b2143979dee27485841fd76b0fe
7
- data.tar.gz: 3404a699868beb475daee809cc67788a70152c0d5eba045b7d3c007e3b3fccb66ee6bb432832a8e9872cd6d3faf281fab60bf151c01eaf1cf52d6275644012bb
6
+ metadata.gz: 5e477b0d839e825e8d49135cb6d6c72c21555454d6f722d00b994442cdaaba2b1afa84ab8f22f82f64b9540a0e24914180c59a563830ea11b2a0239921d3e88e
7
+ data.tar.gz: 49b02532d7ad940b25b19ba59df364fc553373371f7850f068729af3a339773417ad7f8d5e0e58ecc3facc6ef2168ae86bb9f94a46f9a412bf51d7de36fdab1e
data/.rubocop.yml ADDED
@@ -0,0 +1,72 @@
1
+ AllCops:
2
+ NewCops: disable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 2.6
5
+
6
+ Documentation:
7
+ Enabled: false
8
+
9
+ Naming/AccessorMethodName:
10
+ Enabled: false
11
+
12
+ Naming/VariableNumber:
13
+ Enabled: false
14
+
15
+ Naming/FileName:
16
+ Enabled: false
17
+
18
+ Security/MarshalLoad:
19
+ Enabled: false
20
+
21
+ Style/ClassVars:
22
+ Enabled: false
23
+
24
+ Style/OptionalBooleanParameter:
25
+ Enabled: false
26
+
27
+ Style/StringConcatenation:
28
+ Enabled: false
29
+
30
+ Style/PerlBackrefs:
31
+ Enabled: false
32
+
33
+ Style/StringLiterals:
34
+ Enabled: true
35
+ EnforcedStyle: double_quotes
36
+
37
+ Style/StringLiteralsInInterpolation:
38
+ Enabled: true
39
+ EnforcedStyle: double_quotes
40
+
41
+ Style/WordArray:
42
+ Enabled: false
43
+
44
+ Style/EvalWithLocation:
45
+ Enabled: false
46
+
47
+ Layout/LineLength:
48
+ Max: 400
49
+
50
+ Metrics/MethodLength:
51
+ Max: 80
52
+
53
+ Metrics/BlockLength:
54
+ Max: 60
55
+
56
+ Metrics/AbcSize:
57
+ Max: 60
58
+
59
+ Metrics/PerceivedComplexity:
60
+ Max: 60
61
+
62
+ Metrics/ClassLength:
63
+ Max: 800
64
+
65
+ Metrics/CyclomaticComplexity:
66
+ Max: 60
67
+
68
+ Metrics/ParameterLists:
69
+ Max: 8
70
+
71
+ Metrics/ModuleLength:
72
+ Max: 200
data/.solargraph.yml ADDED
@@ -0,0 +1,22 @@
1
+ ---
2
+ include:
3
+ - "**/*.rb"
4
+ exclude:
5
+ - spec/**/*
6
+ - test/**/*
7
+ - vendor/**/*
8
+ - ".bundle/**/*"
9
+ require: []
10
+ domains: []
11
+ reporters:
12
+ - rubocop
13
+ # - require_not_found
14
+ formatter:
15
+ rubocop:
16
+ cops: safe
17
+ except: []
18
+ only: []
19
+ extra_args: []
20
+ require_paths: []
21
+ plugins: []
22
+ max_files: 5000
data/Gemfile CHANGED
@@ -1,3 +1,7 @@
1
- source 'https://rubygems.org'
1
+ # frozen_string_literal: true
2
2
 
3
- gem 'lru_redux'
3
+ source "https://rubygems.org"
4
+
5
+ gemspec
6
+
7
+ gem "lru_redux"
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  English Part-of-Speech Tagger Library; a Ruby port of Lingua::EN::Tagger
4
4
 
5
- ### Description
5
+ ## Description
6
6
 
7
7
  A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained
8
8
  tagger that assigns POS tags to English text based on a lookup dictionary and
@@ -13,64 +13,66 @@ word morphology or can be set to be treated as nouns or other parts of speech.
13
13
  The tagger also extracts as many nouns and noun phrases as it can, using a set
14
14
  of regular expressions.
15
15
 
16
- ### Features
16
+ ## Features
17
17
 
18
18
  * Assigns POS tags to English text
19
19
  * Extract noun phrases from tagged text
20
20
  * etc.
21
21
 
22
- ### Synopsis:
22
+ ## Synopsis
23
23
 
24
- require 'engtagger'
24
+ ```ruby
25
+ require 'engtagger'
25
26
 
26
- # Create a parser object
27
- tgr = EngTagger.new
27
+ # Create a parser object
28
+ tgr = EngTagger.new
28
29
 
29
- # Sample text
30
- text = "Alice chased the big fat cat."
30
+ # Sample text
31
+ text = "Alice chased the big fat cat."
31
32
 
32
- # Add part-of-speech tags to text
33
- tagged = tgr.add_tags(text)
33
+ # Add part-of-speech tags to text
34
+ tagged = tgr.add_tags(text)
34
35
 
35
- #=> "<nnp>Alice</nnp> <vbd>chased</vbd> <det>the</det> <jj>big</jj> <jj>fat</jj><nn>cat</nn> <pp>.</pp>"
36
+ #=> "<nnp>Alice</nnp> <vbd>chased</vbd> <det>the</det> <jj>big</jj> <jj>fat</jj> <nn>cat</nn> <pp>.</pp>"
36
37
 
37
- # Get a list of all nouns and noun phrases with occurrence counts
38
- word_list = tgr.get_words(text)
38
+ # Get a list of all nouns and noun phrases with occurrence counts
39
+ word_list = tgr.get_words(text)
39
40
 
40
- #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
41
+ #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
41
42
 
42
- # Get a readable version of the tagged text
43
- readable = tgr.get_readable(text)
43
+ # Get a readable version of the tagged text
44
+ readable = tgr.get_readable(text)
44
45
 
45
- #=> "Alice/NNP chased/VBD the/DET big/JJ fat/JJ cat/NN ./PP"
46
+ #=> "Alice/NNP chased/VBD the/DET big/JJ fat/JJ cat/NN ./PP"
46
47
 
47
- # Get all nouns from a tagged output
48
- nouns = tgr.get_nouns(tagged)
48
+ # Get all nouns from a tagged output
49
+ nouns = tgr.get_nouns(tagged)
49
50
 
50
- #=> {"cat"=>1, "Alice"=>1}
51
+ #=> {"cat"=>1, "Alice"=>1}
51
52
 
52
- # Get all proper nouns
53
- proper = tgr.get_proper_nouns(tagged)
53
+ # Get all proper nouns
54
+ proper = tgr.get_proper_nouns(tagged)
54
55
 
55
- #=> {"Alice"=>1}
56
+ #=> {"Alice"=>1}
56
57
 
57
- # Get all past tense verbs
58
- pt_verbs = tgr.get_past_tense_verbs(tagged)
58
+ # Get all past tense verbs
59
+ pt_verbs = tgr.get_past_tense_verbs(tagged)
59
60
 
60
- #=> {"chased"=>1}
61
+ #=> {"chased"=>1}
61
62
 
62
- # Get all the adjectives
63
- adj = tgr.get_adjectives(tagged)
63
+ # Get all the adjectives
64
+ adj = tgr.get_adjectives(tagged)
64
65
 
65
- #=> {"big"=>1, "fat"=>1}
66
+ #=> {"big"=>1, "fat"=>1}
66
67
 
67
- # Get all noun phrases of any syntactic level
68
- # (same as word_list but take a tagged input)
69
- nps = tgr.get_noun_phrases(tagged)
68
+ # Get all noun phrases of any syntactic level
69
+ # (same as word_list but takes a tagged input)
70
+ nps = tgr.get_noun_phrases(tagged)
70
71
 
71
- #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
72
+ #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
73
+ ```
72
74
 
73
- ### Tag Set
75
+ ## Tag Set
74
76
 
75
77
  The set of POS tags used here is a modified version of the Penn Treebank tagset. Tags with non-letter characters have been redefined to work better in our data structures. Also, the "Determiner" tag has been renamed from 'DT' to 'DET' in order to avoid confusion with the HTML tag `<DT>`.
76
78
 
@@ -120,26 +122,56 @@ The set of POS tags used here is a modified version of the Penn Treebank tagset.
120
122
  LRB Punctuation, left bracket (, {, [
121
123
  RRB Punctuation, right bracket ), }, ]
122
124
 
123
- ### Install
125
+ ## Installation
124
126
 
125
- gem install engtagger
127
+ **Recommended Approach (without sudo):**
126
128
 
127
- ### Author
129
+ It is recommended to install the `engtagger` gem within your user environment without root privileges. This ensures proper file permissions and avoids potential issues. You can achieve this by using Ruby version managers like `rbenv` or `rvm` to manage your Ruby versions and gemsets.
128
130
 
129
- of this Ruby library
131
+ To install without `sudo`, simply run:
130
132
 
131
- * Yoichiro Hasebe (yohasebe [at] gmail.com)
133
+ ```bash
134
+ gem install engtagger
135
+ ```
132
136
 
133
- ### Contributors
137
+ **Alternative Approach (with sudo):**
138
+
139
+ If you must use `sudo` for installation, you'll need to adjust file permissions afterward to ensure accessibility.
140
+
141
+ 1. Install the gem with `sudo`:
142
+
143
+ ```bash
144
+ sudo gem install engtagger
145
+ ```
146
+
147
+ 2. Grant necessary permissions to your user:
148
+
149
+ ```bash
150
+ sudo chown -R $(whoami) /Library/Ruby/Gems/2.6.0/gems/engtagger-0.4.1
151
+ ```
152
+
153
+ **Note:** The path above assumes you are using Ruby version 2.6.0. If you are using a different version, you will need to modify the path accordingly. You can find your Ruby version by running `ruby -v`.
154
+
155
+ ## Troubleshooting
156
+
157
+ **Permission Issues:**
158
+
159
+ If you encounter "cannot load such file" errors after installation, it might be due to incorrect file permissions. Ensure you've followed the instructions for adjusting permissions if you used `sudo` during installation.
160
+
161
+ ## Author
162
+
163
+ Yoichiro Hasebe (yohasebe [at] gmail.com)
164
+
165
+ ## Contributors
134
166
 
135
167
  Many thanks to the collaborators listed in the right column of this GitHub page.
136
168
 
137
- ### Acknowledgement
169
+ ## Acknowledgement
138
170
 
139
171
  This Ruby library is a direct port of Lingua::EN::Tagger available at CPAN.
140
172
  The credit for the crucial part of its algorithm/design therefore goes to
141
173
  Aaron Coburn, the author of the original Perl version.
142
174
 
143
- ### License
175
+ ## License
144
176
 
145
177
  This library is distributed under the GPL. Please see the LICENSE file.
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
- #!/usr/bin/env rake
1
+ # frozen_string_literal: true
2
+
2
3
  require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.libs << "test"
8
+ t.test_files = FileList["test/test*.rb"]
9
+ t.verbose = true
10
+ end
data/engtagger.gemspec CHANGED
@@ -1,19 +1,22 @@
1
- # -*- encoding: utf-8 -*-
2
- require File.expand_path('../lib/engtagger/version', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/engtagger/version"
3
4
 
4
5
  Gem::Specification.new do |gem|
5
6
  gem.authors = ["Yoichiro Hasebe"]
6
7
  gem.email = ["yohasebe@gmail.com"]
7
- gem.summary = %q{A probability based, corpus-trained English POS tagger}
8
- gem.description = %q{A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained tagger that assigns POS tags to English text based on a lookup dictionary and a set of probability values.}
9
- gem.homepage = "http://github.com/yohasebe/engtagger"
10
-
11
- gem.files = `git ls-files`.split($\)
12
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
8
+ gem.summary = "A probability based, corpus-trained English POS tagger"
9
+ gem.description = "A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained tagger that assigns POS tags to English text based on a lookup dictionary and a set of probability values."
10
+ gem.homepage = "http://github.com/yohasebe/engtagger"
11
+ gem.license = "GPL"
12
+ gem.required_ruby_version = Gem::Requirement.new(">= 2.6")
13
+ gem.files = Dir.chdir(File.expand_path(__dir__)) do
14
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
15
+ end
16
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
13
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
18
  gem.name = "engtagger"
15
19
  gem.require_paths = ["lib"]
16
20
  gem.version = EngTagger::VERSION
17
-
18
- gem.add_runtime_dependency 'lru_redux'
21
+ gem.add_dependency "lru_redux"
19
22
  end