text_rank 1.1.7 → 1.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +1 -6
  3. data/.rubocop.yml +60 -1075
  4. data/.ruby-version +1 -1
  5. data/.travis.yml +13 -5
  6. data/{LICENSE.txt → LICENSE} +0 -0
  7. data/README.md +2 -1
  8. data/bin/console +3 -3
  9. data/lib/page_rank.rb +2 -0
  10. data/lib/page_rank/base.rb +9 -8
  11. data/lib/page_rank/dense.rb +2 -1
  12. data/lib/page_rank/sparse.rb +6 -7
  13. data/lib/text_rank.rb +12 -9
  14. data/lib/text_rank/char_filter.rb +1 -1
  15. data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
  16. data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
  17. data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
  18. data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
  19. data/lib/text_rank/fingerprint.rb +20 -28
  20. data/lib/text_rank/fingerprint_overlap.rb +55 -0
  21. data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
  22. data/lib/text_rank/keyword_extractor.rb +32 -25
  23. data/lib/text_rank/rank_filter/collapse_adjacent.rb +53 -25
  24. data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
  25. data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
  26. data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
  27. data/lib/text_rank/token_filter/stopwords.rb +1 -321
  28. data/lib/text_rank/token_filter/stopwords.yml +317 -0
  29. data/lib/text_rank/tokenizer.rb +1 -1
  30. data/lib/text_rank/tokenizer/money.rb +11 -6
  31. data/lib/text_rank/tokenizer/number.rb +4 -3
  32. data/lib/text_rank/tokenizer/punctuation.rb +4 -1
  33. data/lib/text_rank/tokenizer/url.rb +3 -0
  34. data/lib/text_rank/tokenizer/whitespace.rb +4 -1
  35. data/lib/text_rank/tokenizer/word.rb +5 -2
  36. data/lib/text_rank/version.rb +3 -1
  37. data/text_rank.gemspec +10 -10
  38. metadata +48 -32
@@ -31,7 +31,7 @@ module TextRank
31
31
  tokens = []
32
32
  text.scan(Regexp.new(regular_expressions.flatten.join('|'))) do |matches|
33
33
  m = matches.compact.first
34
- tokens << m if m && m.size > 0
34
+ tokens << m if m&.size&.positive?
35
35
  end
36
36
  tokens
37
37
  end
@@ -1,4 +1,3 @@
1
- #encoding: UTF-8
2
1
  module TextRank
3
2
  module Tokenizer
4
3
 
@@ -12,7 +11,7 @@ module TextRank
12
11
  "\u20a4", # Lira Symbol
13
12
  "\u20a7", # Peseta Sign
14
13
  "\u20ac", # Euro Symbol
15
- "\u20B9", # Rupee
14
+ "\u20B9", # Rupee
16
15
  "\u20a9", # Won Sign
17
16
  "\u20b4", # Hryvnia Sign
18
17
  "\u20af", # Drachma Sign
@@ -34,6 +33,8 @@ module TextRank
34
33
  # A tokenizer regex that preserves money or formatted numbers as a single token. This
35
34
  # currently supports 24 different currency symbols:
36
35
  #
36
+ # rubocop:disable Style/AsciiComments
37
+ #
37
38
  # * ¤
38
39
  # * $
39
40
  # * ¢
@@ -58,19 +59,23 @@ module TextRank
58
59
  # * ₫
59
60
  # * %
60
61
  # * ‰
62
+
63
+ # rubocop:enable Style/AsciiComments
61
64
  #
62
65
  # It also supports two alternative formats for negatives as well as optional three digit comma
63
66
  # separation and optional decimals.
64
67
  ##
65
- Money = %r{
68
+ # rubocop:disable Naming/ConstantName
69
+ Money = /
66
70
  (
67
- #{CURRENCY_SYMBOLS} \-? #{Number} # $-45,231.21
71
+ #{CURRENCY_SYMBOLS} -? #{Number} # $-45,231.21
68
72
  |
69
- \-? #{CURRENCY_SYMBOLS} #{Number} # -$45,231.21
73
+ -? #{CURRENCY_SYMBOLS} #{Number} # -$45,231.21
70
74
  |
71
75
  \( #{CURRENCY_SYMBOLS} #{Number} \) # ($45,231.21)
72
76
  )
73
- }x
77
+ /x
78
+ # rubocop:enable Naming/ConstantName
74
79
 
75
80
  end
76
81
  end
@@ -1,11 +1,11 @@
1
- #encoding: UTF-8
2
1
  module TextRank
3
2
  module Tokenizer
4
3
 
5
4
  ##
6
5
  # A tokenizer regex that preserves (optionally formatted) numbers as a single token.
7
6
  ##
8
- Number = %r{
7
+ # rubocop:disable Naming/ConstantName
8
+ Number = /
9
9
  (
10
10
  [1-9]\d{3,} # 453231162
11
11
  (?:\.\d+)? # 453231162.17
@@ -25,7 +25,8 @@ module TextRank
25
25
 
26
26
  (?:\.\d+) # .17
27
27
  )
28
- }x
28
+ /x
29
+ # rubocop:enable Naming/ConstantName
29
30
 
30
31
  end
31
32
  end
@@ -1,11 +1,14 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves single punctuation symbols as a token. Use
5
6
  # this if one or more of your TokenFilter classes need punctuation in order to
6
7
  # make decisions.
7
8
  ##
8
- Punctuation = %r{([\p{Punct}])}
9
+ # rubocop:disable Naming/ConstantName
10
+ Punctuation = /(\p{Punct})/
11
+ # rubocop:enable Naming/ConstantName
9
12
 
10
13
  end
11
14
  end
@@ -1,8 +1,10 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves entire URL's as a token (rather than split them up)
5
6
  ##
7
+ # rubocop:disable Naming/ConstantName
6
8
  Url = %r{
7
9
  (
8
10
  (?:[\w-]+://?|www[.])
@@ -16,6 +18,7 @@ module TextRank
16
18
  )
17
19
  )
18
20
  }xi
21
+ # rubocop:enable Naming/ConstantName
19
22
 
20
23
  end
21
24
  end
@@ -1,11 +1,14 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves single whitespace characters as a token. Use
5
6
  # this if one or more of your TokenFilter classes need whitespace in order to
6
7
  # make decisions.
7
8
  ##
8
- Whitespace = %r{\s}
9
+ # rubocop:disable Naming/ConstantName
10
+ Whitespace = /\s/
11
+ # rubocop:enable Naming/ConstantName
9
12
 
10
13
  end
11
14
  end
@@ -1,14 +1,17 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves a non-space, non-punctuation "word". It does
5
6
  # allow hyphens and numerals, but the first character must be an A-Z character.
6
7
  ##
7
- Word = %r{
8
+ # rubocop:disable Naming/ConstantName
9
+ Word = /
8
10
  (
9
11
  [a-z][a-z0-9-]*
10
12
  )
11
- }xi
13
+ /xi
14
+ # rubocop:enable Naming/ConstantName
12
15
 
13
16
  end
14
17
  end
@@ -1,4 +1,6 @@
1
1
  module TextRank
2
+
2
3
  # Current gem version
3
- VERSION = '1.1.7'
4
+ VERSION = '1.2.5'
5
+
4
6
  end
@@ -1,4 +1,3 @@
1
- # coding: utf-8
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'text_rank/version'
@@ -9,8 +8,8 @@ Gem::Specification.new do |spec|
9
8
  spec.authors = ['David McCullars']
10
9
  spec.email = ['david.mccullars@gmail.com']
11
10
 
12
- spec.summary = %q{Implementation of TextRank solution to ranked keyword extraction}
13
- spec.description = %q{Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf}
11
+ spec.summary = 'Implementation of TextRank solution to ranked keyword extraction'
12
+ spec.description = 'Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf'
14
13
  spec.homepage = 'https://github.com/david-mccullars/text_rank'
15
14
  spec.license = 'MIT'
16
15
 
@@ -19,12 +18,13 @@ Gem::Specification.new do |spec|
19
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
19
  spec.require_paths = ['lib']
21
20
 
22
- spec.add_development_dependency 'bundler', '~> 1.11'
23
- spec.add_development_dependency 'rake', '~> 10.0'
24
- spec.add_development_dependency 'rspec', '~> 3.0'
25
- spec.add_development_dependency 'simplecov', '~> 0.11'
26
- spec.add_development_dependency 'codeclimate-test-reporter'
21
+ spec.add_development_dependency 'bundler'
22
+ spec.add_development_dependency 'rake'
23
+ spec.add_development_dependency 'rspec'
24
+ spec.add_development_dependency 'rubocop'
25
+ spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
26
+ spec.add_development_dependency 'yard'
27
27
 
28
- spec.add_development_dependency 'engtagger', '~> 0.2.0' # Optional runtime dependency but needed for specs
29
- spec.add_development_dependency 'nokogiri', '~> 1.0' # Optional runtime dependency but needed for specs
28
+ spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
29
+ spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
30
30
  end
metadata CHANGED
@@ -1,73 +1,87 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - David McCullars
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-05 00:00:00.000000000 Z
11
+ date: 2021-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: simplecov
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '0.11'
75
+ version: 0.17.0
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '0.11'
82
+ version: 0.17.0
69
83
  - !ruby/object:Gem::Dependency
70
- name: codeclimate-test-reporter
84
+ name: yard
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
87
  - - ">="
@@ -84,30 +98,30 @@ dependencies:
84
98
  name: engtagger
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - "~>"
101
+ - - ">="
88
102
  - !ruby/object:Gem::Version
89
- version: 0.2.0
103
+ version: '0'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - "~>"
108
+ - - ">="
95
109
  - !ruby/object:Gem::Version
96
- version: 0.2.0
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: nokogiri
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - "~>"
115
+ - - ">="
102
116
  - !ruby/object:Gem::Version
103
- version: '1.0'
117
+ version: '0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
- - - "~>"
122
+ - - ">="
109
123
  - !ruby/object:Gem::Version
110
- version: '1.0'
124
+ version: '0'
111
125
  description: Implementation of TextRank solution to ranked keyword extraction. See
112
126
  https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
113
127
  email:
@@ -124,7 +138,7 @@ files:
124
138
  - ".travis.yml"
125
139
  - CODE_OF_CONDUCT.md
126
140
  - Gemfile
127
- - LICENSE.txt
141
+ - LICENSE
128
142
  - README.md
129
143
  - Rakefile
130
144
  - bin/console
@@ -141,7 +155,9 @@ files:
141
155
  - lib/text_rank/char_filter/strip_html.rb
142
156
  - lib/text_rank/char_filter/strip_possessive.rb
143
157
  - lib/text_rank/char_filter/undo_contractions.rb
158
+ - lib/text_rank/char_filter/undo_contractions.yml
144
159
  - lib/text_rank/fingerprint.rb
160
+ - lib/text_rank/fingerprint_overlap.rb
145
161
  - lib/text_rank/graph_strategy.rb
146
162
  - lib/text_rank/graph_strategy/coocurrence.rb
147
163
  - lib/text_rank/keyword_extractor.rb
@@ -154,6 +170,7 @@ files:
154
170
  - lib/text_rank/token_filter/min_length.rb
155
171
  - lib/text_rank/token_filter/part_of_speech.rb
156
172
  - lib/text_rank/token_filter/stopwords.rb
173
+ - lib/text_rank/token_filter/stopwords.yml
157
174
  - lib/text_rank/tokenizer.rb
158
175
  - lib/text_rank/tokenizer/money.rb
159
176
  - lib/text_rank/tokenizer/number.rb
@@ -167,7 +184,7 @@ homepage: https://github.com/david-mccullars/text_rank
167
184
  licenses:
168
185
  - MIT
169
186
  metadata: {}
170
- post_install_message:
187
+ post_install_message:
171
188
  rdoc_options: []
172
189
  require_paths:
173
190
  - lib
@@ -182,10 +199,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
199
  - !ruby/object:Gem::Version
183
200
  version: '0'
184
201
  requirements: []
185
- rubyforge_project:
186
- rubygems_version: 2.5.1
187
- signing_key:
202
+ rubyforge_project:
203
+ rubygems_version: 2.7.6
204
+ signing_key:
188
205
  specification_version: 4
189
206
  summary: Implementation of TextRank solution to ranked keyword extraction
190
207
  test_files: []
191
- has_rdoc: