text_rank 1.2.0 → 1.2.9

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +1 -6
  3. data/.gitignore +4 -0
  4. data/.rubocop.yml +60 -1075
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +14 -5
  7. data/{LICENSE.txt → LICENSE} +0 -0
  8. data/README.md +2 -1
  9. data/Rakefile +5 -0
  10. data/bin/console +3 -3
  11. data/ext/text_rank/extconf.rb +3 -0
  12. data/ext/text_rank/page_rank_sparse_native.c +296 -0
  13. data/ext/text_rank/page_rank_sparse_native.h +93 -0
  14. data/ext/text_rank/text_rank.c +5 -0
  15. data/lib/page_rank.rb +7 -4
  16. data/lib/page_rank/base.rb +12 -9
  17. data/lib/page_rank/dense.rb +3 -2
  18. data/lib/page_rank/sparse.rb +6 -7
  19. data/lib/page_rank/sparse_native.rb +21 -0
  20. data/lib/text_rank.rb +14 -9
  21. data/lib/text_rank/char_filter.rb +1 -1
  22. data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
  23. data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
  24. data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
  25. data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
  26. data/lib/text_rank/fingerprint.rb +10 -18
  27. data/lib/text_rank/fingerprint_overlap.rb +55 -0
  28. data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
  29. data/lib/text_rank/keyword_extractor.rb +32 -25
  30. data/lib/text_rank/rank_filter/collapse_adjacent.rb +53 -26
  31. data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
  32. data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
  33. data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
  34. data/lib/text_rank/token_filter/stopwords.rb +1 -321
  35. data/lib/text_rank/token_filter/stopwords.yml +317 -0
  36. data/lib/text_rank/tokenizer.rb +1 -1
  37. data/lib/text_rank/tokenizer/money.rb +11 -6
  38. data/lib/text_rank/tokenizer/number.rb +4 -3
  39. data/lib/text_rank/tokenizer/punctuation.rb +4 -1
  40. data/lib/text_rank/tokenizer/url.rb +3 -0
  41. data/lib/text_rank/tokenizer/whitespace.rb +4 -1
  42. data/lib/text_rank/tokenizer/word.rb +5 -2
  43. data/lib/text_rank/version.rb +3 -1
  44. data/text_rank.gemspec +12 -10
  45. metadata +69 -33
data/lib/text_rank/tokenizer/word.rb CHANGED
@@ -1,14 +1,17 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves a non-space, non-punctuation "word". It does
5
6
  # allow hyphens and numerals, but the first character must be an A-Z character.
6
7
  ##
7
- Word = %r{
8
+ # rubocop:disable Naming/ConstantName
9
+ Word = /
8
10
  (
9
11
  [a-z][a-z0-9-]*
10
12
  )
11
- }xi
13
+ /xi
14
+ # rubocop:enable Naming/ConstantName
12
15
 
13
16
  end
14
17
  end
data/lib/text_rank/version.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  module TextRank
2
+
2
3
  # Current gem version
3
- VERSION = '1.2.0'
4
+ VERSION = '1.2.9'
5
+
4
6
  end
data/text_rank.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # coding: utf-8
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'text_rank/version'
@@ -9,22 +8,25 @@ Gem::Specification.new do |spec|
9
8
  spec.authors = ['David McCullars']
10
9
  spec.email = ['david.mccullars@gmail.com']
11
10
 
12
- spec.summary = %q{Implementation of TextRank solution to ranked keyword extraction}
13
- spec.description = %q{Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf}
11
+ spec.summary = 'Implementation of TextRank solution to ranked keyword extraction'
12
+ spec.description = 'Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf'
14
13
  spec.homepage = 'https://github.com/david-mccullars/text_rank'
15
14
  spec.license = 'MIT'
16
15
 
17
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
17
  spec.bindir = 'exe'
19
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.extensions = ['ext/text_rank/extconf.rb']
20
20
  spec.require_paths = ['lib']
21
21
 
22
- spec.add_development_dependency 'bundler', '~> 1.11'
23
- spec.add_development_dependency 'rake', '~> 10.0'
24
- spec.add_development_dependency 'rspec', '~> 3.0'
25
- spec.add_development_dependency 'simplecov', '~> 0.11'
26
- spec.add_development_dependency 'codeclimate-test-reporter'
22
+ spec.add_development_dependency 'bundler'
23
+ spec.add_development_dependency 'rake'
24
+ spec.add_development_dependency 'rake-compiler'
25
+ spec.add_development_dependency 'rspec'
26
+ spec.add_development_dependency 'rubocop'
27
+ spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
28
+ spec.add_development_dependency 'yard'
27
29
 
28
- spec.add_development_dependency 'engtagger', '~> 0.2.0' # Optional runtime dependency but needed for specs
29
- spec.add_development_dependency 'nokogiri', '~> 1.0' # Optional runtime dependency but needed for specs
30
+ spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
31
+ spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
30
32
  end
metadata CHANGED
@@ -1,73 +1,101 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - David McCullars
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-19 00:00:00.000000000 Z
11
+ date: 2021-02-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
39
53
  - !ruby/object:Gem::Version
40
- version: '10.0'
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - "~>"
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
46
74
  - !ruby/object:Gem::Version
47
- version: '3.0'
75
+ version: '0'
48
76
  type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
- - - "~>"
80
+ - - ">="
53
81
  - !ruby/object:Gem::Version
54
- version: '3.0'
82
+ version: '0'
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: simplecov
57
85
  requirement: !ruby/object:Gem::Requirement
58
86
  requirements:
59
87
  - - "~>"
60
88
  - !ruby/object:Gem::Version
61
- version: '0.11'
89
+ version: 0.17.0
62
90
  type: :development
63
91
  prerelease: false
64
92
  version_requirements: !ruby/object:Gem::Requirement
65
93
  requirements:
66
94
  - - "~>"
67
95
  - !ruby/object:Gem::Version
68
- version: '0.11'
96
+ version: 0.17.0
69
97
  - !ruby/object:Gem::Dependency
70
- name: codeclimate-test-reporter
98
+ name: yard
71
99
  requirement: !ruby/object:Gem::Requirement
72
100
  requirements:
73
101
  - - ">="
@@ -84,36 +112,37 @@ dependencies:
84
112
  name: engtagger
85
113
  requirement: !ruby/object:Gem::Requirement
86
114
  requirements:
87
- - - "~>"
115
+ - - ">="
88
116
  - !ruby/object:Gem::Version
89
- version: 0.2.0
117
+ version: '0'
90
118
  type: :development
91
119
  prerelease: false
92
120
  version_requirements: !ruby/object:Gem::Requirement
93
121
  requirements:
94
- - - "~>"
122
+ - - ">="
95
123
  - !ruby/object:Gem::Version
96
- version: 0.2.0
124
+ version: '0'
97
125
  - !ruby/object:Gem::Dependency
98
126
  name: nokogiri
99
127
  requirement: !ruby/object:Gem::Requirement
100
128
  requirements:
101
- - - "~>"
129
+ - - ">="
102
130
  - !ruby/object:Gem::Version
103
- version: '1.0'
131
+ version: '0'
104
132
  type: :development
105
133
  prerelease: false
106
134
  version_requirements: !ruby/object:Gem::Requirement
107
135
  requirements:
108
- - - "~>"
136
+ - - ">="
109
137
  - !ruby/object:Gem::Version
110
- version: '1.0'
138
+ version: '0'
111
139
  description: Implementation of TextRank solution to ranked keyword extraction. See
112
140
  https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
113
141
  email:
114
142
  - david.mccullars@gmail.com
115
143
  executables: []
116
- extensions: []
144
+ extensions:
145
+ - ext/text_rank/extconf.rb
117
146
  extra_rdoc_files: []
118
147
  files:
119
148
  - ".codeclimate.yml"
@@ -124,15 +153,20 @@ files:
124
153
  - ".travis.yml"
125
154
  - CODE_OF_CONDUCT.md
126
155
  - Gemfile
127
- - LICENSE.txt
156
+ - LICENSE
128
157
  - README.md
129
158
  - Rakefile
130
159
  - bin/console
131
160
  - bin/setup
161
+ - ext/text_rank/extconf.rb
162
+ - ext/text_rank/page_rank_sparse_native.c
163
+ - ext/text_rank/page_rank_sparse_native.h
164
+ - ext/text_rank/text_rank.c
132
165
  - lib/page_rank.rb
133
166
  - lib/page_rank/base.rb
134
167
  - lib/page_rank/dense.rb
135
168
  - lib/page_rank/sparse.rb
169
+ - lib/page_rank/sparse_native.rb
136
170
  - lib/text_rank.rb
137
171
  - lib/text_rank/char_filter.rb
138
172
  - lib/text_rank/char_filter/ascii_folding.rb
@@ -141,7 +175,9 @@ files:
141
175
  - lib/text_rank/char_filter/strip_html.rb
142
176
  - lib/text_rank/char_filter/strip_possessive.rb
143
177
  - lib/text_rank/char_filter/undo_contractions.rb
178
+ - lib/text_rank/char_filter/undo_contractions.yml
144
179
  - lib/text_rank/fingerprint.rb
180
+ - lib/text_rank/fingerprint_overlap.rb
145
181
  - lib/text_rank/graph_strategy.rb
146
182
  - lib/text_rank/graph_strategy/coocurrence.rb
147
183
  - lib/text_rank/keyword_extractor.rb
@@ -154,6 +190,7 @@ files:
154
190
  - lib/text_rank/token_filter/min_length.rb
155
191
  - lib/text_rank/token_filter/part_of_speech.rb
156
192
  - lib/text_rank/token_filter/stopwords.rb
193
+ - lib/text_rank/token_filter/stopwords.yml
157
194
  - lib/text_rank/tokenizer.rb
158
195
  - lib/text_rank/tokenizer/money.rb
159
196
  - lib/text_rank/tokenizer/number.rb
@@ -167,7 +204,7 @@ homepage: https://github.com/david-mccullars/text_rank
167
204
  licenses:
168
205
  - MIT
169
206
  metadata: {}
170
- post_install_message:
207
+ post_install_message:
171
208
  rdoc_options: []
172
209
  require_paths:
173
210
  - lib
@@ -182,10 +219,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
219
  - !ruby/object:Gem::Version
183
220
  version: '0'
184
221
  requirements: []
185
- rubyforge_project:
186
- rubygems_version: 2.6.7
187
- signing_key:
222
+ rubyforge_project:
223
+ rubygems_version: 2.7.6
224
+ signing_key:
188
225
  specification_version: 4
189
226
  summary: Implementation of TextRank solution to ranked keyword extraction
190
227
  test_files: []
191
- has_rdoc: