text_rank 1.2.0 → 1.2.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +1 -6
  3. data/.gitignore +4 -0
  4. data/.rubocop.yml +60 -1075
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +14 -5
  7. data/{LICENSE.txt → LICENSE} +0 -0
  8. data/README.md +2 -1
  9. data/Rakefile +5 -0
  10. data/bin/console +3 -3
  11. data/ext/text_rank/extconf.rb +3 -0
  12. data/ext/text_rank/page_rank_sparse_native.c +296 -0
  13. data/ext/text_rank/page_rank_sparse_native.h +93 -0
  14. data/ext/text_rank/text_rank.c +5 -0
  15. data/lib/page_rank.rb +7 -4
  16. data/lib/page_rank/base.rb +12 -9
  17. data/lib/page_rank/dense.rb +3 -2
  18. data/lib/page_rank/sparse.rb +6 -7
  19. data/lib/page_rank/sparse_native.rb +21 -0
  20. data/lib/text_rank.rb +14 -9
  21. data/lib/text_rank/char_filter.rb +1 -1
  22. data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
  23. data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
  24. data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
  25. data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
  26. data/lib/text_rank/fingerprint.rb +10 -18
  27. data/lib/text_rank/fingerprint_overlap.rb +55 -0
  28. data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
  29. data/lib/text_rank/keyword_extractor.rb +32 -25
  30. data/lib/text_rank/rank_filter/collapse_adjacent.rb +53 -26
  31. data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
  32. data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
  33. data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
  34. data/lib/text_rank/token_filter/stopwords.rb +1 -321
  35. data/lib/text_rank/token_filter/stopwords.yml +317 -0
  36. data/lib/text_rank/tokenizer.rb +1 -1
  37. data/lib/text_rank/tokenizer/money.rb +11 -6
  38. data/lib/text_rank/tokenizer/number.rb +4 -3
  39. data/lib/text_rank/tokenizer/punctuation.rb +4 -1
  40. data/lib/text_rank/tokenizer/url.rb +3 -0
  41. data/lib/text_rank/tokenizer/whitespace.rb +4 -1
  42. data/lib/text_rank/tokenizer/word.rb +5 -2
  43. data/lib/text_rank/version.rb +3 -1
  44. data/text_rank.gemspec +12 -10
  45. metadata +69 -33
@@ -1,14 +1,17 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves a non-space, non-punctuation "word". It does
5
6
  # allow hyphens and numerals, but the first character must be an A-Z character.
6
7
  ##
7
- Word = %r{
8
+ # rubocop:disable Naming/ConstantName
9
+ Word = /
8
10
  (
9
11
  [a-z][a-z0-9-]*
10
12
  )
11
- }xi
13
+ /xi
14
+ # rubocop:enable Naming/ConstantName
12
15
 
13
16
  end
14
17
  end
@@ -1,4 +1,6 @@
1
1
  module TextRank
2
+
2
3
  # Current gem version
3
- VERSION = '1.2.0'
4
+ VERSION = '1.2.9'
5
+
4
6
  end
data/text_rank.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # coding: utf-8
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'text_rank/version'
@@ -9,22 +8,25 @@ Gem::Specification.new do |spec|
9
8
  spec.authors = ['David McCullars']
10
9
  spec.email = ['david.mccullars@gmail.com']
11
10
 
12
- spec.summary = %q{Implementation of TextRank solution to ranked keyword extraction}
13
- spec.description = %q{Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf}
11
+ spec.summary = 'Implementation of TextRank solution to ranked keyword extraction'
12
+ spec.description = 'Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf'
14
13
  spec.homepage = 'https://github.com/david-mccullars/text_rank'
15
14
  spec.license = 'MIT'
16
15
 
17
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
17
  spec.bindir = 'exe'
19
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.extensions = ['ext/text_rank/extconf.rb']
20
20
  spec.require_paths = ['lib']
21
21
 
22
- spec.add_development_dependency 'bundler', '~> 1.11'
23
- spec.add_development_dependency 'rake', '~> 10.0'
24
- spec.add_development_dependency 'rspec', '~> 3.0'
25
- spec.add_development_dependency 'simplecov', '~> 0.11'
26
- spec.add_development_dependency 'codeclimate-test-reporter'
22
+ spec.add_development_dependency 'bundler'
23
+ spec.add_development_dependency 'rake'
24
+ spec.add_development_dependency 'rake-compiler'
25
+ spec.add_development_dependency 'rspec'
26
+ spec.add_development_dependency 'rubocop'
27
+ spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
28
+ spec.add_development_dependency 'yard'
27
29
 
28
- spec.add_development_dependency 'engtagger', '~> 0.2.0' # Optional runtime dependency but needed for specs
29
- spec.add_development_dependency 'nokogiri', '~> 1.0' # Optional runtime dependency but needed for specs
30
+ spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
31
+ spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
30
32
  end
metadata CHANGED
@@ -1,73 +1,101 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - David McCullars
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-19 00:00:00.000000000 Z
11
+ date: 2021-02-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
39
53
  - !ruby/object:Gem::Version
40
- version: '10.0'
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - "~>"
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
46
74
  - !ruby/object:Gem::Version
47
- version: '3.0'
75
+ version: '0'
48
76
  type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
- - - "~>"
80
+ - - ">="
53
81
  - !ruby/object:Gem::Version
54
- version: '3.0'
82
+ version: '0'
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: simplecov
57
85
  requirement: !ruby/object:Gem::Requirement
58
86
  requirements:
59
87
  - - "~>"
60
88
  - !ruby/object:Gem::Version
61
- version: '0.11'
89
+ version: 0.17.0
62
90
  type: :development
63
91
  prerelease: false
64
92
  version_requirements: !ruby/object:Gem::Requirement
65
93
  requirements:
66
94
  - - "~>"
67
95
  - !ruby/object:Gem::Version
68
- version: '0.11'
96
+ version: 0.17.0
69
97
  - !ruby/object:Gem::Dependency
70
- name: codeclimate-test-reporter
98
+ name: yard
71
99
  requirement: !ruby/object:Gem::Requirement
72
100
  requirements:
73
101
  - - ">="
@@ -84,36 +112,37 @@ dependencies:
84
112
  name: engtagger
85
113
  requirement: !ruby/object:Gem::Requirement
86
114
  requirements:
87
- - - "~>"
115
+ - - ">="
88
116
  - !ruby/object:Gem::Version
89
- version: 0.2.0
117
+ version: '0'
90
118
  type: :development
91
119
  prerelease: false
92
120
  version_requirements: !ruby/object:Gem::Requirement
93
121
  requirements:
94
- - - "~>"
122
+ - - ">="
95
123
  - !ruby/object:Gem::Version
96
- version: 0.2.0
124
+ version: '0'
97
125
  - !ruby/object:Gem::Dependency
98
126
  name: nokogiri
99
127
  requirement: !ruby/object:Gem::Requirement
100
128
  requirements:
101
- - - "~>"
129
+ - - ">="
102
130
  - !ruby/object:Gem::Version
103
- version: '1.0'
131
+ version: '0'
104
132
  type: :development
105
133
  prerelease: false
106
134
  version_requirements: !ruby/object:Gem::Requirement
107
135
  requirements:
108
- - - "~>"
136
+ - - ">="
109
137
  - !ruby/object:Gem::Version
110
- version: '1.0'
138
+ version: '0'
111
139
  description: Implementation of TextRank solution to ranked keyword extraction. See
112
140
  https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
113
141
  email:
114
142
  - david.mccullars@gmail.com
115
143
  executables: []
116
- extensions: []
144
+ extensions:
145
+ - ext/text_rank/extconf.rb
117
146
  extra_rdoc_files: []
118
147
  files:
119
148
  - ".codeclimate.yml"
@@ -124,15 +153,20 @@ files:
124
153
  - ".travis.yml"
125
154
  - CODE_OF_CONDUCT.md
126
155
  - Gemfile
127
- - LICENSE.txt
156
+ - LICENSE
128
157
  - README.md
129
158
  - Rakefile
130
159
  - bin/console
131
160
  - bin/setup
161
+ - ext/text_rank/extconf.rb
162
+ - ext/text_rank/page_rank_sparse_native.c
163
+ - ext/text_rank/page_rank_sparse_native.h
164
+ - ext/text_rank/text_rank.c
132
165
  - lib/page_rank.rb
133
166
  - lib/page_rank/base.rb
134
167
  - lib/page_rank/dense.rb
135
168
  - lib/page_rank/sparse.rb
169
+ - lib/page_rank/sparse_native.rb
136
170
  - lib/text_rank.rb
137
171
  - lib/text_rank/char_filter.rb
138
172
  - lib/text_rank/char_filter/ascii_folding.rb
@@ -141,7 +175,9 @@ files:
141
175
  - lib/text_rank/char_filter/strip_html.rb
142
176
  - lib/text_rank/char_filter/strip_possessive.rb
143
177
  - lib/text_rank/char_filter/undo_contractions.rb
178
+ - lib/text_rank/char_filter/undo_contractions.yml
144
179
  - lib/text_rank/fingerprint.rb
180
+ - lib/text_rank/fingerprint_overlap.rb
145
181
  - lib/text_rank/graph_strategy.rb
146
182
  - lib/text_rank/graph_strategy/coocurrence.rb
147
183
  - lib/text_rank/keyword_extractor.rb
@@ -154,6 +190,7 @@ files:
154
190
  - lib/text_rank/token_filter/min_length.rb
155
191
  - lib/text_rank/token_filter/part_of_speech.rb
156
192
  - lib/text_rank/token_filter/stopwords.rb
193
+ - lib/text_rank/token_filter/stopwords.yml
157
194
  - lib/text_rank/tokenizer.rb
158
195
  - lib/text_rank/tokenizer/money.rb
159
196
  - lib/text_rank/tokenizer/number.rb
@@ -167,7 +204,7 @@ homepage: https://github.com/david-mccullars/text_rank
167
204
  licenses:
168
205
  - MIT
169
206
  metadata: {}
170
- post_install_message:
207
+ post_install_message:
171
208
  rdoc_options: []
172
209
  require_paths:
173
210
  - lib
@@ -182,10 +219,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
219
  - !ruby/object:Gem::Version
183
220
  version: '0'
184
221
  requirements: []
185
- rubyforge_project:
186
- rubygems_version: 2.6.7
187
- signing_key:
222
+ rubyforge_project:
223
+ rubygems_version: 2.7.6
224
+ signing_key:
188
225
  specification_version: 4
189
226
  summary: Implementation of TextRank solution to ranked keyword extraction
190
227
  test_files: []
191
- has_rdoc: