text_rank 1.2.0 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.codeclimate.yml +1 -6
- data/.gitignore +4 -0
- data/.rubocop.yml +60 -1075
- data/.ruby-version +1 -1
- data/.travis.yml +14 -5
- data/{LICENSE.txt → LICENSE} +0 -0
- data/README.md +2 -1
- data/Rakefile +5 -0
- data/bin/console +3 -3
- data/ext/text_rank/extconf.rb +3 -0
- data/ext/text_rank/page_rank_sparse_native.c +296 -0
- data/ext/text_rank/page_rank_sparse_native.h +93 -0
- data/ext/text_rank/text_rank.c +5 -0
- data/lib/page_rank.rb +7 -4
- data/lib/page_rank/base.rb +12 -9
- data/lib/page_rank/dense.rb +3 -2
- data/lib/page_rank/sparse.rb +6 -7
- data/lib/page_rank/sparse_native.rb +21 -0
- data/lib/text_rank.rb +14 -9
- data/lib/text_rank/char_filter.rb +1 -1
- data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
- data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
- data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
- data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
- data/lib/text_rank/fingerprint.rb +10 -18
- data/lib/text_rank/fingerprint_overlap.rb +55 -0
- data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
- data/lib/text_rank/keyword_extractor.rb +32 -25
- data/lib/text_rank/rank_filter/collapse_adjacent.rb +53 -26
- data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
- data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
- data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
- data/lib/text_rank/token_filter/stopwords.rb +1 -321
- data/lib/text_rank/token_filter/stopwords.yml +317 -0
- data/lib/text_rank/tokenizer.rb +1 -1
- data/lib/text_rank/tokenizer/money.rb +11 -6
- data/lib/text_rank/tokenizer/number.rb +4 -3
- data/lib/text_rank/tokenizer/punctuation.rb +4 -1
- data/lib/text_rank/tokenizer/url.rb +3 -0
- data/lib/text_rank/tokenizer/whitespace.rb +4 -1
- data/lib/text_rank/tokenizer/word.rb +5 -2
- data/lib/text_rank/version.rb +3 -1
- data/text_rank.gemspec +12 -10
- metadata +69 -33
@@ -1,14 +1,17 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
|
+
|
3
4
|
##
|
4
5
|
# A tokenizer regex that preserves a non-space, non-punctuation "word". It does
|
5
6
|
# allow hyphens and numerals, but the first character must be an A-Z character.
|
6
7
|
##
|
7
|
-
|
8
|
+
# rubocop:disable Naming/ConstantName
|
9
|
+
Word = /
|
8
10
|
(
|
9
11
|
[a-z][a-z0-9-]*
|
10
12
|
)
|
11
|
-
|
13
|
+
/xi
|
14
|
+
# rubocop:enable Naming/ConstantName
|
12
15
|
|
13
16
|
end
|
14
17
|
end
|
data/lib/text_rank/version.rb
CHANGED
data/text_rank.gemspec
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# coding: utf-8
|
2
1
|
lib = File.expand_path('../lib', __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require 'text_rank/version'
|
@@ -9,22 +8,25 @@ Gem::Specification.new do |spec|
|
|
9
8
|
spec.authors = ['David McCullars']
|
10
9
|
spec.email = ['david.mccullars@gmail.com']
|
11
10
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
11
|
+
spec.summary = 'Implementation of TextRank solution to ranked keyword extraction'
|
12
|
+
spec.description = 'Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf'
|
14
13
|
spec.homepage = 'https://github.com/david-mccullars/text_rank'
|
15
14
|
spec.license = 'MIT'
|
16
15
|
|
17
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
17
|
spec.bindir = 'exe'
|
19
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
+
spec.extensions = ['ext/text_rank/extconf.rb']
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.add_development_dependency 'bundler'
|
23
|
-
spec.add_development_dependency 'rake'
|
24
|
-
spec.add_development_dependency '
|
25
|
-
spec.add_development_dependency '
|
26
|
-
spec.add_development_dependency '
|
22
|
+
spec.add_development_dependency 'bundler'
|
23
|
+
spec.add_development_dependency 'rake'
|
24
|
+
spec.add_development_dependency 'rake-compiler'
|
25
|
+
spec.add_development_dependency 'rspec'
|
26
|
+
spec.add_development_dependency 'rubocop'
|
27
|
+
spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
|
28
|
+
spec.add_development_dependency 'yard'
|
27
29
|
|
28
|
-
spec.add_development_dependency 'engtagger'
|
29
|
-
spec.add_development_dependency 'nokogiri'
|
30
|
+
spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
|
31
|
+
spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
|
30
32
|
end
|
metadata
CHANGED
@@ -1,73 +1,101 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David McCullars
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rspec
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - "
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubocop
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
46
74
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
75
|
+
version: '0'
|
48
76
|
type: :development
|
49
77
|
prerelease: false
|
50
78
|
version_requirements: !ruby/object:Gem::Requirement
|
51
79
|
requirements:
|
52
|
-
- - "
|
80
|
+
- - ">="
|
53
81
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: simplecov
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
58
86
|
requirements:
|
59
87
|
- - "~>"
|
60
88
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
89
|
+
version: 0.17.0
|
62
90
|
type: :development
|
63
91
|
prerelease: false
|
64
92
|
version_requirements: !ruby/object:Gem::Requirement
|
65
93
|
requirements:
|
66
94
|
- - "~>"
|
67
95
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
96
|
+
version: 0.17.0
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
98
|
+
name: yard
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
72
100
|
requirements:
|
73
101
|
- - ">="
|
@@ -84,36 +112,37 @@ dependencies:
|
|
84
112
|
name: engtagger
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
86
114
|
requirements:
|
87
|
-
- - "
|
115
|
+
- - ">="
|
88
116
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0
|
117
|
+
version: '0'
|
90
118
|
type: :development
|
91
119
|
prerelease: false
|
92
120
|
version_requirements: !ruby/object:Gem::Requirement
|
93
121
|
requirements:
|
94
|
-
- - "
|
122
|
+
- - ">="
|
95
123
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0
|
124
|
+
version: '0'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: nokogiri
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
100
128
|
requirements:
|
101
|
-
- - "
|
129
|
+
- - ">="
|
102
130
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
131
|
+
version: '0'
|
104
132
|
type: :development
|
105
133
|
prerelease: false
|
106
134
|
version_requirements: !ruby/object:Gem::Requirement
|
107
135
|
requirements:
|
108
|
-
- - "
|
136
|
+
- - ">="
|
109
137
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
138
|
+
version: '0'
|
111
139
|
description: Implementation of TextRank solution to ranked keyword extraction. See
|
112
140
|
https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
|
113
141
|
email:
|
114
142
|
- david.mccullars@gmail.com
|
115
143
|
executables: []
|
116
|
-
extensions:
|
144
|
+
extensions:
|
145
|
+
- ext/text_rank/extconf.rb
|
117
146
|
extra_rdoc_files: []
|
118
147
|
files:
|
119
148
|
- ".codeclimate.yml"
|
@@ -124,15 +153,20 @@ files:
|
|
124
153
|
- ".travis.yml"
|
125
154
|
- CODE_OF_CONDUCT.md
|
126
155
|
- Gemfile
|
127
|
-
- LICENSE.txt
|
156
|
+
- LICENSE
|
128
157
|
- README.md
|
129
158
|
- Rakefile
|
130
159
|
- bin/console
|
131
160
|
- bin/setup
|
161
|
+
- ext/text_rank/extconf.rb
|
162
|
+
- ext/text_rank/page_rank_sparse_native.c
|
163
|
+
- ext/text_rank/page_rank_sparse_native.h
|
164
|
+
- ext/text_rank/text_rank.c
|
132
165
|
- lib/page_rank.rb
|
133
166
|
- lib/page_rank/base.rb
|
134
167
|
- lib/page_rank/dense.rb
|
135
168
|
- lib/page_rank/sparse.rb
|
169
|
+
- lib/page_rank/sparse_native.rb
|
136
170
|
- lib/text_rank.rb
|
137
171
|
- lib/text_rank/char_filter.rb
|
138
172
|
- lib/text_rank/char_filter/ascii_folding.rb
|
@@ -141,7 +175,9 @@ files:
|
|
141
175
|
- lib/text_rank/char_filter/strip_html.rb
|
142
176
|
- lib/text_rank/char_filter/strip_possessive.rb
|
143
177
|
- lib/text_rank/char_filter/undo_contractions.rb
|
178
|
+
- lib/text_rank/char_filter/undo_contractions.yml
|
144
179
|
- lib/text_rank/fingerprint.rb
|
180
|
+
- lib/text_rank/fingerprint_overlap.rb
|
145
181
|
- lib/text_rank/graph_strategy.rb
|
146
182
|
- lib/text_rank/graph_strategy/coocurrence.rb
|
147
183
|
- lib/text_rank/keyword_extractor.rb
|
@@ -154,6 +190,7 @@ files:
|
|
154
190
|
- lib/text_rank/token_filter/min_length.rb
|
155
191
|
- lib/text_rank/token_filter/part_of_speech.rb
|
156
192
|
- lib/text_rank/token_filter/stopwords.rb
|
193
|
+
- lib/text_rank/token_filter/stopwords.yml
|
157
194
|
- lib/text_rank/tokenizer.rb
|
158
195
|
- lib/text_rank/tokenizer/money.rb
|
159
196
|
- lib/text_rank/tokenizer/number.rb
|
@@ -167,7 +204,7 @@ homepage: https://github.com/david-mccullars/text_rank
|
|
167
204
|
licenses:
|
168
205
|
- MIT
|
169
206
|
metadata: {}
|
170
|
-
post_install_message:
|
207
|
+
post_install_message:
|
171
208
|
rdoc_options: []
|
172
209
|
require_paths:
|
173
210
|
- lib
|
@@ -182,10 +219,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
182
219
|
- !ruby/object:Gem::Version
|
183
220
|
version: '0'
|
184
221
|
requirements: []
|
185
|
-
rubyforge_project:
|
186
|
-
rubygems_version: 2.6
|
187
|
-
signing_key:
|
222
|
+
rubyforge_project:
|
223
|
+
rubygems_version: 2.7.6
|
224
|
+
signing_key:
|
188
225
|
specification_version: 4
|
189
226
|
summary: Implementation of TextRank solution to ranked keyword extraction
|
190
227
|
test_files: []
|
191
|
-
has_rdoc:
|