text_rank 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +1 -1
- data/.gitignore +4 -0
- data/.rubocop.yml +7 -0
- data/.ruby-version +1 -1
- data/.travis.yml +1 -0
- data/Rakefile +5 -0
- data/bin/console +3 -3
- data/ext/text_rank/extconf.rb +3 -0
- data/ext/text_rank/page_rank_sparse_native.c +300 -0
- data/ext/text_rank/page_rank_sparse_native.h +93 -0
- data/ext/text_rank/text_rank.c +5 -0
- data/lib/page_rank/base.rb +12 -9
- data/lib/page_rank/dense.rb +3 -2
- data/lib/page_rank/sparse.rb +6 -7
- data/lib/page_rank/sparse_native.rb +21 -0
- data/lib/page_rank.rb +7 -4
- data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
- data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
- data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
- data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
- data/lib/text_rank/char_filter.rb +1 -1
- data/lib/text_rank/fingerprint.rb +10 -18
- data/lib/text_rank/fingerprint_overlap.rb +55 -0
- data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
- data/lib/text_rank/keyword_extractor.rb +32 -25
- data/lib/text_rank/rank_filter/collapse_adjacent.rb +48 -25
- data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
- data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
- data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
- data/lib/text_rank/token_filter/stopwords.rb +1 -321
- data/lib/text_rank/token_filter/stopwords.yml +317 -0
- data/lib/text_rank/tokenizer/money.rb +11 -6
- data/lib/text_rank/tokenizer/number.rb +4 -3
- data/lib/text_rank/tokenizer/punctuation.rb +4 -1
- data/lib/text_rank/tokenizer/url.rb +3 -0
- data/lib/text_rank/tokenizer/whitespace.rb +4 -1
- data/lib/text_rank/tokenizer/word.rb +5 -2
- data/lib/text_rank/tokenizer.rb +1 -1
- data/lib/text_rank/version.rb +3 -1
- data/lib/text_rank.rb +14 -9
- data/text_rank.gemspec +4 -1
- metadata +48 -12
@@ -0,0 +1,317 @@
|
|
1
|
+
- a
|
2
|
+
- about
|
3
|
+
- above
|
4
|
+
- across
|
5
|
+
- after
|
6
|
+
- afterwards
|
7
|
+
- again
|
8
|
+
- against
|
9
|
+
- all
|
10
|
+
- almost
|
11
|
+
- alone
|
12
|
+
- along
|
13
|
+
- already
|
14
|
+
- also
|
15
|
+
- although
|
16
|
+
- always
|
17
|
+
- am
|
18
|
+
- among
|
19
|
+
- amongst
|
20
|
+
- amoungst
|
21
|
+
- amount
|
22
|
+
- an
|
23
|
+
- and
|
24
|
+
- another
|
25
|
+
- any
|
26
|
+
- anyhow
|
27
|
+
- anyone
|
28
|
+
- anything
|
29
|
+
- anyway
|
30
|
+
- anywhere
|
31
|
+
- are
|
32
|
+
- around
|
33
|
+
- as
|
34
|
+
- at
|
35
|
+
- back
|
36
|
+
- be
|
37
|
+
- became
|
38
|
+
- because
|
39
|
+
- become
|
40
|
+
- becomes
|
41
|
+
- becoming
|
42
|
+
- been
|
43
|
+
- before
|
44
|
+
- beforehand
|
45
|
+
- behind
|
46
|
+
- being
|
47
|
+
- below
|
48
|
+
- beside
|
49
|
+
- besides
|
50
|
+
- between
|
51
|
+
- beyond
|
52
|
+
- bill
|
53
|
+
- both
|
54
|
+
- bottom
|
55
|
+
- but
|
56
|
+
- by
|
57
|
+
- call
|
58
|
+
- can
|
59
|
+
- cannot
|
60
|
+
- cant
|
61
|
+
- co
|
62
|
+
- con
|
63
|
+
- could
|
64
|
+
- couldnt
|
65
|
+
- cry
|
66
|
+
- de
|
67
|
+
- describe
|
68
|
+
- detail
|
69
|
+
- do
|
70
|
+
- done
|
71
|
+
- down
|
72
|
+
- due
|
73
|
+
- during
|
74
|
+
- each
|
75
|
+
- eg
|
76
|
+
- eight
|
77
|
+
- either
|
78
|
+
- eleven
|
79
|
+
- else
|
80
|
+
- elsewhere
|
81
|
+
- empty
|
82
|
+
- enough
|
83
|
+
- etc
|
84
|
+
- even
|
85
|
+
- ever
|
86
|
+
- every
|
87
|
+
- everyone
|
88
|
+
- everything
|
89
|
+
- everywhere
|
90
|
+
- except
|
91
|
+
- few
|
92
|
+
- fifteen
|
93
|
+
- fify
|
94
|
+
- fill
|
95
|
+
- find
|
96
|
+
- fire
|
97
|
+
- first
|
98
|
+
- five
|
99
|
+
- for
|
100
|
+
- former
|
101
|
+
- formerly
|
102
|
+
- forty
|
103
|
+
- found
|
104
|
+
- four
|
105
|
+
- from
|
106
|
+
- front
|
107
|
+
- full
|
108
|
+
- further
|
109
|
+
- get
|
110
|
+
- give
|
111
|
+
- go
|
112
|
+
- had
|
113
|
+
- has
|
114
|
+
- hasnt
|
115
|
+
- have
|
116
|
+
- he
|
117
|
+
- hence
|
118
|
+
- her
|
119
|
+
- here
|
120
|
+
- hereafter
|
121
|
+
- hereby
|
122
|
+
- herein
|
123
|
+
- hereupon
|
124
|
+
- hers
|
125
|
+
- herself
|
126
|
+
- him
|
127
|
+
- himself
|
128
|
+
- his
|
129
|
+
- how
|
130
|
+
- however
|
131
|
+
- hundred
|
132
|
+
- ie
|
133
|
+
- if
|
134
|
+
- in
|
135
|
+
- inc
|
136
|
+
- indeed
|
137
|
+
- interest
|
138
|
+
- into
|
139
|
+
- is
|
140
|
+
- it
|
141
|
+
- its
|
142
|
+
- itself
|
143
|
+
- keep
|
144
|
+
- last
|
145
|
+
- latter
|
146
|
+
- latterly
|
147
|
+
- least
|
148
|
+
- less
|
149
|
+
- ltd
|
150
|
+
- made
|
151
|
+
- many
|
152
|
+
- may
|
153
|
+
- me
|
154
|
+
- meanwhile
|
155
|
+
- might
|
156
|
+
- mill
|
157
|
+
- mine
|
158
|
+
- more
|
159
|
+
- moreover
|
160
|
+
- most
|
161
|
+
- mostly
|
162
|
+
- move
|
163
|
+
- much
|
164
|
+
- must
|
165
|
+
- my
|
166
|
+
- myself
|
167
|
+
- name
|
168
|
+
- namely
|
169
|
+
- neither
|
170
|
+
- never
|
171
|
+
- nevertheless
|
172
|
+
- next
|
173
|
+
- nine
|
174
|
+
- no
|
175
|
+
- nobody
|
176
|
+
- none
|
177
|
+
- noone
|
178
|
+
- nor
|
179
|
+
- not
|
180
|
+
- nothing
|
181
|
+
- now
|
182
|
+
- nowhere
|
183
|
+
- of
|
184
|
+
- off
|
185
|
+
- often
|
186
|
+
- on
|
187
|
+
- once
|
188
|
+
- one
|
189
|
+
- only
|
190
|
+
- onto
|
191
|
+
- or
|
192
|
+
- other
|
193
|
+
- others
|
194
|
+
- otherwise
|
195
|
+
- our
|
196
|
+
- ours
|
197
|
+
- ourselves
|
198
|
+
- out
|
199
|
+
- over
|
200
|
+
- own
|
201
|
+
- part
|
202
|
+
- per
|
203
|
+
- perhaps
|
204
|
+
- please
|
205
|
+
- put
|
206
|
+
- rather
|
207
|
+
- re
|
208
|
+
- same
|
209
|
+
- see
|
210
|
+
- seem
|
211
|
+
- seemed
|
212
|
+
- seeming
|
213
|
+
- seems
|
214
|
+
- serious
|
215
|
+
- several
|
216
|
+
- she
|
217
|
+
- should
|
218
|
+
- show
|
219
|
+
- side
|
220
|
+
- since
|
221
|
+
- sincere
|
222
|
+
- six
|
223
|
+
- sixty
|
224
|
+
- so
|
225
|
+
- some
|
226
|
+
- somehow
|
227
|
+
- someone
|
228
|
+
- something
|
229
|
+
- sometime
|
230
|
+
- sometimes
|
231
|
+
- somewhere
|
232
|
+
- still
|
233
|
+
- such
|
234
|
+
- system
|
235
|
+
- take
|
236
|
+
- ten
|
237
|
+
- than
|
238
|
+
- that
|
239
|
+
- the
|
240
|
+
- their
|
241
|
+
- them
|
242
|
+
- themselves
|
243
|
+
- then
|
244
|
+
- thence
|
245
|
+
- there
|
246
|
+
- thereafter
|
247
|
+
- thereby
|
248
|
+
- therefore
|
249
|
+
- therein
|
250
|
+
- thereupon
|
251
|
+
- these
|
252
|
+
- they
|
253
|
+
- thickv
|
254
|
+
- thin
|
255
|
+
- third
|
256
|
+
- this
|
257
|
+
- those
|
258
|
+
- though
|
259
|
+
- three
|
260
|
+
- through
|
261
|
+
- throughout
|
262
|
+
- thru
|
263
|
+
- thus
|
264
|
+
- to
|
265
|
+
- together
|
266
|
+
- too
|
267
|
+
- top
|
268
|
+
- toward
|
269
|
+
- towards
|
270
|
+
- twelve
|
271
|
+
- twenty
|
272
|
+
- two
|
273
|
+
- un
|
274
|
+
- under
|
275
|
+
- until
|
276
|
+
- up
|
277
|
+
- upon
|
278
|
+
- us
|
279
|
+
- very
|
280
|
+
- via
|
281
|
+
- was
|
282
|
+
- we
|
283
|
+
- well
|
284
|
+
- were
|
285
|
+
- what
|
286
|
+
- whatever
|
287
|
+
- when
|
288
|
+
- whence
|
289
|
+
- whenever
|
290
|
+
- where
|
291
|
+
- whereafter
|
292
|
+
- whereas
|
293
|
+
- whereby
|
294
|
+
- wherein
|
295
|
+
- whereupon
|
296
|
+
- wherever
|
297
|
+
- whether
|
298
|
+
- which
|
299
|
+
- while
|
300
|
+
- whither
|
301
|
+
- who
|
302
|
+
- whoever
|
303
|
+
- whole
|
304
|
+
- whom
|
305
|
+
- whose
|
306
|
+
- why
|
307
|
+
- will
|
308
|
+
- with
|
309
|
+
- within
|
310
|
+
- without
|
311
|
+
- would
|
312
|
+
- yet
|
313
|
+
- you
|
314
|
+
- your
|
315
|
+
- yours
|
316
|
+
- yourself
|
317
|
+
- yourselves
|
@@ -1,4 +1,3 @@
|
|
1
|
-
#encoding: UTF-8
|
2
1
|
module TextRank
|
3
2
|
module Tokenizer
|
4
3
|
|
@@ -12,7 +11,7 @@ module TextRank
|
|
12
11
|
"\u20a4", # Lira Symbol
|
13
12
|
"\u20a7", # Peseta Sign
|
14
13
|
"\u20ac", # Euro Symbol
|
15
|
-
"\u20B9", # Rupee
|
14
|
+
"\u20B9", # Rupee
|
16
15
|
"\u20a9", # Won Sign
|
17
16
|
"\u20b4", # Hryvnia Sign
|
18
17
|
"\u20af", # Drachma Sign
|
@@ -34,6 +33,8 @@ module TextRank
|
|
34
33
|
# A tokenizer regex that preserves money or formatted numbers as a single token. This
|
35
34
|
# currently supports 24 different currency symbols:
|
36
35
|
#
|
36
|
+
# rubocop:disable Style/AsciiComments
|
37
|
+
#
|
37
38
|
# * ¤
|
38
39
|
# * $
|
39
40
|
# * ¢
|
@@ -58,19 +59,23 @@ module TextRank
|
|
58
59
|
# * ₫
|
59
60
|
# * %
|
60
61
|
# * ‰
|
62
|
+
|
63
|
+
# rubocop:enable Style/AsciiComments
|
61
64
|
#
|
62
65
|
# It also supports two alternative formats for negatives as well as optional three digit comma
|
63
66
|
# separation and optional decimals.
|
64
67
|
##
|
65
|
-
|
68
|
+
# rubocop:disable Naming/ConstantName
|
69
|
+
Money = /
|
66
70
|
(
|
67
|
-
#{CURRENCY_SYMBOLS}
|
71
|
+
#{CURRENCY_SYMBOLS} -? #{Number} # $-45,231.21
|
68
72
|
|
|
69
|
-
|
73
|
+
-? #{CURRENCY_SYMBOLS} #{Number} # -$45,231.21
|
70
74
|
|
|
71
75
|
\( #{CURRENCY_SYMBOLS} #{Number} \) # ($45,231.21)
|
72
76
|
)
|
73
|
-
|
77
|
+
/x
|
78
|
+
# rubocop:enable Naming/ConstantName
|
74
79
|
|
75
80
|
end
|
76
81
|
end
|
@@ -1,11 +1,11 @@
|
|
1
|
-
#encoding: UTF-8
|
2
1
|
module TextRank
|
3
2
|
module Tokenizer
|
4
3
|
|
5
4
|
##
|
6
5
|
# A tokenizer regex that preserves (optionally formatted) numbers as a single token.
|
7
6
|
##
|
8
|
-
|
7
|
+
# rubocop:disable Naming/ConstantName
|
8
|
+
Number = /
|
9
9
|
(
|
10
10
|
[1-9]\d{3,} # 453231162
|
11
11
|
(?:\.\d+)? # 453231162.17
|
@@ -25,7 +25,8 @@ module TextRank
|
|
25
25
|
|
26
26
|
(?:\.\d+) # .17
|
27
27
|
)
|
28
|
-
|
28
|
+
/x
|
29
|
+
# rubocop:enable Naming/ConstantName
|
29
30
|
|
30
31
|
end
|
31
32
|
end
|
@@ -1,11 +1,14 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
|
+
|
3
4
|
##
|
4
5
|
# A tokenizer regex that preserves single punctuation symbols as a token. Use
|
5
6
|
# this if one or more of your TokenFilter classes need punctuation in order to
|
6
7
|
# make decisions.
|
7
8
|
##
|
8
|
-
|
9
|
+
# rubocop:disable Naming/ConstantName
|
10
|
+
Punctuation = /(\p{Punct})/
|
11
|
+
# rubocop:enable Naming/ConstantName
|
9
12
|
|
10
13
|
end
|
11
14
|
end
|
@@ -1,8 +1,10 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
|
+
|
3
4
|
##
|
4
5
|
# A tokenizer regex that preserves entire URL's as a token (rather than split them up)
|
5
6
|
##
|
7
|
+
# rubocop:disable Naming/ConstantName
|
6
8
|
Url = %r{
|
7
9
|
(
|
8
10
|
(?:[\w-]+://?|www[.])
|
@@ -16,6 +18,7 @@ module TextRank
|
|
16
18
|
)
|
17
19
|
)
|
18
20
|
}xi
|
21
|
+
# rubocop:enable Naming/ConstantName
|
19
22
|
|
20
23
|
end
|
21
24
|
end
|
@@ -1,11 +1,14 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
|
+
|
3
4
|
##
|
4
5
|
# A tokenizer regex that preserves single whitespace characters as a token. Use
|
5
6
|
# this if one or more of your TokenFilter classes need whitespace in order to
|
6
7
|
# make decisions.
|
7
8
|
##
|
8
|
-
|
9
|
+
# rubocop:disable Naming/ConstantName
|
10
|
+
Whitespace = /\s/
|
11
|
+
# rubocop:enable Naming/ConstantName
|
9
12
|
|
10
13
|
end
|
11
14
|
end
|
@@ -1,14 +1,17 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
|
+
|
3
4
|
##
|
4
5
|
# A tokenizer regex that preserves a non-space, non-punctuation "word". It does
|
5
6
|
# allow hyphens and numerals, but the first character must be an A-Z character.
|
6
7
|
##
|
7
|
-
|
8
|
+
# rubocop:disable Naming/ConstantName
|
9
|
+
Word = /
|
8
10
|
(
|
9
11
|
[a-z][a-z0-9-]*
|
10
12
|
)
|
11
|
-
|
13
|
+
/xi
|
14
|
+
# rubocop:enable Naming/ConstantName
|
12
15
|
|
13
16
|
end
|
14
17
|
end
|
data/lib/text_rank/tokenizer.rb
CHANGED
data/lib/text_rank/version.rb
CHANGED
data/lib/text_rank.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'page_rank'
|
2
|
+
require 'set'
|
3
|
+
require 'yaml'
|
2
4
|
|
3
5
|
##
|
4
6
|
# Provides convenience methods for quickly extracting keywords.
|
@@ -7,17 +9,18 @@ require 'page_rank'
|
|
7
9
|
##
|
8
10
|
module TextRank
|
9
11
|
|
10
|
-
autoload :CharFilter,
|
11
|
-
autoload :Fingerprint,
|
12
|
-
autoload :
|
13
|
-
autoload :
|
14
|
-
autoload :
|
15
|
-
autoload :
|
16
|
-
autoload :
|
17
|
-
autoload :
|
12
|
+
autoload :CharFilter, 'text_rank/char_filter'
|
13
|
+
autoload :Fingerprint, 'text_rank/fingerprint'
|
14
|
+
autoload :FingerprintOverlap, 'text_rank/fingerprint_overlap'
|
15
|
+
autoload :GraphStrategy, 'text_rank/graph_strategy'
|
16
|
+
autoload :KeywordExtractor, 'text_rank/keyword_extractor'
|
17
|
+
autoload :RankFilter, 'text_rank/rank_filter'
|
18
|
+
autoload :TokenFilter, 'text_rank/token_filter'
|
19
|
+
autoload :Tokenizer, 'text_rank/tokenizer'
|
20
|
+
autoload :VERSION, 'text_rank/version'
|
18
21
|
|
19
22
|
# A convenience method for quickly extracting keywords from text with default options
|
20
|
-
# @param text [String] text from which to extract keywords
|
23
|
+
# @param text [String,Array<String>] text from which to extract keywords
|
21
24
|
# @option (see KeywordExtractor.basic)
|
22
25
|
# @return [Hash<String, Float>] of tokens and text rank (in descending order)
|
23
26
|
def self.extract_keywords(text, **options)
|
@@ -37,3 +40,5 @@ module TextRank
|
|
37
40
|
end
|
38
41
|
|
39
42
|
end
|
43
|
+
|
44
|
+
require 'text_rank/text_rank'
|
data/text_rank.gemspec
CHANGED
@@ -16,13 +16,16 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
17
|
spec.bindir = 'exe'
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
+
spec.extensions = ['ext/text_rank/extconf.rb']
|
19
20
|
spec.require_paths = ['lib']
|
20
21
|
|
21
22
|
spec.add_development_dependency 'bundler'
|
22
23
|
spec.add_development_dependency 'rake'
|
24
|
+
spec.add_development_dependency 'rake-compiler'
|
23
25
|
spec.add_development_dependency 'rspec'
|
24
26
|
spec.add_development_dependency 'rubocop'
|
25
|
-
spec.add_development_dependency 'simplecov'
|
27
|
+
spec.add_development_dependency 'simplecov'
|
28
|
+
spec.add_development_dependency 'yard'
|
26
29
|
|
27
30
|
spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
|
28
31
|
spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David McCullars
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rspec
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,16 +84,30 @@ dependencies:
|
|
70
84
|
name: simplecov
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
72
86
|
requirements:
|
73
|
-
- - "
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: yard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
74
102
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0
|
103
|
+
version: '0'
|
76
104
|
type: :development
|
77
105
|
prerelease: false
|
78
106
|
version_requirements: !ruby/object:Gem::Requirement
|
79
107
|
requirements:
|
80
|
-
- - "
|
108
|
+
- - ">="
|
81
109
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0
|
110
|
+
version: '0'
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
112
|
name: engtagger
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -113,7 +141,8 @@ description: Implementation of TextRank solution to ranked keyword extraction.
|
|
113
141
|
email:
|
114
142
|
- david.mccullars@gmail.com
|
115
143
|
executables: []
|
116
|
-
extensions:
|
144
|
+
extensions:
|
145
|
+
- ext/text_rank/extconf.rb
|
117
146
|
extra_rdoc_files: []
|
118
147
|
files:
|
119
148
|
- ".codeclimate.yml"
|
@@ -129,10 +158,15 @@ files:
|
|
129
158
|
- Rakefile
|
130
159
|
- bin/console
|
131
160
|
- bin/setup
|
161
|
+
- ext/text_rank/extconf.rb
|
162
|
+
- ext/text_rank/page_rank_sparse_native.c
|
163
|
+
- ext/text_rank/page_rank_sparse_native.h
|
164
|
+
- ext/text_rank/text_rank.c
|
132
165
|
- lib/page_rank.rb
|
133
166
|
- lib/page_rank/base.rb
|
134
167
|
- lib/page_rank/dense.rb
|
135
168
|
- lib/page_rank/sparse.rb
|
169
|
+
- lib/page_rank/sparse_native.rb
|
136
170
|
- lib/text_rank.rb
|
137
171
|
- lib/text_rank/char_filter.rb
|
138
172
|
- lib/text_rank/char_filter/ascii_folding.rb
|
@@ -141,7 +175,9 @@ files:
|
|
141
175
|
- lib/text_rank/char_filter/strip_html.rb
|
142
176
|
- lib/text_rank/char_filter/strip_possessive.rb
|
143
177
|
- lib/text_rank/char_filter/undo_contractions.rb
|
178
|
+
- lib/text_rank/char_filter/undo_contractions.yml
|
144
179
|
- lib/text_rank/fingerprint.rb
|
180
|
+
- lib/text_rank/fingerprint_overlap.rb
|
145
181
|
- lib/text_rank/graph_strategy.rb
|
146
182
|
- lib/text_rank/graph_strategy/coocurrence.rb
|
147
183
|
- lib/text_rank/keyword_extractor.rb
|
@@ -154,6 +190,7 @@ files:
|
|
154
190
|
- lib/text_rank/token_filter/min_length.rb
|
155
191
|
- lib/text_rank/token_filter/part_of_speech.rb
|
156
192
|
- lib/text_rank/token_filter/stopwords.rb
|
193
|
+
- lib/text_rank/token_filter/stopwords.yml
|
157
194
|
- lib/text_rank/tokenizer.rb
|
158
195
|
- lib/text_rank/tokenizer/money.rb
|
159
196
|
- lib/text_rank/tokenizer/number.rb
|
@@ -167,7 +204,7 @@ homepage: https://github.com/david-mccullars/text_rank
|
|
167
204
|
licenses:
|
168
205
|
- MIT
|
169
206
|
metadata: {}
|
170
|
-
post_install_message:
|
207
|
+
post_install_message:
|
171
208
|
rdoc_options: []
|
172
209
|
require_paths:
|
173
210
|
- lib
|
@@ -182,9 +219,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
182
219
|
- !ruby/object:Gem::Version
|
183
220
|
version: '0'
|
184
221
|
requirements: []
|
185
|
-
|
186
|
-
|
187
|
-
signing_key:
|
222
|
+
rubygems_version: 3.2.32
|
223
|
+
signing_key:
|
188
224
|
specification_version: 4
|
189
225
|
summary: Implementation of TextRank solution to ranked keyword extraction
|
190
226
|
test_files: []
|