text_rank 1.1.7 → 1.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +1 -6
  3. data/.rubocop.yml +60 -1075
  4. data/.ruby-version +1 -1
  5. data/.travis.yml +13 -5
  6. data/{LICENSE.txt → LICENSE} +0 -0
  7. data/README.md +2 -1
  8. data/bin/console +3 -3
  9. data/lib/page_rank.rb +2 -0
  10. data/lib/page_rank/base.rb +9 -8
  11. data/lib/page_rank/dense.rb +2 -1
  12. data/lib/page_rank/sparse.rb +6 -7
  13. data/lib/text_rank.rb +12 -9
  14. data/lib/text_rank/char_filter.rb +1 -1
  15. data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
  16. data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
  17. data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
  18. data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
  19. data/lib/text_rank/fingerprint.rb +20 -28
  20. data/lib/text_rank/fingerprint_overlap.rb +55 -0
  21. data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
  22. data/lib/text_rank/keyword_extractor.rb +32 -25
  23. data/lib/text_rank/rank_filter/collapse_adjacent.rb +53 -25
  24. data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
  25. data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
  26. data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
  27. data/lib/text_rank/token_filter/stopwords.rb +1 -321
  28. data/lib/text_rank/token_filter/stopwords.yml +317 -0
  29. data/lib/text_rank/tokenizer.rb +1 -1
  30. data/lib/text_rank/tokenizer/money.rb +11 -6
  31. data/lib/text_rank/tokenizer/number.rb +4 -3
  32. data/lib/text_rank/tokenizer/punctuation.rb +4 -1
  33. data/lib/text_rank/tokenizer/url.rb +3 -0
  34. data/lib/text_rank/tokenizer/whitespace.rb +4 -1
  35. data/lib/text_rank/tokenizer/word.rb +5 -2
  36. data/lib/text_rank/version.rb +3 -1
  37. data/text_rank.gemspec +10 -10
  38. metadata +48 -32
@@ -1,5 +1,3 @@
1
- require 'set'
2
-
3
1
  module TextRank
4
2
  module TokenFilter
5
3
  ##
@@ -15,325 +13,7 @@ module TextRank
15
13
  class Stopwords
16
14
 
17
15
  # Default English stop-word list.
18
- STOP_WORDS = Set.new(%w[
19
- a
20
- about
21
- above
22
- across
23
- after
24
- afterwards
25
- again
26
- against
27
- all
28
- almost
29
- alone
30
- along
31
- already
32
- also
33
- although
34
- always
35
- am
36
- among
37
- amongst
38
- amoungst
39
- amount
40
- an
41
- and
42
- another
43
- any
44
- anyhow
45
- anyone
46
- anything
47
- anyway
48
- anywhere
49
- are
50
- around
51
- as
52
- at
53
- back
54
- be
55
- became
56
- because
57
- become
58
- becomes
59
- becoming
60
- been
61
- before
62
- beforehand
63
- behind
64
- being
65
- below
66
- beside
67
- besides
68
- between
69
- beyond
70
- bill
71
- both
72
- bottom
73
- but
74
- by
75
- call
76
- can
77
- cannot
78
- cant
79
- co
80
- con
81
- could
82
- couldnt
83
- cry
84
- de
85
- describe
86
- detail
87
- do
88
- done
89
- down
90
- due
91
- during
92
- each
93
- eg
94
- eight
95
- either
96
- eleven
97
- else
98
- elsewhere
99
- empty
100
- enough
101
- etc
102
- even
103
- ever
104
- every
105
- everyone
106
- everything
107
- everywhere
108
- except
109
- few
110
- fifteen
111
- fify
112
- fill
113
- find
114
- fire
115
- first
116
- five
117
- for
118
- former
119
- formerly
120
- forty
121
- found
122
- four
123
- from
124
- front
125
- full
126
- further
127
- get
128
- give
129
- go
130
- had
131
- has
132
- hasnt
133
- have
134
- he
135
- hence
136
- her
137
- here
138
- hereafter
139
- hereby
140
- herein
141
- hereupon
142
- hers
143
- herself
144
- him
145
- himself
146
- his
147
- how
148
- however
149
- hundred
150
- ie
151
- if
152
- in
153
- inc
154
- indeed
155
- interest
156
- into
157
- is
158
- it
159
- its
160
- itself
161
- keep
162
- last
163
- latter
164
- latterly
165
- least
166
- less
167
- ltd
168
- made
169
- many
170
- may
171
- me
172
- meanwhile
173
- might
174
- mill
175
- mine
176
- more
177
- moreover
178
- most
179
- mostly
180
- move
181
- much
182
- must
183
- my
184
- myself
185
- name
186
- namely
187
- neither
188
- never
189
- nevertheless
190
- next
191
- nine
192
- no
193
- nobody
194
- none
195
- noone
196
- nor
197
- not
198
- nothing
199
- now
200
- nowhere
201
- of
202
- off
203
- often
204
- on
205
- once
206
- one
207
- only
208
- onto
209
- or
210
- other
211
- others
212
- otherwise
213
- our
214
- ours
215
- ourselves
216
- out
217
- over
218
- own
219
- part
220
- per
221
- perhaps
222
- please
223
- put
224
- rather
225
- re
226
- same
227
- see
228
- seem
229
- seemed
230
- seeming
231
- seems
232
- serious
233
- several
234
- she
235
- should
236
- show
237
- side
238
- since
239
- sincere
240
- six
241
- sixty
242
- so
243
- some
244
- somehow
245
- someone
246
- something
247
- sometime
248
- sometimes
249
- somewhere
250
- still
251
- such
252
- system
253
- take
254
- ten
255
- than
256
- that
257
- the
258
- their
259
- them
260
- themselves
261
- then
262
- thence
263
- there
264
- thereafter
265
- thereby
266
- therefore
267
- therein
268
- thereupon
269
- these
270
- they
271
- thickv
272
- thin
273
- third
274
- this
275
- those
276
- though
277
- three
278
- through
279
- throughout
280
- thru
281
- thus
282
- to
283
- together
284
- too
285
- top
286
- toward
287
- towards
288
- twelve
289
- twenty
290
- two
291
- un
292
- under
293
- until
294
- up
295
- upon
296
- us
297
- very
298
- via
299
- was
300
- we
301
- well
302
- were
303
- what
304
- whatever
305
- when
306
- whence
307
- whenever
308
- where
309
- whereafter
310
- whereas
311
- whereby
312
- wherein
313
- whereupon
314
- wherever
315
- whether
316
- which
317
- while
318
- whither
319
- who
320
- whoever
321
- whole
322
- whom
323
- whose
324
- why
325
- will
326
- with
327
- within
328
- without
329
- would
330
- yet
331
- you
332
- your
333
- yours
334
- yourself
335
- yourselves
336
- ])
16
+ STOP_WORDS = Set.new(YAML.load_file(File.expand_path('stopwords.yml', __dir__)))
337
17
 
338
18
  # Perform the filter
339
19
  # @param tokens [Array<String>]
@@ -0,0 +1,317 @@
1
+ - a
2
+ - about
3
+ - above
4
+ - across
5
+ - after
6
+ - afterwards
7
+ - again
8
+ - against
9
+ - all
10
+ - almost
11
+ - alone
12
+ - along
13
+ - already
14
+ - also
15
+ - although
16
+ - always
17
+ - am
18
+ - among
19
+ - amongst
20
+ - amoungst
21
+ - amount
22
+ - an
23
+ - and
24
+ - another
25
+ - any
26
+ - anyhow
27
+ - anyone
28
+ - anything
29
+ - anyway
30
+ - anywhere
31
+ - are
32
+ - around
33
+ - as
34
+ - at
35
+ - back
36
+ - be
37
+ - became
38
+ - because
39
+ - become
40
+ - becomes
41
+ - becoming
42
+ - been
43
+ - before
44
+ - beforehand
45
+ - behind
46
+ - being
47
+ - below
48
+ - beside
49
+ - besides
50
+ - between
51
+ - beyond
52
+ - bill
53
+ - both
54
+ - bottom
55
+ - but
56
+ - by
57
+ - call
58
+ - can
59
+ - cannot
60
+ - cant
61
+ - co
62
+ - con
63
+ - could
64
+ - couldnt
65
+ - cry
66
+ - de
67
+ - describe
68
+ - detail
69
+ - do
70
+ - done
71
+ - down
72
+ - due
73
+ - during
74
+ - each
75
+ - eg
76
+ - eight
77
+ - either
78
+ - eleven
79
+ - else
80
+ - elsewhere
81
+ - empty
82
+ - enough
83
+ - etc
84
+ - even
85
+ - ever
86
+ - every
87
+ - everyone
88
+ - everything
89
+ - everywhere
90
+ - except
91
+ - few
92
+ - fifteen
93
+ - fify
94
+ - fill
95
+ - find
96
+ - fire
97
+ - first
98
+ - five
99
+ - for
100
+ - former
101
+ - formerly
102
+ - forty
103
+ - found
104
+ - four
105
+ - from
106
+ - front
107
+ - full
108
+ - further
109
+ - get
110
+ - give
111
+ - go
112
+ - had
113
+ - has
114
+ - hasnt
115
+ - have
116
+ - he
117
+ - hence
118
+ - her
119
+ - here
120
+ - hereafter
121
+ - hereby
122
+ - herein
123
+ - hereupon
124
+ - hers
125
+ - herself
126
+ - him
127
+ - himself
128
+ - his
129
+ - how
130
+ - however
131
+ - hundred
132
+ - ie
133
+ - if
134
+ - in
135
+ - inc
136
+ - indeed
137
+ - interest
138
+ - into
139
+ - is
140
+ - it
141
+ - its
142
+ - itself
143
+ - keep
144
+ - last
145
+ - latter
146
+ - latterly
147
+ - least
148
+ - less
149
+ - ltd
150
+ - made
151
+ - many
152
+ - may
153
+ - me
154
+ - meanwhile
155
+ - might
156
+ - mill
157
+ - mine
158
+ - more
159
+ - moreover
160
+ - most
161
+ - mostly
162
+ - move
163
+ - much
164
+ - must
165
+ - my
166
+ - myself
167
+ - name
168
+ - namely
169
+ - neither
170
+ - never
171
+ - nevertheless
172
+ - next
173
+ - nine
174
+ - no
175
+ - nobody
176
+ - none
177
+ - noone
178
+ - nor
179
+ - not
180
+ - nothing
181
+ - now
182
+ - nowhere
183
+ - of
184
+ - off
185
+ - often
186
+ - on
187
+ - once
188
+ - one
189
+ - only
190
+ - onto
191
+ - or
192
+ - other
193
+ - others
194
+ - otherwise
195
+ - our
196
+ - ours
197
+ - ourselves
198
+ - out
199
+ - over
200
+ - own
201
+ - part
202
+ - per
203
+ - perhaps
204
+ - please
205
+ - put
206
+ - rather
207
+ - re
208
+ - same
209
+ - see
210
+ - seem
211
+ - seemed
212
+ - seeming
213
+ - seems
214
+ - serious
215
+ - several
216
+ - she
217
+ - should
218
+ - show
219
+ - side
220
+ - since
221
+ - sincere
222
+ - six
223
+ - sixty
224
+ - so
225
+ - some
226
+ - somehow
227
+ - someone
228
+ - something
229
+ - sometime
230
+ - sometimes
231
+ - somewhere
232
+ - still
233
+ - such
234
+ - system
235
+ - take
236
+ - ten
237
+ - than
238
+ - that
239
+ - the
240
+ - their
241
+ - them
242
+ - themselves
243
+ - then
244
+ - thence
245
+ - there
246
+ - thereafter
247
+ - thereby
248
+ - therefore
249
+ - therein
250
+ - thereupon
251
+ - these
252
+ - they
253
+ - thickv
254
+ - thin
255
+ - third
256
+ - this
257
+ - those
258
+ - though
259
+ - three
260
+ - through
261
+ - throughout
262
+ - thru
263
+ - thus
264
+ - to
265
+ - together
266
+ - too
267
+ - top
268
+ - toward
269
+ - towards
270
+ - twelve
271
+ - twenty
272
+ - two
273
+ - un
274
+ - under
275
+ - until
276
+ - up
277
+ - upon
278
+ - us
279
+ - very
280
+ - via
281
+ - was
282
+ - we
283
+ - well
284
+ - were
285
+ - what
286
+ - whatever
287
+ - when
288
+ - whence
289
+ - whenever
290
+ - where
291
+ - whereafter
292
+ - whereas
293
+ - whereby
294
+ - wherein
295
+ - whereupon
296
+ - wherever
297
+ - whether
298
+ - which
299
+ - while
300
+ - whither
301
+ - who
302
+ - whoever
303
+ - whole
304
+ - whom
305
+ - whose
306
+ - why
307
+ - will
308
+ - with
309
+ - within
310
+ - without
311
+ - would
312
+ - yet
313
+ - you
314
+ - your
315
+ - yours
316
+ - yourself
317
+ - yourselves