text_rank 1.2.3 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,317 @@
1
+ - a
2
+ - about
3
+ - above
4
+ - across
5
+ - after
6
+ - afterwards
7
+ - again
8
+ - against
9
+ - all
10
+ - almost
11
+ - alone
12
+ - along
13
+ - already
14
+ - also
15
+ - although
16
+ - always
17
+ - am
18
+ - among
19
+ - amongst
20
+ - amoungst
21
+ - amount
22
+ - an
23
+ - and
24
+ - another
25
+ - any
26
+ - anyhow
27
+ - anyone
28
+ - anything
29
+ - anyway
30
+ - anywhere
31
+ - are
32
+ - around
33
+ - as
34
+ - at
35
+ - back
36
+ - be
37
+ - became
38
+ - because
39
+ - become
40
+ - becomes
41
+ - becoming
42
+ - been
43
+ - before
44
+ - beforehand
45
+ - behind
46
+ - being
47
+ - below
48
+ - beside
49
+ - besides
50
+ - between
51
+ - beyond
52
+ - bill
53
+ - both
54
+ - bottom
55
+ - but
56
+ - by
57
+ - call
58
+ - can
59
+ - cannot
60
+ - cant
61
+ - co
62
+ - con
63
+ - could
64
+ - couldnt
65
+ - cry
66
+ - de
67
+ - describe
68
+ - detail
69
+ - do
70
+ - done
71
+ - down
72
+ - due
73
+ - during
74
+ - each
75
+ - eg
76
+ - eight
77
+ - either
78
+ - eleven
79
+ - else
80
+ - elsewhere
81
+ - empty
82
+ - enough
83
+ - etc
84
+ - even
85
+ - ever
86
+ - every
87
+ - everyone
88
+ - everything
89
+ - everywhere
90
+ - except
91
+ - few
92
+ - fifteen
93
+ - fifty
94
+ - fill
95
+ - find
96
+ - fire
97
+ - first
98
+ - five
99
+ - for
100
+ - former
101
+ - formerly
102
+ - forty
103
+ - found
104
+ - four
105
+ - from
106
+ - front
107
+ - full
108
+ - further
109
+ - get
110
+ - give
111
+ - go
112
+ - had
113
+ - has
114
+ - hasnt
115
+ - have
116
+ - he
117
+ - hence
118
+ - her
119
+ - here
120
+ - hereafter
121
+ - hereby
122
+ - herein
123
+ - hereupon
124
+ - hers
125
+ - herself
126
+ - him
127
+ - himself
128
+ - his
129
+ - how
130
+ - however
131
+ - hundred
132
+ - ie
133
+ - if
134
+ - in
135
+ - inc
136
+ - indeed
137
+ - interest
138
+ - into
139
+ - is
140
+ - it
141
+ - its
142
+ - itself
143
+ - keep
144
+ - last
145
+ - latter
146
+ - latterly
147
+ - least
148
+ - less
149
+ - ltd
150
+ - made
151
+ - many
152
+ - may
153
+ - me
154
+ - meanwhile
155
+ - might
156
+ - mill
157
+ - mine
158
+ - more
159
+ - moreover
160
+ - most
161
+ - mostly
162
+ - move
163
+ - much
164
+ - must
165
+ - my
166
+ - myself
167
+ - name
168
+ - namely
169
+ - neither
170
+ - never
171
+ - nevertheless
172
+ - next
173
+ - nine
174
+ - no
175
+ - nobody
176
+ - none
177
+ - noone
178
+ - nor
179
+ - not
180
+ - nothing
181
+ - now
182
+ - nowhere
183
+ - of
184
+ - off
185
+ - often
186
+ - on
187
+ - once
188
+ - one
189
+ - only
190
+ - onto
191
+ - or
192
+ - other
193
+ - others
194
+ - otherwise
195
+ - our
196
+ - ours
197
+ - ourselves
198
+ - out
199
+ - over
200
+ - own
201
+ - part
202
+ - per
203
+ - perhaps
204
+ - please
205
+ - put
206
+ - rather
207
+ - re
208
+ - same
209
+ - see
210
+ - seem
211
+ - seemed
212
+ - seeming
213
+ - seems
214
+ - serious
215
+ - several
216
+ - she
217
+ - should
218
+ - show
219
+ - side
220
+ - since
221
+ - sincere
222
+ - six
223
+ - sixty
224
+ - so
225
+ - some
226
+ - somehow
227
+ - someone
228
+ - something
229
+ - sometime
230
+ - sometimes
231
+ - somewhere
232
+ - still
233
+ - such
234
+ - system
235
+ - take
236
+ - ten
237
+ - than
238
+ - that
239
+ - the
240
+ - their
241
+ - them
242
+ - themselves
243
+ - then
244
+ - thence
245
+ - there
246
+ - thereafter
247
+ - thereby
248
+ - therefore
249
+ - therein
250
+ - thereupon
251
+ - these
252
+ - they
253
+ - thick
254
+ - thin
255
+ - third
256
+ - this
257
+ - those
258
+ - though
259
+ - three
260
+ - through
261
+ - throughout
262
+ - thru
263
+ - thus
264
+ - to
265
+ - together
266
+ - too
267
+ - top
268
+ - toward
269
+ - towards
270
+ - twelve
271
+ - twenty
272
+ - two
273
+ - un
274
+ - under
275
+ - until
276
+ - up
277
+ - upon
278
+ - us
279
+ - very
280
+ - via
281
+ - was
282
+ - we
283
+ - well
284
+ - were
285
+ - what
286
+ - whatever
287
+ - when
288
+ - whence
289
+ - whenever
290
+ - where
291
+ - whereafter
292
+ - whereas
293
+ - whereby
294
+ - wherein
295
+ - whereupon
296
+ - wherever
297
+ - whether
298
+ - which
299
+ - while
300
+ - whither
301
+ - who
302
+ - whoever
303
+ - whole
304
+ - whom
305
+ - whose
306
+ - why
307
+ - will
308
+ - with
309
+ - within
310
+ - without
311
+ - would
312
+ - yet
313
+ - you
314
+ - your
315
+ - yours
316
+ - yourself
317
+ - yourselves
@@ -31,7 +31,7 @@ module TextRank
31
31
  tokens = []
32
32
  text.scan(Regexp.new(regular_expressions.flatten.join('|'))) do |matches|
33
33
  m = matches.compact.first
34
- tokens << m if m && m.size > 0
34
+ tokens << m if m&.size&.positive?
35
35
  end
36
36
  tokens
37
37
  end
@@ -1,4 +1,3 @@
1
- #encoding: UTF-8
2
1
  module TextRank
3
2
  module Tokenizer
4
3
 
@@ -12,7 +11,7 @@ module TextRank
12
11
  "\u20a4", # Lira Symbol
13
12
  "\u20a7", # Peseta Sign
14
13
  "\u20ac", # Euro Symbol
15
- "\u20B9", # Rupee
14
+ "\u20B9", # Rupee
16
15
  "\u20a9", # Won Sign
17
16
  "\u20b4", # Hryvnia Sign
18
17
  "\u20af", # Drachma Sign
@@ -34,6 +33,8 @@ module TextRank
34
33
  # A tokenizer regex that preserves money or formatted numbers as a single token. This
35
34
  # currently supports 24 different currency symbols:
36
35
  #
36
+ # rubocop:disable Style/AsciiComments
37
+ #
37
38
  # * ¤
38
39
  # * $
39
40
  # * ¢
@@ -58,19 +59,23 @@ module TextRank
58
59
  # * ₫
59
60
  # * %
60
61
  # * ‰
62
+ #
63
+ # rubocop:enable Style/AsciiComments
61
64
  #
62
65
  # It also supports two alternative formats for negatives as well as optional three digit comma
63
66
  # separation and optional decimals.
64
67
  ##
65
- Money = %r{
68
+ # rubocop:disable Naming/ConstantName
69
+ Money = /
66
70
  (
67
- #{CURRENCY_SYMBOLS} \-? #{Number} # $-45,231.21
71
+ #{CURRENCY_SYMBOLS} -? #{Number} # $-45,231.21
68
72
  |
69
- \-? #{CURRENCY_SYMBOLS} #{Number} # -$45,231.21
73
+ -? #{CURRENCY_SYMBOLS} #{Number} # -$45,231.21
70
74
  |
71
75
  \( #{CURRENCY_SYMBOLS} #{Number} \) # ($45,231.21)
72
76
  )
73
- }x
77
+ /x
78
+ # rubocop:enable Naming/ConstantName
74
79
 
75
80
  end
76
81
  end
@@ -1,11 +1,11 @@
1
- #encoding: UTF-8
2
1
  module TextRank
3
2
  module Tokenizer
4
3
 
5
4
  ##
6
5
  # A tokenizer regex that preserves (optionally formatted) numbers as a single token.
7
6
  ##
8
- Number = %r{
7
+ # rubocop:disable Naming/ConstantName
8
+ Number = /
9
9
  (
10
10
  [1-9]\d{3,} # 453231162
11
11
  (?:\.\d+)? # 453231162.17
@@ -25,7 +25,8 @@ module TextRank
25
25
 
26
26
  (?:\.\d+) # .17
27
27
  )
28
- }x
28
+ /x
29
+ # rubocop:enable Naming/ConstantName
29
30
 
30
31
  end
31
32
  end
@@ -1,11 +1,14 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves single punctuation symbols as a token. Use
5
6
  # this if one or more of your TokenFilter classes need punctuation in order to
6
7
  # make decisions.
7
8
  ##
8
- Punctuation = %r{([\p{Punct}])}
9
+ # rubocop:disable Naming/ConstantName
10
+ Punctuation = /(\p{Punct})/
11
+ # rubocop:enable Naming/ConstantName
9
12
 
10
13
  end
11
14
  end
@@ -1,8 +1,10 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves entire URLs as a token (rather than splitting them up)
5
6
  ##
7
+ # rubocop:disable Naming/ConstantName
6
8
  Url = %r{
7
9
  (
8
10
  (?:[\w-]+://?|www[.])
@@ -16,6 +18,7 @@ module TextRank
16
18
  )
17
19
  )
18
20
  }xi
21
+ # rubocop:enable Naming/ConstantName
19
22
 
20
23
  end
21
24
  end
@@ -1,11 +1,14 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves single whitespace characters as a token. Use
5
6
  # this if one or more of your TokenFilter classes need whitespace in order to
6
7
  # make decisions.
7
8
  ##
8
- Whitespace = %r{\s}
9
+ # rubocop:disable Naming/ConstantName
10
+ Whitespace = /\s/
11
+ # rubocop:enable Naming/ConstantName
9
12
 
10
13
  end
11
14
  end