rake-text 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e1fcb875e8a81148b9d549f6b4745d891daf0526
4
+ data.tar.gz: 4927d7e77f3b4913b2f9752040a6a7e12f251b0f
5
+ SHA512:
6
+ metadata.gz: bb9a3b526573090ae0c0ed26ebc09cf5599c6388b764fa500cdbabf6f5bfbccff01afd6d831f9d7c63eb3a43852e7e5f4afcdb2142dac2aa3019adc3d47e3c19
7
+ data.tar.gz: 5c9304e25a04afc1db23fe4cd07cd3bd578aedfe60de774e314b712e73abfecdfd4ddf6898fc114b7aafa515dc8ddb5e45a8d40a45ad2771a6118aa77a121700
@@ -0,0 +1,5 @@
1
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2
+
3
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
4
+
5
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
File without changes
@@ -0,0 +1,572 @@
1
+ #stop word list from SMART (Salton,1971). Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop
2
+ a
3
+ a's
4
+ able
5
+ about
6
+ above
7
+ according
8
+ accordingly
9
+ across
10
+ actually
11
+ after
12
+ afterwards
13
+ again
14
+ against
15
+ ain't
16
+ all
17
+ allow
18
+ allows
19
+ almost
20
+ alone
21
+ along
22
+ already
23
+ also
24
+ although
25
+ always
26
+ am
27
+ among
28
+ amongst
29
+ an
30
+ and
31
+ another
32
+ any
33
+ anybody
34
+ anyhow
35
+ anyone
36
+ anything
37
+ anyway
38
+ anyways
39
+ anywhere
40
+ apart
41
+ appear
42
+ appreciate
43
+ appropriate
44
+ are
45
+ aren't
46
+ around
47
+ as
48
+ aside
49
+ ask
50
+ asking
51
+ associated
52
+ at
53
+ available
54
+ away
55
+ awfully
56
+ b
57
+ be
58
+ became
59
+ because
60
+ become
61
+ becomes
62
+ becoming
63
+ been
64
+ before
65
+ beforehand
66
+ behind
67
+ being
68
+ believe
69
+ below
70
+ beside
71
+ besides
72
+ best
73
+ better
74
+ between
75
+ beyond
76
+ both
77
+ brief
78
+ but
79
+ by
80
+ c
81
+ c'mon
82
+ c's
83
+ came
84
+ can
85
+ can't
86
+ cannot
87
+ cant
88
+ cause
89
+ causes
90
+ certain
91
+ certainly
92
+ changes
93
+ clearly
94
+ co
95
+ com
96
+ come
97
+ comes
98
+ concerning
99
+ consequently
100
+ consider
101
+ considering
102
+ contain
103
+ containing
104
+ contains
105
+ corresponding
106
+ could
107
+ couldn't
108
+ course
109
+ currently
110
+ d
111
+ definitely
112
+ described
113
+ despite
114
+ did
115
+ didn't
116
+ different
117
+ do
118
+ does
119
+ doesn't
120
+ doing
121
+ don't
122
+ done
123
+ down
124
+ downwards
125
+ during
126
+ e
127
+ each
128
+ edu
129
+ eg
130
+ eight
131
+ either
132
+ else
133
+ elsewhere
134
+ enough
135
+ entirely
136
+ especially
137
+ et
138
+ etc
139
+ even
140
+ ever
141
+ every
142
+ everybody
143
+ everyone
144
+ everything
145
+ everywhere
146
+ ex
147
+ exactly
148
+ example
149
+ except
150
+ f
151
+ far
152
+ few
153
+ fifth
154
+ first
155
+ five
156
+ followed
157
+ following
158
+ follows
159
+ for
160
+ former
161
+ formerly
162
+ forth
163
+ four
164
+ from
165
+ further
166
+ furthermore
167
+ g
168
+ get
169
+ gets
170
+ getting
171
+ given
172
+ gives
173
+ go
174
+ goes
175
+ going
176
+ gone
177
+ got
178
+ gotten
179
+ greetings
180
+ h
181
+ had
182
+ hadn't
183
+ happens
184
+ hardly
185
+ has
186
+ hasn't
187
+ have
188
+ haven't
189
+ having
190
+ he
191
+ he's
192
+ hello
193
+ help
194
+ hence
195
+ her
196
+ here
197
+ here's
198
+ hereafter
199
+ hereby
200
+ herein
201
+ hereupon
202
+ hers
203
+ herself
204
+ hi
205
+ him
206
+ himself
207
+ his
208
+ hither
209
+ hopefully
210
+ how
211
+ howbeit
212
+ however
213
+ i
214
+ i'd
215
+ i'll
216
+ i'm
217
+ i've
218
+ ie
219
+ if
220
+ ignored
221
+ immediate
222
+ in
223
+ inasmuch
224
+ inc
225
+ indeed
226
+ indicate
227
+ indicated
228
+ indicates
229
+ inner
230
+ insofar
231
+ instead
232
+ into
233
+ inward
234
+ is
235
+ isn't
236
+ it
237
+ it'd
238
+ it'll
239
+ it's
240
+ its
241
+ itself
242
+ j
243
+ just
244
+ k
245
+ keep
246
+ keeps
247
+ kept
248
+ know
249
+ knows
250
+ known
251
+ l
252
+ last
253
+ lately
254
+ later
255
+ latter
256
+ latterly
257
+ least
258
+ less
259
+ lest
260
+ let
261
+ let's
262
+ like
263
+ liked
264
+ likely
265
+ little
266
+ look
267
+ looking
268
+ looks
269
+ ltd
270
+ m
271
+ mainly
272
+ many
273
+ may
274
+ maybe
275
+ me
276
+ mean
277
+ meanwhile
278
+ merely
279
+ might
280
+ more
281
+ moreover
282
+ most
283
+ mostly
284
+ much
285
+ must
286
+ my
287
+ myself
288
+ n
289
+ name
290
+ namely
291
+ nd
292
+ near
293
+ nearly
294
+ necessary
295
+ need
296
+ needs
297
+ neither
298
+ never
299
+ nevertheless
300
+ new
301
+ next
302
+ nine
303
+ no
304
+ nobody
305
+ non
306
+ none
307
+ noone
308
+ nor
309
+ normally
310
+ not
311
+ nothing
312
+ novel
313
+ now
314
+ nowhere
315
+ o
316
+ obviously
317
+ of
318
+ off
319
+ often
320
+ oh
321
+ ok
322
+ okay
323
+ old
324
+ on
325
+ once
326
+ one
327
+ ones
328
+ only
329
+ onto
330
+ or
331
+ other
332
+ others
333
+ otherwise
334
+ ought
335
+ our
336
+ ours
337
+ ourselves
338
+ out
339
+ outside
340
+ over
341
+ overall
342
+ own
343
+ p
344
+ particular
345
+ particularly
346
+ per
347
+ perhaps
348
+ placed
349
+ please
350
+ plus
351
+ possible
352
+ presumably
353
+ probably
354
+ provides
355
+ q
356
+ que
357
+ quite
358
+ qv
359
+ r
360
+ rather
361
+ rd
362
+ re
363
+ really
364
+ reasonably
365
+ regarding
366
+ regardless
367
+ regards
368
+ relatively
369
+ respectively
370
+ right
371
+ s
372
+ said
373
+ same
374
+ saw
375
+ say
376
+ saying
377
+ says
378
+ second
379
+ secondly
380
+ see
381
+ seeing
382
+ seem
383
+ seemed
384
+ seeming
385
+ seems
386
+ seen
387
+ self
388
+ selves
389
+ sensible
390
+ sent
391
+ serious
392
+ seriously
393
+ seven
394
+ several
395
+ shall
396
+ she
397
+ should
398
+ shouldn't
399
+ since
400
+ six
401
+ so
402
+ some
403
+ somebody
404
+ somehow
405
+ someone
406
+ something
407
+ sometime
408
+ sometimes
409
+ somewhat
410
+ somewhere
411
+ soon
412
+ sorry
413
+ specified
414
+ specify
415
+ specifying
416
+ still
417
+ sub
418
+ such
419
+ sup
420
+ sure
421
+ t
422
+ t's
423
+ take
424
+ taken
425
+ tell
426
+ tends
427
+ th
428
+ than
429
+ thank
430
+ thanks
431
+ thanx
432
+ that
433
+ that's
434
+ thats
435
+ the
436
+ their
437
+ theirs
438
+ them
439
+ themselves
440
+ then
441
+ thence
442
+ there
443
+ there's
444
+ thereafter
445
+ thereby
446
+ therefore
447
+ therein
448
+ theres
449
+ thereupon
450
+ these
451
+ they
452
+ they'd
453
+ they'll
454
+ they're
455
+ they've
456
+ think
457
+ third
458
+ this
459
+ thorough
460
+ thoroughly
461
+ those
462
+ though
463
+ three
464
+ through
465
+ throughout
466
+ thru
467
+ thus
468
+ to
469
+ together
470
+ too
471
+ took
472
+ toward
473
+ towards
474
+ tried
475
+ tries
476
+ truly
477
+ try
478
+ trying
479
+ twice
480
+ two
481
+ u
482
+ un
483
+ under
484
+ unfortunately
485
+ unless
486
+ unlikely
487
+ until
488
+ unto
489
+ up
490
+ upon
491
+ us
492
+ use
493
+ used
494
+ useful
495
+ uses
496
+ using
497
+ usually
498
+ uucp
499
+ v
500
+ value
501
+ various
502
+ very
503
+ via
504
+ viz
505
+ vs
506
+ w
507
+ want
508
+ wants
509
+ was
510
+ wasn't
511
+ way
512
+ we
513
+ we'd
514
+ we'll
515
+ we're
516
+ we've
517
+ welcome
518
+ well
519
+ went
520
+ were
521
+ weren't
522
+ what
523
+ what's
524
+ whatever
525
+ when
526
+ whence
527
+ whenever
528
+ where
529
+ where's
530
+ whereafter
531
+ whereas
532
+ whereby
533
+ wherein
534
+ whereupon
535
+ wherever
536
+ whether
537
+ which
538
+ while
539
+ whither
540
+ who
541
+ who's
542
+ whoever
543
+ whole
544
+ whom
545
+ whose
546
+ why
547
+ will
548
+ willing
549
+ wish
550
+ with
551
+ within
552
+ without
553
+ won't
554
+ wonder
555
+ would
556
+ would
557
+ wouldn't
558
+ x
559
+ y
560
+ yes
561
+ yet
562
+ you
563
+ you'd
564
+ you'll
565
+ you're
566
+ you've
567
+ your
568
+ yours
569
+ yourself
570
+ yourselves
571
+ z
572
+ zero
@@ -0,0 +1,426 @@
1
+ #From "A stop list for general text" Fox 1989
2
+ a
3
+ about
4
+ above
5
+ across
6
+ after
7
+ again
8
+ against
9
+ all
10
+ almost
11
+ alone
12
+ along
13
+ already
14
+ also
15
+ although
16
+ always
17
+ among
18
+ an
19
+ and
20
+ another
21
+ any
22
+ anybody
23
+ anyone
24
+ anything
25
+ anywhere
26
+ are
27
+ area
28
+ areas
29
+ around
30
+ as
31
+ ask
32
+ asked
33
+ asking
34
+ asks
35
+ at
36
+ away
37
+ b
38
+ back
39
+ backed
40
+ backing
41
+ backs
42
+ be
43
+ because
44
+ became
45
+ become
46
+ becomes
47
+ been
48
+ before
49
+ began
50
+ behind
51
+ being
52
+ beings
53
+ best
54
+ better
55
+ between
56
+ big
57
+ both
58
+ but
59
+ by
60
+ c
61
+ came
62
+ can
63
+ cannot
64
+ case
65
+ cases
66
+ certain
67
+ certainly
68
+ clear
69
+ clearly
70
+ come
71
+ could
72
+ d
73
+ did
74
+ differ
75
+ different
76
+ differently
77
+ do
78
+ does
79
+ done
80
+ down
81
+ downed
82
+ downing
83
+ downs
84
+ during
85
+ e
86
+ each
87
+ early
88
+ either
89
+ end
90
+ ended
91
+ ending
92
+ ends
93
+ enough
94
+ even
95
+ evenly
96
+ ever
97
+ every
98
+ everybody
99
+ everyone
100
+ everything
101
+ everywhere
102
+ f
103
+ face
104
+ faces
105
+ fact
106
+ facts
107
+ far
108
+ felt
109
+ few
110
+ find
111
+ finds
112
+ first
113
+ for
114
+ four
115
+ from
116
+ full
117
+ fully
118
+ further
119
+ furthered
120
+ furthering
121
+ furthers
122
+ g
123
+ gave
124
+ general
125
+ generally
126
+ get
127
+ gets
128
+ give
129
+ given
130
+ gives
131
+ go
132
+ going
133
+ good
134
+ goods
135
+ got
136
+ great
137
+ greater
138
+ greatest
139
+ group
140
+ grouped
141
+ grouping
142
+ groups
143
+ h
144
+ had
145
+ has
146
+ have
147
+ having
148
+ he
149
+ her
150
+ herself
151
+ here
152
+ high
153
+ higher
154
+ highest
155
+ him
156
+ himself
157
+ his
158
+ how
159
+ however
160
+ i
161
+ if
162
+ important
163
+ in
164
+ interest
165
+ interested
166
+ interesting
167
+ interests
168
+ into
169
+ is
170
+ it
171
+ its
172
+ itself
173
+ j
174
+ just
175
+ k
176
+ keep
177
+ keeps
178
+ kind
179
+ knew
180
+ know
181
+ known
182
+ knows
183
+ l
184
+ large
185
+ largely
186
+ last
187
+ later
188
+ latest
189
+ least
190
+ less
191
+ let
192
+ lets
193
+ like
194
+ likely
195
+ long
196
+ longer
197
+ longest
198
+ m
199
+ made
200
+ make
201
+ making
202
+ man
203
+ many
204
+ may
205
+ me
206
+ member
207
+ members
208
+ men
209
+ might
210
+ more
211
+ most
212
+ mostly
213
+ mr
214
+ mrs
215
+ much
216
+ must
217
+ my
218
+ myself
219
+ n
220
+ necessary
221
+ need
222
+ needed
223
+ needing
224
+ needs
225
+ never
226
+ new
227
+ newer
228
+ newest
229
+ next
230
+ no
231
+ non
232
+ not
233
+ nobody
234
+ noone
235
+ nothing
236
+ now
237
+ nowhere
238
+ number
239
+ numbered
240
+ numbering
241
+ numbers
242
+ o
243
+ of
244
+ off
245
+ often
246
+ old
247
+ older
248
+ oldest
249
+ on
250
+ once
251
+ one
252
+ only
253
+ open
254
+ opened
255
+ opening
256
+ opens
257
+ or
258
+ order
259
+ ordered
260
+ ordering
261
+ orders
262
+ other
263
+ others
264
+ our
265
+ out
266
+ over
267
+ p
268
+ part
269
+ parted
270
+ parting
271
+ parts
272
+ per
273
+ perhaps
274
+ place
275
+ places
276
+ point
277
+ pointed
278
+ pointing
279
+ points
280
+ possible
281
+ present
282
+ presented
283
+ presenting
284
+ presents
285
+ problem
286
+ problems
287
+ put
288
+ puts
289
+ q
290
+ quite
291
+ r
292
+ rather
293
+ really
294
+ right
295
+ room
296
+ rooms
297
+ s
298
+ said
299
+ same
300
+ saw
301
+ say
302
+ says
303
+ second
304
+ seconds
305
+ see
306
+ seem
307
+ seemed
308
+ seeming
309
+ seems
310
+ sees
311
+ several
312
+ shall
313
+ she
314
+ should
315
+ show
316
+ showed
317
+ showing
318
+ shows
319
+ side
320
+ sides
321
+ since
322
+ small
323
+ smaller
324
+ smallest
325
+ so
326
+ some
327
+ somebody
328
+ someone
329
+ something
330
+ somewhere
331
+ state
332
+ states
333
+ still
334
+ such
335
+ sure
336
+ t
337
+ take
338
+ taken
339
+ than
340
+ that
341
+ the
342
+ their
343
+ them
344
+ then
345
+ there
346
+ therefore
347
+ these
348
+ they
349
+ thing
350
+ things
351
+ think
352
+ thinks
353
+ this
354
+ those
355
+ though
356
+ thought
357
+ thoughts
358
+ three
359
+ through
360
+ thus
361
+ to
362
+ today
363
+ together
364
+ too
365
+ took
366
+ toward
367
+ turn
368
+ turned
369
+ turning
370
+ turns
371
+ two
372
+ u
373
+ under
374
+ until
375
+ up
376
+ upon
377
+ us
378
+ use
379
+ uses
380
+ used
381
+ v
382
+ very
383
+ w
384
+ want
385
+ wanted
386
+ wanting
387
+ wants
388
+ was
389
+ way
390
+ ways
391
+ we
392
+ well
393
+ wells
394
+ went
395
+ were
396
+ what
397
+ when
398
+ where
399
+ whether
400
+ which
401
+ while
402
+ who
403
+ whole
404
+ whose
405
+ why
406
+ will
407
+ with
408
+ within
409
+ without
410
+ work
411
+ worked
412
+ working
413
+ works
414
+ would
415
+ x
416
+ y
417
+ year
418
+ years
419
+ yet
420
+ you
421
+ young
422
+ younger
423
+ youngest
424
+ your
425
+ yours
426
+ z
@@ -0,0 +1,3 @@
1
+ require_relative 'rake_text/version'
2
+ require_relative 'rake_text/engine'
3
+ require_relative 'rake_text/stoplist'
@@ -0,0 +1,122 @@
1
module RakeText
  # Keyword extractor based on RAKE (Rapid Automatic Keyword Extraction).
  #
  # The text is cut into fragments at punctuation, every fragment is cut into
  # candidate phrases at stop words, and each phrase is scored by summing the
  # scores of the words it contains.
  class Engine
    # Punctuation characters at which the text is cut into fragments.
    SENTENCE_DELIMITERS = /[.!?,;:\t\-"\(\)']/

    # A run of non-letter characters; separates the words inside a phrase.
    WORD_DELIMITERS = /\P{L}+/

    # The stop list used to break fragments into candidate phrases. It is
    # public so callers can swap or extend the list after construction.
    attr_reader :stoplist

    # @param stoplist_file [String] path of a stop-list file; an empty string
    #   is passed through to Stoplist, which then loads its built-in default.
    def initialize(stoplist_file = '')
      @stoplist = Stoplist.new(stoplist_file)
    end

    # Extracts candidate key phrases from +text+ together with their scores.
    #
    # @param text [String] the text to analyse
    # @param min_score [Numeric] phrases scoring below this value are dropped
    # @return [Hash{String => Float}] lower-cased phrase => score
    def analyse(text, min_score = 0)
      scored = phrase_scores(phrases(sentences(text)))
      scored.reject { |_phrase, score| score < min_score }
    end

    private

    # Cuts +text+ at punctuation and returns the trimmed, lower-cased,
    # non-empty fragments.
    def sentences(text)
      text.split(SENTENCE_DELIMITERS)
          .map { |fragment| fragment.strip.downcase }
          .reject(&:empty?)
    end

    # Cuts every fragment at stop words and returns all resulting phrases.
    def phrases(sentences)
      # Building the stop-word regexp is expensive (one alternative per stop
      # word), so build it once per call instead of once per sentence.
      stop_words = @stoplist.pattern

      sentences.flat_map do |sentence|
        # Every stop word becomes a '|' marker and the fragment is then cut at
        # the markers; a literal '|' in the text therefore also ends a phrase.
        sentence.gsub(stop_words, '|')
                .split('|')
                .map { |part| part.strip.downcase }
                .reject(&:empty?)
      end
    end

    # Returns the lower-cased words of +text+; anything that is not a letter
    # acts as a separator.
    def words(text)
      text.split(WORD_DELIMITERS).map(&:downcase).reject(&:empty?)
    end

    # Scores every word occurring in +phrases+.
    #
    # frequency = number of occurrences of the word over all phrases
    # degree    = sum of the word counts of the phrases the word occurs in
    # score     = (degree + frequency) / frequency
    #
    # @return [Hash{String => Float}] word => score
    def word_scores(phrases)
      frequency = Hash.new(0)
      degree = Hash.new(0)

      phrases.each do |phrase|
        tokens = words(phrase)
        tokens.each do |word|
          frequency[word] += 1
          degree[word] += tokens.length
        end
      end

      # Calculate the score of each word
      scores = Hash.new(0)
      frequency.each do |word, count|
        scores[word] = (degree[word] + count) / count.to_f
      end
      scores
    end

    # Scores every phrase as the sum of the scores of its words.
    #
    # A phrase that occurs several times in +phrases+ accumulates its word
    # scores once per occurrence. Phrases without any word get no entry.
    #
    # @return [Hash{String => Float}] phrase => score
    def phrase_scores(phrases)
      scores = word_scores(phrases)

      phrases.each_with_object(Hash.new(0)) do |phrase, candidates|
        words(phrase).each { |word| candidates[phrase] += scores[word] }
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
module RakeText
  # A list of stop words, loadable from a file, that can be turned into a
  # single regular expression matching any of the words.
  #
  # Stop-list files contain whitespace-separated words; lines whose first
  # non-blank character is '#' are comments.
  class Stoplist
    # Regexp that can never match; used when the list holds no usable word.
    NEVER_MATCH = /(?!)/

    # @return [Array<String>] the stop words currently in the list
    attr_reader :words

    # @param file_path [String, nil] path of a stop-list file; when nil or
    #   empty the bundled default list is loaded instead.
    def initialize(file_path = '')
      @words = []

      if file_path.nil? || file_path.empty?
        load_default
      else
        load_file(file_path)
      end
    end

    # Replaces the list with the bundled default stop list.
    def load_default
      load_file(File.join(File.dirname(__FILE__), '../../data/stoplists/default.stoplist'))
    end

    # Replaces the list with the bundled Fox stop list.
    def load_fox
      load_file(File.join(File.dirname(__FILE__), '../../data/stoplists/fox.stoplist'))
    end

    # Removes every word from the list.
    def clear
      @words = []
    end

    # Appends +word+ to the list.
    def add(word)
      @words.push(word)
    end

    # Replaces the list with the words found in the file at +file_path+.
    #
    # The current list is only replaced after the file has been read
    # completely, so a failed load leaves the previous words in place.
    #
    # @raise [SystemCallError] (e.g. Errno::ENOENT) if the file cannot be read
    def load_file(file_path)
      loaded = []

      # File.foreach closes the file for us, even when an error is raised.
      File.foreach(file_path) do |line|
        line = line.strip
        # Skip comments; blank lines must not yield an empty "word", because
        # an empty alternative in the pattern would match at every boundary.
        next if line.empty? || line.start_with?('#')

        loaded.concat(line.split)
      end

      @words = loaded
    end

    # Builds a case-insensitive regexp matching any stop word as a whole word.
    #
    # Words are escaped, so regexp metacharacters in a word are matched
    # literally. Without any usable word the result matches nothing.
    #
    # @return [Regexp]
    def pattern
      usable = @words.reject(&:empty?)
      return NEVER_MATCH if usable.empty?

      alternatives = usable.map { |word| "\\b#{Regexp.escape(word)}\\b" }
      Regexp.new(alternatives.join('|'), Regexp::IGNORECASE)
    end
  end
end
@@ -0,0 +1,3 @@
1
module RakeText
  # Version of the rake-text gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = '1.0.1'.freeze
end
@@ -0,0 +1,29 @@
1
+ require 'minitest/autorun'
2
+ require 'rake_text'
3
+
4
# Tests for the bundled stop lists and for RakeText::Engine#analyse.
#
# Minitest 5 names the base class Minitest::Test; the older
# MiniTest::Unit::TestCase is only used when that class is not available.
class RakeTextTest < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
  def setup
    @rake = RakeText::Engine.new
    @text = 'Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.'
  end

  # The engine loads the default stop list when no file is given.
  def test_stoplist_builtin_default
    assert_equal(571, @rake.stoplist.words.length)
  end

  def test_stoplist_builtin_fox
    @rake.stoplist.load_fox
    assert_equal(425, @rake.stoplist.words.length)
  end

  def test_stoplist_load
    @rake.stoplist.load_file(File.join(File.dirname(__FILE__), '../data/stoplists/default.stoplist'))
    assert_equal(571, @rake.stoplist.words.length)
  end

  def test_analyse
    @rake.stoplist.load_default
    result = @rake.analyse(@text, 11)
    expected = { 'linear diophantine equations' => 11.5, 'minimal generating sets' => 11.666666666666666 }

    assert_equal(expected.keys.sort, result.keys.sort)
    # Scores are floats, so compare with a tolerance instead of exact equality.
    expected.each do |phrase, score|
      assert_in_delta(score, result[phrase], 1e-9)
    end
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rake-text
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Felix Lindström
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-07 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Process text and calculate RAKE.
14
+ email:
15
+ - felix.lindstrom@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - LICENSE.md
22
+ - data/stoplists/default.stoplist
23
+ - data/stoplists/fox.stoplist
24
+ - lib/rake_text/engine.rb
25
+ - lib/rake_text/stoplist.rb
26
+ - lib/rake_text/version.rb
27
+ - lib/rake_text.rb
28
+ - test/test_rake_text.rb
29
+ homepage: https://github.com/felixlindstrom/rake-ruby
30
+ licenses: []
31
+ metadata: {}
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubyforge_project:
48
+ rubygems_version: 2.0.14
49
+ signing_key:
50
+ specification_version: 4
51
+ summary: Process text and calculate RAKE.
52
+ test_files:
53
+ - test/test_rake_text.rb