summa 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/summa.rb +336 -4
- metadata +1 -1
data/lib/summa.rb
CHANGED
@@ -2,12 +2,16 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
2
2
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
3
|
|
4
4
|
module Summa
|
5
|
-
VERSION = '0.0.2'
|
5
|
+
VERSION = '0.0.3'
|
6
6
|
end
|
7
7
|
|
8
8
|
class String
|
9
|
-
def
|
10
|
-
puts "
|
9
|
+
def summarize
|
10
|
+
puts "Moooore testing!... of Summa #{VERSION}"
|
11
|
+
end
|
12
|
+
|
13
|
+
def removePunctuation!
|
14
|
+
self.delete!(".,-:;()?!\"\'")
|
11
15
|
end
|
12
16
|
end
|
13
17
|
|
@@ -157,7 +161,8 @@ class FrequencyAnalyzer
|
|
157
161
|
for i in 0..@document.docArray.length
|
158
162
|
word = @document.docArray[i]
|
159
163
|
if word != nil
|
160
|
-
word = CGWordOps.removePunctuation(word)
|
164
|
+
#word = CGWordOps.removePunctuation(word)
|
165
|
+
word.delete!(".,;:()?!\"")
|
161
166
|
if !@stopWords.docArray.include?(word.downcase)
|
162
167
|
#stemmed = word.stem
|
163
168
|
if @freqCount.has_key?(word)
|
@@ -249,3 +254,330 @@ class CGDocument
|
|
249
254
|
|
250
255
|
end
|
251
256
|
|
257
|
+
|
258
|
+
class SummaData
|
259
|
+
|
260
|
+
@stopwords = Array.[]("a", \
|
261
|
+
"about", \
|
262
|
+
"above", \
|
263
|
+
"across", \
|
264
|
+
"after", \
|
265
|
+
"afterwards", \
|
266
|
+
"again", \
|
267
|
+
"against", \
|
268
|
+
"all", \
|
269
|
+
"almost", \
|
270
|
+
"alone", \
|
271
|
+
"along", \
|
272
|
+
"already", \
|
273
|
+
"also", \
|
274
|
+
"although", \
|
275
|
+
"always", \
|
276
|
+
"am", \
|
277
|
+
"among", \
|
278
|
+
"amongst", \
|
279
|
+
"amoungst", \
|
280
|
+
"amount", \
|
281
|
+
"an", \
|
282
|
+
"and", \
|
283
|
+
"another", \
|
284
|
+
"any", \
|
285
|
+
"anyhow", \
|
286
|
+
"anyone", \
|
287
|
+
"anything", \
|
288
|
+
"anyway", \
|
289
|
+
"anywhere", \
|
290
|
+
"are", \
|
291
|
+
"around", \
|
292
|
+
"as", \
|
293
|
+
"at", \
|
294
|
+
"back", \
|
295
|
+
"be", \
|
296
|
+
"became", \
|
297
|
+
"because", \
|
298
|
+
"become", \
|
299
|
+
"becomes", \
|
300
|
+
"becoming", \
|
301
|
+
"been", \
|
302
|
+
"before", \
|
303
|
+
"beforehand", \
|
304
|
+
"behind", \
|
305
|
+
"being", \
|
306
|
+
"below", \
|
307
|
+
"beside", \
|
308
|
+
"besides", \
|
309
|
+
"between", \
|
310
|
+
"beyond", \
|
311
|
+
"bill", \
|
312
|
+
"both", \
|
313
|
+
"bottom", \
|
314
|
+
"but", \
|
315
|
+
"by", \
|
316
|
+
"call", \
|
317
|
+
"can", \
|
318
|
+
"cannot", \
|
319
|
+
"cant", \
|
320
|
+
"co", \
|
321
|
+
"computer", \
|
322
|
+
"con", \
|
323
|
+
"could", \
|
324
|
+
"couldnt", \
|
325
|
+
"cry", \
|
326
|
+
"de", \
|
327
|
+
"describe", \
|
328
|
+
"detail", \
|
329
|
+
"do", \
|
330
|
+
"done", \
|
331
|
+
"down", \
|
332
|
+
"due", \
|
333
|
+
"during", \
|
334
|
+
"each", \
|
335
|
+
"eg", \
|
336
|
+
"eight", \
|
337
|
+
"either", \
|
338
|
+
"eleven", \
|
339
|
+
"else", \
|
340
|
+
"elsewhere", \
|
341
|
+
"empty", \
|
342
|
+
"enough", \
|
343
|
+
"etc", \
|
344
|
+
"even", \
|
345
|
+
"ever", \
|
346
|
+
"every", \
|
347
|
+
"everyone", \
|
348
|
+
"everything", \
|
349
|
+
"everywhere", \
|
350
|
+
"except", \
|
351
|
+
"few", \
|
352
|
+
"fifteen", \
|
353
|
+
"fify", \
|
354
|
+
"fill", \
|
355
|
+
"find", \
|
356
|
+
"fire", \
|
357
|
+
"first", \
|
358
|
+
"five", \
|
359
|
+
"for", \
|
360
|
+
"former", \
|
361
|
+
"formerly", \
|
362
|
+
"forty", \
|
363
|
+
"found", \
|
364
|
+
"four", \
|
365
|
+
"from", \
|
366
|
+
"front", \
|
367
|
+
"full", \
|
368
|
+
"further", \
|
369
|
+
"get", \
|
370
|
+
"give", \
|
371
|
+
"go", \
|
372
|
+
"had", \
|
373
|
+
"has", \
|
374
|
+
"hasnt", \
|
375
|
+
"have", \
|
376
|
+
"he", \
|
377
|
+
"hence", \
|
378
|
+
"her", \
|
379
|
+
"here", \
|
380
|
+
"hereafter", \
|
381
|
+
"hereby", \
|
382
|
+
"herein", \
|
383
|
+
"hereupon", \
|
384
|
+
"hers", \
|
385
|
+
"herself", \
|
386
|
+
"him", \
|
387
|
+
"himself", \
|
388
|
+
"his", \
|
389
|
+
"how", \
|
390
|
+
"however", \
|
391
|
+
"hundred", \
|
392
|
+
"i", \
|
393
|
+
"ie", \
|
394
|
+
"if", \
|
395
|
+
"in", \
|
396
|
+
"inc", \
|
397
|
+
"indeed", \
|
398
|
+
"interest", \
|
399
|
+
"into", \
|
400
|
+
"is", \
|
401
|
+
"it", \
|
402
|
+
"its", \
|
403
|
+
"itself", \
|
404
|
+
"keep", \
|
405
|
+
"last", \
|
406
|
+
"latter", \
|
407
|
+
"latterly", \
|
408
|
+
"least", \
|
409
|
+
"less", \
|
410
|
+
"ltd", \
|
411
|
+
"made", \
|
412
|
+
"many", \
|
413
|
+
"may", \
|
414
|
+
"me", \
|
415
|
+
"meanwhile", \
|
416
|
+
"might", \
|
417
|
+
"mill", \
|
418
|
+
"mine", \
|
419
|
+
"more", \
|
420
|
+
"moreover", \
|
421
|
+
"most", \
|
422
|
+
"mostly", \
|
423
|
+
"move", \
|
424
|
+
"much", \
|
425
|
+
"must", \
|
426
|
+
"my", \
|
427
|
+
"myself", \
|
428
|
+
"name", \
|
429
|
+
"namely", \
|
430
|
+
"neither", \
|
431
|
+
"never", \
|
432
|
+
"nevertheless", \
|
433
|
+
"next", \
|
434
|
+
"nine", \
|
435
|
+
"no", \
|
436
|
+
"nobody", \
|
437
|
+
"none", \
|
438
|
+
"noone", \
|
439
|
+
"nor", \
|
440
|
+
"not", \
|
441
|
+
"nothing", \
|
442
|
+
"now", \
|
443
|
+
"nowhere", \
|
444
|
+
"of", \
|
445
|
+
"off", \
|
446
|
+
"often", \
|
447
|
+
"on", \
|
448
|
+
"once", \
|
449
|
+
"one", \
|
450
|
+
"only", \
|
451
|
+
"onto", \
|
452
|
+
"or", \
|
453
|
+
"other", \
|
454
|
+
"others", \
|
455
|
+
"otherwise", \
|
456
|
+
"our", \
|
457
|
+
"ours", \
|
458
|
+
"ourselves", \
|
459
|
+
"out", \
|
460
|
+
"over", \
|
461
|
+
"own", \
|
462
|
+
"part", \
|
463
|
+
"per", \
|
464
|
+
"perhaps", \
|
465
|
+
"please", \
|
466
|
+
"put", \
|
467
|
+
"rather", \
|
468
|
+
"re", \
|
469
|
+
"same", \
|
470
|
+
"see", \
|
471
|
+
"seem", \
|
472
|
+
"seemed", \
|
473
|
+
"seeming", \
|
474
|
+
"seems", \
|
475
|
+
"serious", \
|
476
|
+
"several", \
|
477
|
+
"she", \
|
478
|
+
"should", \
|
479
|
+
"show", \
|
480
|
+
"side", \
|
481
|
+
"since", \
|
482
|
+
"sincere", \
|
483
|
+
"six", \
|
484
|
+
"sixty", \
|
485
|
+
"so", \
|
486
|
+
"some", \
|
487
|
+
"somehow", \
|
488
|
+
"someone", \
|
489
|
+
"something", \
|
490
|
+
"sometime", \
|
491
|
+
"sometimes", \
|
492
|
+
"somewhere", \
|
493
|
+
"still", \
|
494
|
+
"such", \
|
495
|
+
"system", \
|
496
|
+
"take", \
|
497
|
+
"ten", \
|
498
|
+
"than", \
|
499
|
+
"that", \
|
500
|
+
"the", \
|
501
|
+
"their", \
|
502
|
+
"them", \
|
503
|
+
"themselves", \
|
504
|
+
"then", \
|
505
|
+
"thence", \
|
506
|
+
"there", \
|
507
|
+
"thereafter", \
|
508
|
+
"thereby", \
|
509
|
+
"therefore", \
|
510
|
+
"therein", \
|
511
|
+
"thereupon", \
|
512
|
+
"these", \
|
513
|
+
"they", \
|
514
|
+
"thick", \
|
515
|
+
"thin", \
|
516
|
+
"third", \
|
517
|
+
"this", \
|
518
|
+
"those", \
|
519
|
+
"though", \
|
520
|
+
"three", \
|
521
|
+
"through", \
|
522
|
+
"throughout", \
|
523
|
+
"thru", \
|
524
|
+
"thus", \
|
525
|
+
"to", \
|
526
|
+
"together", \
|
527
|
+
"too", \
|
528
|
+
"top", \
|
529
|
+
"toward", \
|
530
|
+
"towards", \
|
531
|
+
"twelve", \
|
532
|
+
"twenty", \
|
533
|
+
"two", \
|
534
|
+
"un", \
|
535
|
+
"under", \
|
536
|
+
"until", \
|
537
|
+
"up", \
|
538
|
+
"upon", \
|
539
|
+
"us", \
|
540
|
+
"very", \
|
541
|
+
"via", \
|
542
|
+
"was", \
|
543
|
+
"we", \
|
544
|
+
"well", \
|
545
|
+
"were", \
|
546
|
+
"what", \
|
547
|
+
"whatever", \
|
548
|
+
"when", \
|
549
|
+
"whence", \
|
550
|
+
"whenever", \
|
551
|
+
"where", \
|
552
|
+
"whereafter", \
|
553
|
+
"whereas", \
|
554
|
+
"whereby", \
|
555
|
+
"wherein", \
|
556
|
+
"whereupon", \
|
557
|
+
"wherever", \
|
558
|
+
"whether", \
|
559
|
+
"which", \
|
560
|
+
"while", \
|
561
|
+
"whither", \
|
562
|
+
"who", \
|
563
|
+
"whoever", \
|
564
|
+
"whole", \
|
565
|
+
"whom", \
|
566
|
+
"whose", \
|
567
|
+
"why", \
|
568
|
+
"will", \
|
569
|
+
"with", \
|
570
|
+
"within", \
|
571
|
+
"without", \
|
572
|
+
"would", \
|
573
|
+
"yet", \
|
574
|
+
"you", \
|
575
|
+
"your", \
|
576
|
+
"yours", \
|
577
|
+
"yourself", \
|
578
|
+
"yourselves" )
|
579
|
+
|
580
|
+
attr_accessor :stopwords
|
581
|
+
end
|
582
|
+
|
583
|
+
|