summa 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/summa.rb +336 -4
- metadata +1 -1
data/lib/summa.rb
CHANGED
|
@@ -2,12 +2,16 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
|
2
2
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
|
3
3
|
|
|
4
4
|
module Summa
|
|
5
|
-
VERSION = '0.0.2'
|
|
5
|
+
VERSION = '0.0.3'
|
|
6
6
|
end
|
|
7
7
|
|
|
8
8
|
class String
|
|
9
|
-
def
|
|
10
|
-
puts "
|
|
9
|
+
def summarize
|
|
10
|
+
puts "Moooore testing!... of Summa #{VERSION}"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def removePunctuation!
|
|
14
|
+
self.delete!(".,-:;()?!\"\'")
|
|
11
15
|
end
|
|
12
16
|
end
|
|
13
17
|
|
|
@@ -157,7 +161,8 @@ class FrequencyAnalyzer
|
|
|
157
161
|
for i in 0..@document.docArray.length
|
|
158
162
|
word = @document.docArray[i]
|
|
159
163
|
if word != nil
|
|
160
|
-
word = CGWordOps.removePunctuation(word)
|
|
164
|
+
#word = CGWordOps.removePunctuation(word)
|
|
165
|
+
word.delete!(".,;:()?!\"")
|
|
161
166
|
if !@stopWords.docArray.include?(word.downcase)
|
|
162
167
|
#stemmed = word.stem
|
|
163
168
|
if @freqCount.has_key?(word)
|
|
@@ -249,3 +254,330 @@ class CGDocument
|
|
|
249
254
|
|
|
250
255
|
end
|
|
251
256
|
|
|
257
|
+
|
|
258
|
+
class SummaData
|
|
259
|
+
|
|
260
|
+
@stopwords = Array.[]("a", \
|
|
261
|
+
"about", \
|
|
262
|
+
"above", \
|
|
263
|
+
"across", \
|
|
264
|
+
"after", \
|
|
265
|
+
"afterwards", \
|
|
266
|
+
"again", \
|
|
267
|
+
"against", \
|
|
268
|
+
"all", \
|
|
269
|
+
"almost", \
|
|
270
|
+
"alone", \
|
|
271
|
+
"along", \
|
|
272
|
+
"already", \
|
|
273
|
+
"also", \
|
|
274
|
+
"although", \
|
|
275
|
+
"always", \
|
|
276
|
+
"am", \
|
|
277
|
+
"among", \
|
|
278
|
+
"amongst", \
|
|
279
|
+
"amoungst", \
|
|
280
|
+
"amount", \
|
|
281
|
+
"an", \
|
|
282
|
+
"and", \
|
|
283
|
+
"another", \
|
|
284
|
+
"any", \
|
|
285
|
+
"anyhow", \
|
|
286
|
+
"anyone", \
|
|
287
|
+
"anything", \
|
|
288
|
+
"anyway", \
|
|
289
|
+
"anywhere", \
|
|
290
|
+
"are", \
|
|
291
|
+
"around", \
|
|
292
|
+
"as", \
|
|
293
|
+
"at", \
|
|
294
|
+
"back", \
|
|
295
|
+
"be", \
|
|
296
|
+
"became", \
|
|
297
|
+
"because", \
|
|
298
|
+
"become", \
|
|
299
|
+
"becomes", \
|
|
300
|
+
"becoming", \
|
|
301
|
+
"been", \
|
|
302
|
+
"before", \
|
|
303
|
+
"beforehand", \
|
|
304
|
+
"behind", \
|
|
305
|
+
"being", \
|
|
306
|
+
"below", \
|
|
307
|
+
"beside", \
|
|
308
|
+
"besides", \
|
|
309
|
+
"between", \
|
|
310
|
+
"beyond", \
|
|
311
|
+
"bill", \
|
|
312
|
+
"both", \
|
|
313
|
+
"bottom", \
|
|
314
|
+
"but", \
|
|
315
|
+
"by", \
|
|
316
|
+
"call", \
|
|
317
|
+
"can", \
|
|
318
|
+
"cannot", \
|
|
319
|
+
"cant", \
|
|
320
|
+
"co", \
|
|
321
|
+
"computer", \
|
|
322
|
+
"con", \
|
|
323
|
+
"could", \
|
|
324
|
+
"couldnt", \
|
|
325
|
+
"cry", \
|
|
326
|
+
"de", \
|
|
327
|
+
"describe", \
|
|
328
|
+
"detail", \
|
|
329
|
+
"do", \
|
|
330
|
+
"done", \
|
|
331
|
+
"down", \
|
|
332
|
+
"due", \
|
|
333
|
+
"during", \
|
|
334
|
+
"each", \
|
|
335
|
+
"eg", \
|
|
336
|
+
"eight", \
|
|
337
|
+
"either", \
|
|
338
|
+
"eleven", \
|
|
339
|
+
"else", \
|
|
340
|
+
"elsewhere", \
|
|
341
|
+
"empty", \
|
|
342
|
+
"enough", \
|
|
343
|
+
"etc", \
|
|
344
|
+
"even", \
|
|
345
|
+
"ever", \
|
|
346
|
+
"every", \
|
|
347
|
+
"everyone", \
|
|
348
|
+
"everything", \
|
|
349
|
+
"everywhere", \
|
|
350
|
+
"except", \
|
|
351
|
+
"few", \
|
|
352
|
+
"fifteen", \
|
|
353
|
+
"fify", \
|
|
354
|
+
"fill", \
|
|
355
|
+
"find", \
|
|
356
|
+
"fire", \
|
|
357
|
+
"first", \
|
|
358
|
+
"five", \
|
|
359
|
+
"for", \
|
|
360
|
+
"former", \
|
|
361
|
+
"formerly", \
|
|
362
|
+
"forty", \
|
|
363
|
+
"found", \
|
|
364
|
+
"four", \
|
|
365
|
+
"from", \
|
|
366
|
+
"front", \
|
|
367
|
+
"full", \
|
|
368
|
+
"further", \
|
|
369
|
+
"get", \
|
|
370
|
+
"give", \
|
|
371
|
+
"go", \
|
|
372
|
+
"had", \
|
|
373
|
+
"has", \
|
|
374
|
+
"hasnt", \
|
|
375
|
+
"have", \
|
|
376
|
+
"he", \
|
|
377
|
+
"hence", \
|
|
378
|
+
"her", \
|
|
379
|
+
"here", \
|
|
380
|
+
"hereafter", \
|
|
381
|
+
"hereby", \
|
|
382
|
+
"herein", \
|
|
383
|
+
"hereupon", \
|
|
384
|
+
"hers", \
|
|
385
|
+
"herself", \
|
|
386
|
+
"him", \
|
|
387
|
+
"himself", \
|
|
388
|
+
"his", \
|
|
389
|
+
"how", \
|
|
390
|
+
"however", \
|
|
391
|
+
"hundred", \
|
|
392
|
+
"i", \
|
|
393
|
+
"ie", \
|
|
394
|
+
"if", \
|
|
395
|
+
"in", \
|
|
396
|
+
"inc", \
|
|
397
|
+
"indeed", \
|
|
398
|
+
"interest", \
|
|
399
|
+
"into", \
|
|
400
|
+
"is", \
|
|
401
|
+
"it", \
|
|
402
|
+
"its", \
|
|
403
|
+
"itself", \
|
|
404
|
+
"keep", \
|
|
405
|
+
"last", \
|
|
406
|
+
"latter", \
|
|
407
|
+
"latterly", \
|
|
408
|
+
"least", \
|
|
409
|
+
"less", \
|
|
410
|
+
"ltd", \
|
|
411
|
+
"made", \
|
|
412
|
+
"many", \
|
|
413
|
+
"may", \
|
|
414
|
+
"me", \
|
|
415
|
+
"meanwhile", \
|
|
416
|
+
"might", \
|
|
417
|
+
"mill", \
|
|
418
|
+
"mine", \
|
|
419
|
+
"more", \
|
|
420
|
+
"moreover", \
|
|
421
|
+
"most", \
|
|
422
|
+
"mostly", \
|
|
423
|
+
"move", \
|
|
424
|
+
"much", \
|
|
425
|
+
"must", \
|
|
426
|
+
"my", \
|
|
427
|
+
"myself", \
|
|
428
|
+
"name", \
|
|
429
|
+
"namely", \
|
|
430
|
+
"neither", \
|
|
431
|
+
"never", \
|
|
432
|
+
"nevertheless", \
|
|
433
|
+
"next", \
|
|
434
|
+
"nine", \
|
|
435
|
+
"no", \
|
|
436
|
+
"nobody", \
|
|
437
|
+
"none", \
|
|
438
|
+
"noone", \
|
|
439
|
+
"nor", \
|
|
440
|
+
"not", \
|
|
441
|
+
"nothing", \
|
|
442
|
+
"now", \
|
|
443
|
+
"nowhere", \
|
|
444
|
+
"of", \
|
|
445
|
+
"off", \
|
|
446
|
+
"often", \
|
|
447
|
+
"on", \
|
|
448
|
+
"once", \
|
|
449
|
+
"one", \
|
|
450
|
+
"only", \
|
|
451
|
+
"onto", \
|
|
452
|
+
"or", \
|
|
453
|
+
"other", \
|
|
454
|
+
"others", \
|
|
455
|
+
"otherwise", \
|
|
456
|
+
"our", \
|
|
457
|
+
"ours", \
|
|
458
|
+
"ourselves", \
|
|
459
|
+
"out", \
|
|
460
|
+
"over", \
|
|
461
|
+
"own", \
|
|
462
|
+
"part", \
|
|
463
|
+
"per", \
|
|
464
|
+
"perhaps", \
|
|
465
|
+
"please", \
|
|
466
|
+
"put", \
|
|
467
|
+
"rather", \
|
|
468
|
+
"re", \
|
|
469
|
+
"same", \
|
|
470
|
+
"see", \
|
|
471
|
+
"seem", \
|
|
472
|
+
"seemed", \
|
|
473
|
+
"seeming", \
|
|
474
|
+
"seems", \
|
|
475
|
+
"serious", \
|
|
476
|
+
"several", \
|
|
477
|
+
"she", \
|
|
478
|
+
"should", \
|
|
479
|
+
"show", \
|
|
480
|
+
"side", \
|
|
481
|
+
"since", \
|
|
482
|
+
"sincere", \
|
|
483
|
+
"six", \
|
|
484
|
+
"sixty", \
|
|
485
|
+
"so", \
|
|
486
|
+
"some", \
|
|
487
|
+
"somehow", \
|
|
488
|
+
"someone", \
|
|
489
|
+
"something", \
|
|
490
|
+
"sometime", \
|
|
491
|
+
"sometimes", \
|
|
492
|
+
"somewhere", \
|
|
493
|
+
"still", \
|
|
494
|
+
"such", \
|
|
495
|
+
"system", \
|
|
496
|
+
"take", \
|
|
497
|
+
"ten", \
|
|
498
|
+
"than", \
|
|
499
|
+
"that", \
|
|
500
|
+
"the", \
|
|
501
|
+
"their", \
|
|
502
|
+
"them", \
|
|
503
|
+
"themselves", \
|
|
504
|
+
"then", \
|
|
505
|
+
"thence", \
|
|
506
|
+
"there", \
|
|
507
|
+
"thereafter", \
|
|
508
|
+
"thereby", \
|
|
509
|
+
"therefore", \
|
|
510
|
+
"therein", \
|
|
511
|
+
"thereupon", \
|
|
512
|
+
"these", \
|
|
513
|
+
"they", \
|
|
514
|
+
"thick", \
|
|
515
|
+
"thin", \
|
|
516
|
+
"third", \
|
|
517
|
+
"this", \
|
|
518
|
+
"those", \
|
|
519
|
+
"though", \
|
|
520
|
+
"three", \
|
|
521
|
+
"through", \
|
|
522
|
+
"throughout", \
|
|
523
|
+
"thru", \
|
|
524
|
+
"thus", \
|
|
525
|
+
"to", \
|
|
526
|
+
"together", \
|
|
527
|
+
"too", \
|
|
528
|
+
"top", \
|
|
529
|
+
"toward", \
|
|
530
|
+
"towards", \
|
|
531
|
+
"twelve", \
|
|
532
|
+
"twenty", \
|
|
533
|
+
"two", \
|
|
534
|
+
"un", \
|
|
535
|
+
"under", \
|
|
536
|
+
"until", \
|
|
537
|
+
"up", \
|
|
538
|
+
"upon", \
|
|
539
|
+
"us", \
|
|
540
|
+
"very", \
|
|
541
|
+
"via", \
|
|
542
|
+
"was", \
|
|
543
|
+
"we", \
|
|
544
|
+
"well", \
|
|
545
|
+
"were", \
|
|
546
|
+
"what", \
|
|
547
|
+
"whatever", \
|
|
548
|
+
"when", \
|
|
549
|
+
"whence", \
|
|
550
|
+
"whenever", \
|
|
551
|
+
"where", \
|
|
552
|
+
"whereafter", \
|
|
553
|
+
"whereas", \
|
|
554
|
+
"whereby", \
|
|
555
|
+
"wherein", \
|
|
556
|
+
"whereupon", \
|
|
557
|
+
"wherever", \
|
|
558
|
+
"whether", \
|
|
559
|
+
"which", \
|
|
560
|
+
"while", \
|
|
561
|
+
"whither", \
|
|
562
|
+
"who", \
|
|
563
|
+
"whoever", \
|
|
564
|
+
"whole", \
|
|
565
|
+
"whom", \
|
|
566
|
+
"whose", \
|
|
567
|
+
"why", \
|
|
568
|
+
"will", \
|
|
569
|
+
"with", \
|
|
570
|
+
"within", \
|
|
571
|
+
"without", \
|
|
572
|
+
"would", \
|
|
573
|
+
"yet", \
|
|
574
|
+
"you", \
|
|
575
|
+
"your", \
|
|
576
|
+
"yours", \
|
|
577
|
+
"yourself", \
|
|
578
|
+
"yourselves" )
|
|
579
|
+
|
|
580
|
+
attr_accessor :stopwords
|
|
581
|
+
end
|
|
582
|
+
|
|
583
|
+
|