summa 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2):
  1. data/lib/summa.rb +336 -4
  2. metadata +1 -1
data/lib/summa.rb CHANGED
@@ -2,12 +2,16 @@ $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
4
  module Summa
5
- VERSION = '0.0.2'
5
+ VERSION = '0.0.3'
6
6
  end
7
7
 
8
8
  class String
9
- def stem
10
- puts "Still testing!... v0.0.2"
9
+ def summarize
10
+ puts "Moooore testing!... of Summa #{VERSION}"
11
+ end
12
+
13
+ def removePunctuation!
14
+ self.delete!(".,-:;()?!\"\'")
11
15
  end
12
16
  end
13
17
 
@@ -157,7 +161,8 @@ class FrequencyAnalyzer
157
161
  for i in 0..@document.docArray.length
158
162
  word = @document.docArray[i]
159
163
  if word != nil
160
- word = CGWordOps.removePunctuation(word)
164
+ #word = CGWordOps.removePunctuation(word)
165
+ word.delete!(".,;:()?!\"")
161
166
  if !@stopWords.docArray.include?(word.downcase)
162
167
  #stemmed = word.stem
163
168
  if @freqCount.has_key?(word)
@@ -249,3 +254,330 @@ class CGDocument
249
254
 
250
255
  end
251
256
 
257
+
258
+ class SummaData
259
+
260
+ @stopwords = Array.[]("a", \
261
+ "about", \
262
+ "above", \
263
+ "across", \
264
+ "after", \
265
+ "afterwards", \
266
+ "again", \
267
+ "against", \
268
+ "all", \
269
+ "almost", \
270
+ "alone", \
271
+ "along", \
272
+ "already", \
273
+ "also", \
274
+ "although", \
275
+ "always", \
276
+ "am", \
277
+ "among", \
278
+ "amongst", \
279
+ "amoungst", \
280
+ "amount", \
281
+ "an", \
282
+ "and", \
283
+ "another", \
284
+ "any", \
285
+ "anyhow", \
286
+ "anyone", \
287
+ "anything", \
288
+ "anyway", \
289
+ "anywhere", \
290
+ "are", \
291
+ "around", \
292
+ "as", \
293
+ "at", \
294
+ "back", \
295
+ "be", \
296
+ "became", \
297
+ "because", \
298
+ "become", \
299
+ "becomes", \
300
+ "becoming", \
301
+ "been", \
302
+ "before", \
303
+ "beforehand", \
304
+ "behind", \
305
+ "being", \
306
+ "below", \
307
+ "beside", \
308
+ "besides", \
309
+ "between", \
310
+ "beyond", \
311
+ "bill", \
312
+ "both", \
313
+ "bottom", \
314
+ "but", \
315
+ "by", \
316
+ "call", \
317
+ "can", \
318
+ "cannot", \
319
+ "cant", \
320
+ "co", \
321
+ "computer", \
322
+ "con", \
323
+ "could", \
324
+ "couldnt", \
325
+ "cry", \
326
+ "de", \
327
+ "describe", \
328
+ "detail", \
329
+ "do", \
330
+ "done", \
331
+ "down", \
332
+ "due", \
333
+ "during", \
334
+ "each", \
335
+ "eg", \
336
+ "eight", \
337
+ "either", \
338
+ "eleven", \
339
+ "else", \
340
+ "elsewhere", \
341
+ "empty", \
342
+ "enough", \
343
+ "etc", \
344
+ "even", \
345
+ "ever", \
346
+ "every", \
347
+ "everyone", \
348
+ "everything", \
349
+ "everywhere", \
350
+ "except", \
351
+ "few", \
352
+ "fifteen", \
353
+ "fify", \
354
+ "fill", \
355
+ "find", \
356
+ "fire", \
357
+ "first", \
358
+ "five", \
359
+ "for", \
360
+ "former", \
361
+ "formerly", \
362
+ "forty", \
363
+ "found", \
364
+ "four", \
365
+ "from", \
366
+ "front", \
367
+ "full", \
368
+ "further", \
369
+ "get", \
370
+ "give", \
371
+ "go", \
372
+ "had", \
373
+ "has", \
374
+ "hasnt", \
375
+ "have", \
376
+ "he", \
377
+ "hence", \
378
+ "her", \
379
+ "here", \
380
+ "hereafter", \
381
+ "hereby", \
382
+ "herein", \
383
+ "hereupon", \
384
+ "hers", \
385
+ "herself", \
386
+ "him", \
387
+ "himself", \
388
+ "his", \
389
+ "how", \
390
+ "however", \
391
+ "hundred", \
392
+ "i", \
393
+ "ie", \
394
+ "if", \
395
+ "in", \
396
+ "inc", \
397
+ "indeed", \
398
+ "interest", \
399
+ "into", \
400
+ "is", \
401
+ "it", \
402
+ "its", \
403
+ "itself", \
404
+ "keep", \
405
+ "last", \
406
+ "latter", \
407
+ "latterly", \
408
+ "least", \
409
+ "less", \
410
+ "ltd", \
411
+ "made", \
412
+ "many", \
413
+ "may", \
414
+ "me", \
415
+ "meanwhile", \
416
+ "might", \
417
+ "mill", \
418
+ "mine", \
419
+ "more", \
420
+ "moreover", \
421
+ "most", \
422
+ "mostly", \
423
+ "move", \
424
+ "much", \
425
+ "must", \
426
+ "my", \
427
+ "myself", \
428
+ "name", \
429
+ "namely", \
430
+ "neither", \
431
+ "never", \
432
+ "nevertheless", \
433
+ "next", \
434
+ "nine", \
435
+ "no", \
436
+ "nobody", \
437
+ "none", \
438
+ "noone", \
439
+ "nor", \
440
+ "not", \
441
+ "nothing", \
442
+ "now", \
443
+ "nowhere", \
444
+ "of", \
445
+ "off", \
446
+ "often", \
447
+ "on", \
448
+ "once", \
449
+ "one", \
450
+ "only", \
451
+ "onto", \
452
+ "or", \
453
+ "other", \
454
+ "others", \
455
+ "otherwise", \
456
+ "our", \
457
+ "ours", \
458
+ "ourselves", \
459
+ "out", \
460
+ "over", \
461
+ "own", \
462
+ "part", \
463
+ "per", \
464
+ "perhaps", \
465
+ "please", \
466
+ "put", \
467
+ "rather", \
468
+ "re", \
469
+ "same", \
470
+ "see", \
471
+ "seem", \
472
+ "seemed", \
473
+ "seeming", \
474
+ "seems", \
475
+ "serious", \
476
+ "several", \
477
+ "she", \
478
+ "should", \
479
+ "show", \
480
+ "side", \
481
+ "since", \
482
+ "sincere", \
483
+ "six", \
484
+ "sixty", \
485
+ "so", \
486
+ "some", \
487
+ "somehow", \
488
+ "someone", \
489
+ "something", \
490
+ "sometime", \
491
+ "sometimes", \
492
+ "somewhere", \
493
+ "still", \
494
+ "such", \
495
+ "system", \
496
+ "take", \
497
+ "ten", \
498
+ "than", \
499
+ "that", \
500
+ "the", \
501
+ "their", \
502
+ "them", \
503
+ "themselves", \
504
+ "then", \
505
+ "thence", \
506
+ "there", \
507
+ "thereafter", \
508
+ "thereby", \
509
+ "therefore", \
510
+ "therein", \
511
+ "thereupon", \
512
+ "these", \
513
+ "they", \
514
+ "thick", \
515
+ "thin", \
516
+ "third", \
517
+ "this", \
518
+ "those", \
519
+ "though", \
520
+ "three", \
521
+ "through", \
522
+ "throughout", \
523
+ "thru", \
524
+ "thus", \
525
+ "to", \
526
+ "together", \
527
+ "too", \
528
+ "top", \
529
+ "toward", \
530
+ "towards", \
531
+ "twelve", \
532
+ "twenty", \
533
+ "two", \
534
+ "un", \
535
+ "under", \
536
+ "until", \
537
+ "up", \
538
+ "upon", \
539
+ "us", \
540
+ "very", \
541
+ "via", \
542
+ "was", \
543
+ "we", \
544
+ "well", \
545
+ "were", \
546
+ "what", \
547
+ "whatever", \
548
+ "when", \
549
+ "whence", \
550
+ "whenever", \
551
+ "where", \
552
+ "whereafter", \
553
+ "whereas", \
554
+ "whereby", \
555
+ "wherein", \
556
+ "whereupon", \
557
+ "wherever", \
558
+ "whether", \
559
+ "which", \
560
+ "while", \
561
+ "whither", \
562
+ "who", \
563
+ "whoever", \
564
+ "whole", \
565
+ "whom", \
566
+ "whose", \
567
+ "why", \
568
+ "will", \
569
+ "with", \
570
+ "within", \
571
+ "without", \
572
+ "would", \
573
+ "yet", \
574
+ "you", \
575
+ "your", \
576
+ "yours", \
577
+ "yourself", \
578
+ "yourselves" )
579
+
580
+ attr_accessor :stopwords
581
+ end
582
+
583
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: summa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - HyLiter.org