summa 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -5,6 +5,8 @@ README.rdoc
5
5
  Rakefile
6
6
  lib/summa.rb
7
7
  lib/FrequencyAnalyzer.rb
8
+ lib/SummaUtils.rb
9
+ lib/SummaData.rb
8
10
  script/console
9
11
  script/destroy
10
12
  script/generate
data/lib/SummaData.rb ADDED
@@ -0,0 +1,343 @@
1
# Shared, read-only configuration data for the Summa library: an English
# stopword list plus two numeric tuning values.
#
# The values are held in class variables, which attr_reader does not cover,
# so explicit class-level reader methods are defined below.
class SummaData

  # Numeric tuning value exposed through SummaData.threshold.
  # NOTE(review): its exact role is decided by the callers, which are not
  # part of this file -- confirm before changing it.
  @@threshold = 0.8

  # Numeric tuning value exposed through SummaData.sigma.
  # NOTE(review): its exact role is decided by the callers, which are not
  # part of this file -- confirm before changing it.
  @@sigma = 16

  # Lower-case English stopwords. String#freqkeys skips any word whose
  # downcased form appears in this list.
  #
  # "fify" is a long-standing typo for "fifty". It is kept so the list stays
  # backward compatible, and the correctly spelled "fifty" is listed next to
  # it so that the numeral is filtered like "forty", "sixty" and "twenty".
  @@stopwords = %w[
    a about above across after afterwards again against
    all almost alone along already also although always
    am among amongst amoungst amount an and another
    any anyhow anyone anything anyway anywhere are around
    as at back be became because become becomes
    becoming been before beforehand behind being below beside
    besides between beyond bill both bottom but by
    call can cannot cant co computer con could
    couldnt cry de describe detail do done down
    due during each eg eight either eleven else
    elsewhere empty enough etc even ever every everyone
    everything everywhere except few fifteen fify fifty fill
    find fire first five for former formerly forty
    found four from front full further get give
    go had has hasnt have he hence her
    here hereafter hereby herein hereupon hers herself him
    himself his how however hundred i ie if
    in inc indeed interest into is it its
    itself keep last latter latterly least less ltd
    made many may me meanwhile might mill mine
    more moreover most mostly move much must my
    myself name namely neither never nevertheless next nine
    no nobody none noone nor not nothing now
    nowhere of off often on once one only
    onto or other others otherwise our ours ourselves
    out over own part per perhaps please put
    rather re same see seem seemed seeming seems
    serious several she should show side since sincere
    six sixty so some somehow someone something sometime
    sometimes somewhere still such system take ten than
    that the their them themselves then thence there
    thereafter thereby therefore therein thereupon these they thick
    thin third this those though three through throughout
    thru thus to together too top toward towards
    twelve twenty two un under until up upon
    us very via was we well were what
    whatever when whence whenever where whereafter whereas whereby
    wherein whereupon wherever whether which while whither who
    whoever whole whom whose why will with within
    without would yet you your yours yourself yourselves
  ]

  # Returns the Array of stopword Strings (the shared instance, not a copy).
  def self.stopwords
    @@stopwords
  end

  # Returns the sigma tuning value (16).
  def self.sigma
    @@sigma
  end

  # Returns the threshold tuning value (0.8).
  def self.threshold
    @@threshold
  end

end
# end of SummaData class
343
+
data/lib/SummaUtils.rb ADDED
@@ -0,0 +1,49 @@
1
# String extensions used by Summa for keyword extraction.
class String
  # Returns the words of this string that occur unusually often.
  #
  # The string is split on single spaces. Each piece is cleaned with
  # String#removePunctuation! (defined for String in lib/summa.rb) and has
  # its spaces removed. Pieces whose downcased form is listed in
  # SummaData.stopwords are skipped; the rest are counted. A word is returned
  # when its count is strictly greater than twice the mean count.
  #
  # Behaviour kept from the original implementation on purpose:
  # * counting is case-sensitive ("Cat" and "cat" are different words), only
  #   the stopword lookup is case-insensitive;
  # * the mean uses integer division, so it is rounded down;
  # * empty pieces (e.g. produced by consecutive spaces) take part in the
  #   mean but are never returned as keywords;
  # * only ' ' separates words -- newlines and tabs do not, unless
  #   removePunctuation! strips them (NOTE(review): confirm against its
  #   definition).
  #
  # Returns an Array of Strings. It is empty when the string contains no
  # countable words; the original raised ZeroDivisionError in that case.
  def freqkeys
    stopwords = SummaData.stopwords
    counts = Hash.new(0)

    # String#each(sep) was removed in Ruby 1.9; each_line(sep) is the
    # equivalent call and is also available on 1.8. Every yielded piece is a
    # fresh String, so the in-place edits below never modify the receiver.
    each_line(' ') do |word|
      word.removePunctuation!
      word.delete!(' ')
      next if stopwords.include?(word.downcase)
      counts[word] += 1
    end

    # Nothing to average: avoid dividing by zero below.
    return [] if counts.empty?

    total = 0
    counts.each_value { |occurrences| total += occurrences }
    mean = total / counts.size # integer division, as in the original

    keywords = []
    counts.each do |word, occurrences|
      keywords << word if occurrences > 2 * mean && word != ""
    end
    keywords
  end

end # class String (extensions)
data/lib/summa.rb CHANGED
@@ -3,14 +3,16 @@ $:.unshift(File.dirname(__FILE__)) unless
3
3
 
4
4
 
5
5
  require 'FrequencyAnalyzer'
6
+ require 'SummaData'
7
+ require 'SummaUtils'
6
8
 
7
9
  module Summa
8
- VERSION = '0.0.8'
10
+ VERSION = '0.0.9'
9
11
  end
10
12
 
11
13
  class String
12
14
  def summarize
13
- puts "Testing version 0.0.8 "
15
+ puts "Testing version 0.0.9 "
14
16
  end
15
17
 
16
18
  def removePunctuation!
@@ -195,346 +197,3 @@ class CGDocument
195
197
 
196
198
  end
197
199
 
198
-
199
- class SummaData
200
-
201
- @@threshold = 0.8
202
- @@sigma = 16
203
- @@stopwords = Array.[]("a", \
204
- "about", \
205
- "above", \
206
- "across", \
207
- "after", \
208
- "afterwards", \
209
- "again", \
210
- "against", \
211
- "all", \
212
- "almost", \
213
- "alone", \
214
- "along", \
215
- "already", \
216
- "also", \
217
- "although", \
218
- "always", \
219
- "am", \
220
- "among", \
221
- "amongst", \
222
- "amoungst", \
223
- "amount", \
224
- "an", \
225
- "and", \
226
- "another", \
227
- "any", \
228
- "anyhow", \
229
- "anyone", \
230
- "anything", \
231
- "anyway", \
232
- "anywhere", \
233
- "are", \
234
- "around", \
235
- "as", \
236
- "at", \
237
- "back", \
238
- "be", \
239
- "became", \
240
- "because", \
241
- "become", \
242
- "becomes", \
243
- "becoming", \
244
- "been", \
245
- "before", \
246
- "beforehand", \
247
- "behind", \
248
- "being", \
249
- "below", \
250
- "beside", \
251
- "besides", \
252
- "between", \
253
- "beyond", \
254
- "bill", \
255
- "both", \
256
- "bottom", \
257
- "but", \
258
- "by", \
259
- "call", \
260
- "can", \
261
- "cannot", \
262
- "cant", \
263
- "co", \
264
- "computer", \
265
- "con", \
266
- "could", \
267
- "couldnt", \
268
- "cry", \
269
- "de", \
270
- "describe", \
271
- "detail", \
272
- "do", \
273
- "done", \
274
- "down", \
275
- "due", \
276
- "during", \
277
- "each", \
278
- "eg", \
279
- "eight", \
280
- "either", \
281
- "eleven", \
282
- "else", \
283
- "elsewhere", \
284
- "empty", \
285
- "enough", \
286
- "etc", \
287
- "even", \
288
- "ever", \
289
- "every", \
290
- "everyone", \
291
- "everything", \
292
- "everywhere", \
293
- "except", \
294
- "few", \
295
- "fifteen", \
296
- "fify", \
297
- "fill", \
298
- "find", \
299
- "fire", \
300
- "first", \
301
- "five", \
302
- "for", \
303
- "former", \
304
- "formerly", \
305
- "forty", \
306
- "found", \
307
- "four", \
308
- "from", \
309
- "front", \
310
- "full", \
311
- "further", \
312
- "get", \
313
- "give", \
314
- "go", \
315
- "had", \
316
- "has", \
317
- "hasnt", \
318
- "have", \
319
- "he", \
320
- "hence", \
321
- "her", \
322
- "here", \
323
- "hereafter", \
324
- "hereby", \
325
- "herein", \
326
- "hereupon", \
327
- "hers", \
328
- "herself", \
329
- "him", \
330
- "himself", \
331
- "his", \
332
- "how", \
333
- "however", \
334
- "hundred", \
335
- "i", \
336
- "ie", \
337
- "if", \
338
- "in", \
339
- "inc", \
340
- "indeed", \
341
- "interest", \
342
- "into", \
343
- "is", \
344
- "it", \
345
- "its", \
346
- "itself", \
347
- "keep", \
348
- "last", \
349
- "latter", \
350
- "latterly", \
351
- "least", \
352
- "less", \
353
- "ltd", \
354
- "made", \
355
- "many", \
356
- "may", \
357
- "me", \
358
- "meanwhile", \
359
- "might", \
360
- "mill", \
361
- "mine", \
362
- "more", \
363
- "moreover", \
364
- "most", \
365
- "mostly", \
366
- "move", \
367
- "much", \
368
- "must", \
369
- "my", \
370
- "myself", \
371
- "name", \
372
- "namely", \
373
- "neither", \
374
- "never", \
375
- "nevertheless", \
376
- "next", \
377
- "nine", \
378
- "no", \
379
- "nobody", \
380
- "none", \
381
- "noone", \
382
- "nor", \
383
- "not", \
384
- "nothing", \
385
- "now", \
386
- "nowhere", \
387
- "of", \
388
- "off", \
389
- "often", \
390
- "on", \
391
- "once", \
392
- "one", \
393
- "only", \
394
- "onto", \
395
- "or", \
396
- "other", \
397
- "others", \
398
- "otherwise", \
399
- "our", \
400
- "ours", \
401
- "ourselves", \
402
- "out", \
403
- "over", \
404
- "own", \
405
- "part", \
406
- "per", \
407
- "perhaps", \
408
- "please", \
409
- "put", \
410
- "rather", \
411
- "re", \
412
- "same", \
413
- "see", \
414
- "seem", \
415
- "seemed", \
416
- "seeming", \
417
- "seems", \
418
- "serious", \
419
- "several", \
420
- "she", \
421
- "should", \
422
- "show", \
423
- "side", \
424
- "since", \
425
- "sincere", \
426
- "six", \
427
- "sixty", \
428
- "so", \
429
- "some", \
430
- "somehow", \
431
- "someone", \
432
- "something", \
433
- "sometime", \
434
- "sometimes", \
435
- "somewhere", \
436
- "still", \
437
- "such", \
438
- "system", \
439
- "take", \
440
- "ten", \
441
- "than", \
442
- "that", \
443
- "the", \
444
- "their", \
445
- "them", \
446
- "themselves", \
447
- "then", \
448
- "thence", \
449
- "there", \
450
- "thereafter", \
451
- "thereby", \
452
- "therefore", \
453
- "therein", \
454
- "thereupon", \
455
- "these", \
456
- "they", \
457
- "thick", \
458
- "thin", \
459
- "third", \
460
- "this", \
461
- "those", \
462
- "though", \
463
- "three", \
464
- "through", \
465
- "throughout", \
466
- "thru", \
467
- "thus", \
468
- "to", \
469
- "together", \
470
- "too", \
471
- "top", \
472
- "toward", \
473
- "towards", \
474
- "twelve", \
475
- "twenty", \
476
- "two", \
477
- "un", \
478
- "under", \
479
- "until", \
480
- "up", \
481
- "upon", \
482
- "us", \
483
- "very", \
484
- "via", \
485
- "was", \
486
- "we", \
487
- "well", \
488
- "were", \
489
- "what", \
490
- "whatever", \
491
- "when", \
492
- "whence", \
493
- "whenever", \
494
- "where", \
495
- "whereafter", \
496
- "whereas", \
497
- "whereby", \
498
- "wherein", \
499
- "whereupon", \
500
- "wherever", \
501
- "whether", \
502
- "which", \
503
- "while", \
504
- "whither", \
505
- "who", \
506
- "whoever", \
507
- "whole", \
508
- "whom", \
509
- "whose", \
510
- "why", \
511
- "will", \
512
- "with", \
513
- "within", \
514
- "without", \
515
- "would", \
516
- "yet", \
517
- "you", \
518
- "your", \
519
- "yours", \
520
- "yourself", \
521
- "yourselves" )
522
-
523
- # Need to manually put in the accessors if they are class methods!
524
- def SummaData.stopwords
525
- @@stopwords
526
- end
527
-
528
- def SummaData.sigma
529
- @@sigma
530
- end
531
-
532
- def SummaData.threshold
533
- @@threshold
534
- end
535
-
536
-
537
-
538
- end
539
- #end of SummaData class
540
-
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: summa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - HyLiter.org
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-22 00:00:00 -07:00
12
+ date: 2009-09-24 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -43,6 +43,8 @@ files:
43
43
  - Rakefile
44
44
  - lib/summa.rb
45
45
  - lib/FrequencyAnalyzer.rb
46
+ - lib/SummaUtils.rb
47
+ - lib/SummaData.rb
46
48
  - script/console
47
49
  - script/destroy
48
50
  - script/generate