summa 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +2 -0
- data/lib/FrequencyAnalyzer.rb +37 -0
- data/lib/helloworld.rb +5 -0
- data/lib/summa.rb +11 -83
- metadata +3 -1
data/Manifest.txt
CHANGED
data/lib/FrequencyAnalyzer.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
class FrequencyAnalyzer
|
2
|
+
def initialize(doctext)
|
3
|
+
@freqCount = {}
|
4
|
+
@doctext = doctext
|
5
|
+
@stopwords = SummaData.stopwords
|
6
|
+
@mean = 0
|
7
|
+
@keywords = [];
|
8
|
+
|
9
|
+
doctext.each { |word|
|
10
|
+
if word != nil
|
11
|
+
word.removePunctuation!
|
12
|
+
if !@stopwords.include?(word.downcase)
|
13
|
+
#stemmed = word.stem
|
14
|
+
if @freqCount.has_key?(word)
|
15
|
+
@freqCount[word] = @freqCount[word] + 1
|
16
|
+
else
|
17
|
+
@freqCount[word] = 1
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
}
|
22
|
+
@freqCount.each {|key, value| puts "#{key} has #{value}" }
|
23
|
+
|
24
|
+
sum = 0
|
25
|
+
count = 0
|
26
|
+
keys = @freqCount.keys
|
27
|
+
for i in 0..keys.length
|
28
|
+
if keys[i] != nil
|
29
|
+
sum = sum + @freqCount[keys[i]]
|
30
|
+
count = count + 1
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
@mean = sum/count
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
data/lib/helloworld.rb
ADDED
data/lib/summa.rb
CHANGED
@@ -2,12 +2,12 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
2
2
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
3
|
|
4
4
|
module Summa
|
5
|
-
VERSION = '0.0.4'
|
5
|
+
VERSION = '0.0.5'
|
6
6
|
end
|
7
7
|
|
8
8
|
class String
|
9
9
|
def summarize
|
10
|
-
puts "
|
10
|
+
puts "Still more testing!... of Summa #{VERSION}"
|
11
11
|
end
|
12
12
|
|
13
13
|
def removePunctuation!
|
@@ -23,9 +23,9 @@ class PheremoneAnalysis
|
|
23
23
|
end
|
24
24
|
@document = document
|
25
25
|
@keywords = keywords
|
26
|
-
@sigma =
|
26
|
+
@sigma = SummaData.sigma
|
27
27
|
@sigma_sq = @sigma * @sigma
|
28
|
-
@threshold =
|
28
|
+
@threshold = SummaData.threshold
|
29
29
|
@output = ""
|
30
30
|
end
|
31
31
|
|
@@ -150,84 +150,6 @@ class PheremoneAnalysis
|
|
150
150
|
end
|
151
151
|
|
152
152
|
|
153
|
-
class FrequencyAnalyzer
|
154
|
-
def initialize(document,stopWordsDoc)
|
155
|
-
@freqCount = {}
|
156
|
-
@document = document
|
157
|
-
@stopWords = stopWordsDoc
|
158
|
-
@mean = 0
|
159
|
-
@keywords = [];
|
160
|
-
|
161
|
-
for i in 0..@document.docArray.length
|
162
|
-
word = @document.docArray[i]
|
163
|
-
if word != nil
|
164
|
-
#word = CGWordOps.removePunctuation(word)
|
165
|
-
word.delete!(".,;:()?!\"")
|
166
|
-
if !@stopWords.docArray.include?(word.downcase)
|
167
|
-
#stemmed = word.stem
|
168
|
-
if @freqCount.has_key?(word)
|
169
|
-
@freqCount[word] =
|
170
|
-
@freqCount[word] + 1
|
171
|
-
else
|
172
|
-
@freqCount[word] = 1
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
#@freqCount.each {|key, value| puts "#{key} is #{value}" }
|
179
|
-
|
180
|
-
sum = 0
|
181
|
-
count = 0
|
182
|
-
keys = @freqCount.keys
|
183
|
-
for i in 0..keys.length
|
184
|
-
if keys[i] != nil
|
185
|
-
sum = sum + @freqCount[keys[i]]
|
186
|
-
count = count + 1
|
187
|
-
end
|
188
|
-
end
|
189
|
-
|
190
|
-
@mean = sum/count
|
191
|
-
end
|
192
|
-
|
193
|
-
def analyze(k=3)
|
194
|
-
@keywords = [];
|
195
|
-
|
196
|
-
keys = @freqCount.keys
|
197
|
-
for i in 0..keys.length
|
198
|
-
if keys[i] != nil
|
199
|
-
value = @freqCount[keys[i]]
|
200
|
-
if value > k * @mean && keys[i] != ""
|
201
|
-
@keywords << keys[i]
|
202
|
-
end
|
203
|
-
end
|
204
|
-
end
|
205
|
-
@keywords
|
206
|
-
end
|
207
|
-
attr_accessor :freqCount, :keywords
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
class CGWordOps
|
212
|
-
def self.removePunctuation(word)
|
213
|
-
output = word;
|
214
|
-
output = output.delete(".")
|
215
|
-
output = output.delete(",")
|
216
|
-
output = output.delete("--")
|
217
|
-
output = output.delete(";")
|
218
|
-
output = output.delete(":")
|
219
|
-
output = output.delete("(")
|
220
|
-
output = output.delete(")")
|
221
|
-
output = output.delete("[")
|
222
|
-
output = output.delete("]")
|
223
|
-
output = output.delete("?")
|
224
|
-
output = output.delete("!")
|
225
|
-
output = output.delete("\"")
|
226
|
-
output
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
|
231
153
|
class CGDocument
|
232
154
|
def initialize(docName)
|
233
155
|
@docName = docName
|
@@ -257,6 +179,8 @@ end
|
|
257
179
|
|
258
180
|
class SummaData
|
259
181
|
|
182
|
+
@@threshold = 0.8
|
183
|
+
@@sigma = 16
|
260
184
|
@@stopwords = Array.[]("a", \
|
261
185
|
"about", \
|
262
186
|
"above", \
|
@@ -577,7 +501,11 @@ class SummaData
|
|
577
501
|
"yourself", \
|
578
502
|
"yourselves" )
|
579
503
|
|
580
|
-
|
504
|
+
def SummaData.stopwords
|
505
|
+
@@stopwords
|
506
|
+
end
|
507
|
+
|
508
|
+
|
581
509
|
end
|
582
510
|
|
583
511
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: summa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- HyLiter.org
|
@@ -42,6 +42,8 @@ files:
|
|
42
42
|
- README.rdoc
|
43
43
|
- Rakefile
|
44
44
|
- lib/summa.rb
|
45
|
+
- lib/helloworld.rb
|
46
|
+
- lib/FrequencyAnalyzer.rb
|
45
47
|
- script/console
|
46
48
|
- script/destroy
|
47
49
|
- script/generate
|