summa 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +2 -0
- data/lib/FrequencyAnalyzer.rb +37 -0
- data/lib/helloworld.rb +5 -0
- data/lib/summa.rb +11 -83
- metadata +3 -1
data/Manifest.txt
CHANGED
data/lib/FrequencyAnalyzer.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
class FrequencyAnalyzer
|
2
|
+
def initialize(doctext)
|
3
|
+
@freqCount = {}
|
4
|
+
@doctext = doctext
|
5
|
+
@stopwords = SummaData.stopwords
|
6
|
+
@mean = 0
|
7
|
+
@keywords = [];
|
8
|
+
|
9
|
+
doctext.each { |word|
|
10
|
+
if word != nil
|
11
|
+
word.removePunctuation!
|
12
|
+
if !@stopwords.include?(word.downcase)
|
13
|
+
#stemmed = word.stem
|
14
|
+
if @freqCount.has_key?(word)
|
15
|
+
@freqCount[word] = @freqCount[word] + 1
|
16
|
+
else
|
17
|
+
@freqCount[word] = 1
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
}
|
22
|
+
@freqCount.each {|key, value| puts "#{key} has #{value}" }
|
23
|
+
|
24
|
+
sum = 0
|
25
|
+
count = 0
|
26
|
+
keys = @freqCount.keys
|
27
|
+
for i in 0..keys.length
|
28
|
+
if keys[i] != nil
|
29
|
+
sum = sum + @freqCount[keys[i]]
|
30
|
+
count = count + 1
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
@mean = sum/count
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
data/lib/helloworld.rb
ADDED
data/lib/summa.rb
CHANGED
@@ -2,12 +2,12 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
2
2
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
3
|
|
4
4
|
module Summa
|
5
|
-
VERSION = '0.0.4'
|
5
|
+
VERSION = '0.0.5'
|
6
6
|
end
|
7
7
|
|
8
8
|
class String
|
9
9
|
def summarize
|
10
|
-
puts "
|
10
|
+
puts "Still more testing!... of Summa #{VERSION}"
|
11
11
|
end
|
12
12
|
|
13
13
|
def removePunctuation!
|
@@ -23,9 +23,9 @@ class PheremoneAnalysis
|
|
23
23
|
end
|
24
24
|
@document = document
|
25
25
|
@keywords = keywords
|
26
|
-
@sigma =
|
26
|
+
@sigma = SummaData.sigma
|
27
27
|
@sigma_sq = @sigma * @sigma
|
28
|
-
@threshold =
|
28
|
+
@threshold = SummaData.threshold
|
29
29
|
@output = ""
|
30
30
|
end
|
31
31
|
|
@@ -150,84 +150,6 @@ class PheremoneAnalysis
|
|
150
150
|
end
|
151
151
|
|
152
152
|
|
153
|
-
class FrequencyAnalyzer
|
154
|
-
def initialize(document,stopWordsDoc)
|
155
|
-
@freqCount = {}
|
156
|
-
@document = document
|
157
|
-
@stopWords = stopWordsDoc
|
158
|
-
@mean = 0
|
159
|
-
@keywords = [];
|
160
|
-
|
161
|
-
for i in 0..@document.docArray.length
|
162
|
-
word = @document.docArray[i]
|
163
|
-
if word != nil
|
164
|
-
#word = CGWordOps.removePunctuation(word)
|
165
|
-
word.delete!(".,;:()?!\"")
|
166
|
-
if !@stopWords.docArray.include?(word.downcase)
|
167
|
-
#stemmed = word.stem
|
168
|
-
if @freqCount.has_key?(word)
|
169
|
-
@freqCount[word] =
|
170
|
-
@freqCount[word] + 1
|
171
|
-
else
|
172
|
-
@freqCount[word] = 1
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
#@freqCount.each {|key, value| puts "#{key} is #{value}" }
|
179
|
-
|
180
|
-
sum = 0
|
181
|
-
count = 0
|
182
|
-
keys = @freqCount.keys
|
183
|
-
for i in 0..keys.length
|
184
|
-
if keys[i] != nil
|
185
|
-
sum = sum + @freqCount[keys[i]]
|
186
|
-
count = count + 1
|
187
|
-
end
|
188
|
-
end
|
189
|
-
|
190
|
-
@mean = sum/count
|
191
|
-
end
|
192
|
-
|
193
|
-
def analyze(k=3)
|
194
|
-
@keywords = [];
|
195
|
-
|
196
|
-
keys = @freqCount.keys
|
197
|
-
for i in 0..keys.length
|
198
|
-
if keys[i] != nil
|
199
|
-
value = @freqCount[keys[i]]
|
200
|
-
if value > k * @mean && keys[i] != ""
|
201
|
-
@keywords << keys[i]
|
202
|
-
end
|
203
|
-
end
|
204
|
-
end
|
205
|
-
@keywords
|
206
|
-
end
|
207
|
-
attr_accessor :freqCount, :keywords
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
class CGWordOps
|
212
|
-
def self.removePunctuation(word)
|
213
|
-
output = word;
|
214
|
-
output = output.delete(".")
|
215
|
-
output = output.delete(",")
|
216
|
-
output = output.delete("--")
|
217
|
-
output = output.delete(";")
|
218
|
-
output = output.delete(":")
|
219
|
-
output = output.delete("(")
|
220
|
-
output = output.delete(")")
|
221
|
-
output = output.delete("[")
|
222
|
-
output = output.delete("]")
|
223
|
-
output = output.delete("?")
|
224
|
-
output = output.delete("!")
|
225
|
-
output = output.delete("\"")
|
226
|
-
output
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
|
231
153
|
class CGDocument
|
232
154
|
def initialize(docName)
|
233
155
|
@docName = docName
|
@@ -257,6 +179,8 @@ end
|
|
257
179
|
|
258
180
|
class SummaData
|
259
181
|
|
182
|
+
@@threshold = 0.8
|
183
|
+
@@sigma = 16
|
260
184
|
@@stopwords = Array.[]("a", \
|
261
185
|
"about", \
|
262
186
|
"above", \
|
@@ -577,7 +501,11 @@ class SummaData
|
|
577
501
|
"yourself", \
|
578
502
|
"yourselves" )
|
579
503
|
|
580
|
-
|
504
|
+
def SummaData.stopwords
|
505
|
+
@@stopwords
|
506
|
+
end
|
507
|
+
|
508
|
+
|
581
509
|
end
|
582
510
|
|
583
511
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: summa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- HyLiter.org
|
@@ -42,6 +42,8 @@ files:
|
|
42
42
|
- README.rdoc
|
43
43
|
- Rakefile
|
44
44
|
- lib/summa.rb
|
45
|
+
- lib/helloworld.rb
|
46
|
+
- lib/FrequencyAnalyzer.rb
|
45
47
|
- script/console
|
46
48
|
- script/destroy
|
47
49
|
- script/generate
|