summa 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +11 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +46 -0
- data/Rakefile +27 -0
- data/lib/summa.rb +148 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_summa.rb +11 -0
- metadata +81 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
= summa
|
2
|
+
|
3
|
+
* http://github.com/#{github_username}/#{project_name}
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Auto-generate keywords from text, and auto-generate text summaries
|
8
|
+
from articles.
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
Contains newgem dependency.
|
13
|
+
|
14
|
+
== SYNOPSIS:
|
15
|
+
|
16
|
+
document.summarize
|
17
|
+
"String".stem
|
18
|
+
|
19
|
+
== REQUIREMENTS:
|
20
|
+
|
21
|
+
* FIX (list of requirements)
|
22
|
+
|
23
|
+
== INSTALL:
|
24
|
+
|
25
|
+
sudo gem install summa
|
26
|
+
|
27
|
+
== LICENSE:
|
28
|
+
|
29
|
+
Copyright 2009-2010 HyLiter.org
|
30
|
+
|
31
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
32
|
+
a copy of this software and associated documentation files (the
|
33
|
+
'Software'), to deal in the Software without restriction, including
|
34
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
35
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
36
|
+
permit persons to whom the Software is furnished to do so as well,
|
37
|
+
with no restrictions whatsoever, either explicit or implied.
|
38
|
+
|
39
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
40
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
41
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
42
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
43
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
44
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
45
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
46
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Rake entry point for the summa gem: declares the Hoe gem specification and
# loads the newgem-provided tasks plus any project-local *.rake files.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/summa'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'summa' do
  self.developer 'HyLiter.org', 'HyLiter.org@gmail.com'
  # Ship the *text* of PostInstall.txt. Assigning the bare file name made the
  # packaged gem announce the literal string "PostInstall.txt" after install.
  # TODO remove if post-install message not required
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  self.rubyforge_name = 'summa'
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
|
27
|
+
|
data/lib/summa.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
|
4
|
+
# Namespace of the summa gem.
module Summa
  # Version string of this release of the gem.
  VERSION = "0.0.1"
end
|
7
|
+
|
8
|
+
# Reopens the core String class to add the gem's +stem+ method.
class String
  # Placeholder implementation: writes a fixed test message to standard
  # output and returns nil. No stemming is performed.
  def stem
    $stdout.puts("Just testing!")
  end
end
|
13
|
+
|
14
|
+
# Scores every word position of a document by how close it lies to keyword
# occurrences and extracts the sentences around high-scoring positions.
#
# Each word that contains a keyword deposits a Gaussian-shaped "pheremone"
# curve (standard deviation +sigma+) over all word positions. After
# normalisation, positions whose level reaches +threshold+ form regions, and
# #summarize returns those regions widened to sentence boundaries.
#
# The collaborating objects are duck-typed:
# * +document+ responds to +wordArray+ (its length fixes the number of
#   positions) and +docArray+ (the words, indexed by position).
#   NOTE(review): both are presumably parallel arrays of equal length - confirm.
# * +keywords+ responds to +wordArray+ (the keyword strings).
# * +constants+ responds to +sigma+ and +threshold+.
class PheremoneAnalysis
  # Abbreviations whose trailing period does not terminate a sentence.
  ABBREVIATIONS = %w[
    Mr. Mrs. Dr. U.S.
    Jan. Feb. Mar. Apr. May. Jun. Jul. Aug. Sep. Oct. Nov. Dec. Sept.
    Lt. Maj. Col.
  ].freeze

  attr_accessor :pheremones, :document, :keywords, :sigma, :threshold, :output

  # The accessor used to be declared under the misspelled name +documet+
  # (and therefore never exposed @document). Keep the old names working.
  alias_method :documet, :document
  alias_method :documet=, :document=

  # document::  object exposing +wordArray+ and +docArray+
  # keywords::  object exposing +wordArray+
  # constants:: object exposing +sigma+ and +threshold+
  def initialize(document, keywords, constants)
    @document = document
    @keywords = keywords
    @sigma = constants.sigma
    @sigma_sq = @sigma * @sigma
    @threshold = constants.threshold
    @pheremones = Array.new(document.wordArray.length, 0.0)
    @output = ""
  end

  # Computes the normalised pheremone level of every word position.
  #
  # For each position whose word contains a keyword (one deposit per matching
  # keyword) a Gaussian centred on that position is added to all positions.
  # The levels are then divided by their maximum so the peak becomes 1.0.
  # Levels are recomputed from scratch on every call.
  #
  # Returns the array of levels (also available through #pheremones).
  def analyze
    positions = @document.wordArray.length
    @pheremones = Array.new(positions, 0.0)

    front = 1.0 / (@sigma * Math.sqrt(2 * Math::PI))
    # Float denominator: with an Integer sigma the exponent would otherwise be
    # computed with integer division and round to a whole number.
    two_sigma_sq = 2.0 * @sigma_sq

    positions.times do |i|
      word = @document.docArray[i]
      @keywords.wordArray.each do |keyword|
        next if word[keyword].nil?

        positions.times do |pos|
          distance = i - pos
          @pheremones[pos] += front * Math.exp(-(distance * distance) / two_sigma_sq)
        end
      end
    end

    # Without any keyword hit every level is zero; skip the normalisation
    # instead of dividing by zero.
    peak = @pheremones.max
    @pheremones.map! { |level| level / peak } if peak && peak > 0
    @pheremones
  end

  # Builds the summary from the current pheremone levels (run #analyze first).
  #
  # Every maximal run of positions whose level is >= threshold is emitted,
  # preceded by the words back to the start of its first sentence and
  # followed by the words up to the end of its last sentence. Each word is
  # followed by a single space.
  #
  # Returns the summary string, which is also stored in #output.
  def summarize
    parts = []
    in_region = false

    @pheremones.each_with_index do |level, i|
      if level >= @threshold
        unless in_region
          in_region = true
          parts << findStartOfSentence(i)
        end
        parts << "#{@document.docArray[i]} "
      elsif in_region
        in_region = false
        parts << findEndOfSentence(i - 1)
        parts << " "
      end
    end

    @output = parts.join
  end

  # Returns the words that precede position +i+ within its sentence, each
  # followed by a space (empty string when +i+ already starts a sentence).
  #
  # The scan begins at the word *before* +i+ (a terminator on word +i+ itself
  # ends the sentence +i+ belongs to, it does not start a new one) and runs
  # down to index 0 inclusive.
  def findStartOfSentence(i)
    words = @document.docArray
    start_index = 0

    (i - 1).downto(0) do |index|
      if sentence_terminator?(words[index])
        start_index = index + 1
        break
      end
    end

    (start_index...i).map { |j| "#{words[j]} " }.join
  end

  # Returns the words that follow position +i+ up to and including the word
  # that ends its sentence, each followed by a space. The result is empty
  # when word +i+ itself ends the sentence. When no terminator follows, the
  # rest of the document is returned.
  def findEndOfSentence(i)
    words = @document.docArray
    last_index = words.length - 1

    end_index = (i..last_index).find { |index| sentence_terminator?(words[index]) }
    # No terminator found: stop at the last real word rather than one past it.
    end_index ||= last_index

    ((i + 1)..end_index).map { |j| "#{words[j]} " }.join
  end

  private

  # True when +word+ ends in '.', ';' or ':' and is not a known abbreviation.
  def sentence_terminator?(word)
    text = word.to_s
    return false if ABBREVIATIONS.include?(text)

    !(text =~ /[.;:]\z/).nil?
  end
end
|
147
|
+
|
148
|
+
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
# Replaces the current process with an irb session that has tab completion
# and lib/summa.rb preloaded.

# On mswin/mingw platforms irb is invoked as irb.bat.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/summa.rb'}"
puts "Loading summa gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script with the arguments given on the command line.

# Absolute path of the directory above script/.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; load RubyGems and retry only if that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading --help / -h argument is dropped before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script with the arguments given on the command line.

# Absolute path of the directory above script/.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; load RubyGems and retry only if that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading --help / -h argument is dropped before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/test/test_helper.rb
ADDED
data/test/test_summa.rb
ADDED
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: summa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- HyLiter.org
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-09-21 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hoe
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 2.3.3
|
24
|
+
version:
|
25
|
+
description: |-
|
26
|
+
Auto-generate keywords from text, and auto-generate text summaries
|
27
|
+
from articles.
|
28
|
+
email:
|
29
|
+
- HyLiter.org@gmail.com
|
30
|
+
executables: []
|
31
|
+
|
32
|
+
extensions: []
|
33
|
+
|
34
|
+
extra_rdoc_files:
|
35
|
+
- History.txt
|
36
|
+
- Manifest.txt
|
37
|
+
- PostInstall.txt
|
38
|
+
files:
|
39
|
+
- History.txt
|
40
|
+
- Manifest.txt
|
41
|
+
- PostInstall.txt
|
42
|
+
- README.rdoc
|
43
|
+
- Rakefile
|
44
|
+
- lib/summa.rb
|
45
|
+
- script/console
|
46
|
+
- script/destroy
|
47
|
+
- script/generate
|
48
|
+
- test/test_helper.rb
|
49
|
+
- test/test_summa.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/#{github_username}/#{project_name}
|
52
|
+
licenses: []
|
53
|
+
|
54
|
+
post_install_message: PostInstall.txt
|
55
|
+
rdoc_options:
|
56
|
+
- --main
|
57
|
+
- README.rdoc
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project: summa
|
75
|
+
rubygems_version: 1.3.5
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Auto-generate keywords from text, and auto-generate text summaries from articles.
|
79
|
+
test_files:
|
80
|
+
- test/test_helper.rb
|
81
|
+
- test/test_summa.rb
|