summa 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +2 -1
- data/lib/DocumentGraph.rb +94 -0
- data/lib/SummaUtils.rb +6 -1
- data/lib/summa.rb +3 -7
- metadata +3 -2
data/Manifest.txt
CHANGED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rgl/adjacency'
|
3
|
+
require 'rgl/dot'
|
4
|
+
require 'matrix' # stdlib file name is lower-case; 'Matrix' fails on case-sensitive filesystems

# Builds a directed graph whose vertices are the non-stop words of a document
# and whose edges join each word to the word that follows it, then ranks the
# words with a PageRank-style power iteration to pick keywords.
class CGDocumentGraph
  # Number of power-iteration steps performed by #buildKeywords.
  RANK_ITERATIONS = 11

  attr_accessor :adjacencyGraph

  # document     - object whose #docArray returns the document's words
  #                (nil entries are ignored).
  # stopWordsDoc - object whose #docArray returns the stop words; words are
  #                compared against it after down-casing.
  def initialize(document, stopWordsDoc)
    @document = document
    @stopWords = stopWordsDoc
    @adjacencyGraph = RGL::DirectedAdjacencyGraph.new

    # Hash lookup instead of a linear scan of the stop list for every word.
    stop_lookup = {}
    @stopWords.docArray.each { |stop| stop_lookup[stop] = true }

    # Keep every non-stop word, including the last one of the document.
    @wordArray = []
    @document.docArray.each do |raw|
      next if raw.nil?
      word = CGWordOps.removePunctuation(raw)
      @wordArray << word unless stop_lookup.key?(word.downcase)
    end

    # One edge per pair of consecutive words, including the final pair.
    @wordArray.each_cons(2) { |from, to| @adjacencyGraph.add_edge(from, to) }
  end

  # Ranks the graph's words and returns the best ones, highest rank first.
  #
  # delta  - damping factor of the iteration (weight given to link structure;
  #          the remaining 1 - delta is spread evenly over all words).
  # thresh - zero-based index of the last keyword to return, so up to
  #          thresh + 1 words come back (kept for backward compatibility).
  #          Values past the end are clamped; negative values yield [].
  #
  # Words with equal rank are returned in graph (first-seen) order.
  # Returns an Array of words; empty when the graph has no vertices.
  def buildKeywords(delta, thresh)
    verts = @adjacencyGraph.vertices
    num_verts = verts.length
    return [] if num_verts == 0

    scores = rank_scores(verts, delta)

    # Sort vertex indices by descending score; the index breaks ties so that
    # equally ranked words are all kept and the order is deterministic.
    ranked = (0...num_verts).sort_by { |i| [-scores[i], i] }

    last = thresh >= num_verts ? num_verts - 1 : thresh
    return [] if last < 0

    ranked[0..last].map { |i| verts[i] }
  end

  private

  # Runs the damped power iteration and returns one score per vertex,
  # in the same order as verts.
  def rank_scores(verts, delta)
    num_verts = verts.length
    uniform = 1.0 / num_verts

    # Rows are out-link distributions, so the matrix must be transposed for
    # rank to flow from a word to the words it points at.
    damped = Matrix.rows(transition_rows(verts)).transpose * delta
    teleport = Matrix.columns([Array.new(num_verts, (1.0 - delta) / num_verts)])
    rank = Matrix.columns([Array.new(num_verts, uniform)])

    RANK_ITERATIONS.times { rank = teleport + damped * rank }

    rank.column(0).to_a
  end

  # Builds the row-stochastic transition rows: row i spreads vertex i's weight
  # evenly over its successors, or over every vertex when it has none.
  def transition_rows(verts)
    num_verts = verts.length
    uniform = 1.0 / num_verts

    index_of = {}
    verts.each_with_index { |v, i| index_of[v] = i }

    verts.map do |w|
      successors = @adjacencyGraph.adjacent_vertices(w).to_a
      if successors.empty?
        Array.new(num_verts, uniform)
      else
        row = Array.new(num_verts, 0.0)
        share = 1.0 / successors.length
        successors.each do |v|
          idx = index_of[v]
          row[idx] += share if idx
        end
        row
      end
    end
  end
end
|
94
|
+
|
data/lib/SummaUtils.rb
CHANGED
data/lib/summa.rb
CHANGED
@@ -7,16 +7,12 @@ require 'SummaData'
|
|
7
7
|
require 'SummaUtils'
|
8
8
|
|
9
9
|
module Summa
|
10
|
-
VERSION = '0.0.9'
|
10
|
+
VERSION = '0.0.10'
|
11
11
|
end
|
12
12
|
|
13
13
|
class String
|
14
|
-
def
|
15
|
-
|
16
|
-
end
|
17
|
-
|
18
|
-
def removePunctuation!
|
19
|
-
self.delete!(".,-:;()?!\"\'")
|
14
|
+
def summary
|
15
|
+
puts "10!"
|
20
16
|
end
|
21
17
|
end
|
22
18
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: summa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- HyLiter.org
|
@@ -42,9 +42,10 @@ files:
|
|
42
42
|
- README.rdoc
|
43
43
|
- Rakefile
|
44
44
|
- lib/summa.rb
|
45
|
-
- lib/FrequencyAnalyzer.rb
|
46
45
|
- lib/SummaUtils.rb
|
47
46
|
- lib/SummaData.rb
|
47
|
+
- lib/DocumentGraph.rb
|
48
|
+
- lib/FrequencyAnalyzer.rb
|
48
49
|
- script/console
|
49
50
|
- script/destroy
|
50
51
|
- script/generate
|