summa 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -4,9 +4,10 @@ PostInstall.txt
4
4
  README.rdoc
5
5
  Rakefile
6
6
  lib/summa.rb
7
- lib/FrequencyAnalyzer.rb
8
7
  lib/SummaUtils.rb
9
8
  lib/SummaData.rb
9
+ lib/DocumentGraph.rb
10
+ lib/FrequencyAnalyzer.rb
10
11
  script/console
11
12
  script/destroy
12
13
  script/generate
@@ -0,0 +1,94 @@
1
+ require 'rubygems'
2
+ require 'rgl/adjacency'
3
+ require 'rgl/dot'
4
+ require 'Matrix'
5
+
6
# Builds a directed graph of consecutive non-stop words from a document and
# ranks the words with a PageRank-style power iteration to pick keywords.
#
# Each distinct word (after punctuation removal) is a vertex; an edge runs
# from every kept word to the kept word that follows it.
#
# NOTE(review): the surrounding file loads the matrix library with
# `require 'Matrix'`; the standard library file is named `matrix`, so that
# require presumably fails on case-sensitive filesystems -- confirm and
# lowercase it.
class CGDocumentGraph

  # Number of rank-update passes. The original loop ran `0 .. 10`, i.e. 11 times.
  RANK_ITERATIONS = 11

  attr_accessor :adjacencyGraph

  # document     - object whose #docArray returns the document's words
  #                (nil entries are skipped).
  # stopWordsDoc - object whose #docArray returns the stop words; compared
  #                against the downcased, punctuation-free word.
  def initialize(document, stopWordsDoc)
    @document = document
    @stopWords = stopWordsDoc
    @adjacencyGraph = RGL::DirectedAdjacencyGraph.new
    @wordArray = []

    # Collect every non-stop word. The previous index loop used an exclusive
    # range with "length - 1" and therefore never looked at the last word.
    stop_words = @stopWords.docArray
    @document.docArray.each do |raw_word|
      next if raw_word.nil?

      word = CGWordOps.removePunctuation(raw_word)
      @wordArray << word unless stop_words.include?(word.downcase)
    end

    # Link each word to its successor. each_cons visits every adjacent pair,
    # including the final one the old "length - 2" bound skipped.
    @wordArray.each_cons(2) do |current_word, next_word|
      @adjacencyGraph.add_edge(current_word, next_word)
    end
  end

  # Ranks the graph's vertices and returns the best ones.
  #
  # delta  - damping factor of the rank update
  #          (rank = (1 - delta) / N + delta * incoming rank).
  # thresh - zero-based index of the last keyword wanted, so thresh + 1
  #          keywords are returned (fewer if the graph is smaller; none if
  #          thresh is negative).
  #
  # Returns an Array of vertices ordered by descending score. Vertices with
  # equal scores keep the order in which the graph lists them. Returns [] for
  # an empty graph.
  def buildKeywords(delta, thresh)
    verts = @adjacencyGraph.vertices
    num_verts = verts.size
    return [] if num_verts.zero?

    # Vertex -> row/column position, so successors are located in O(1).
    position = {}
    verts.each_with_index { |vertex, i| position[vertex] = i }

    # Row i holds the probability of stepping from vertex i to each vertex.
    rows = verts.map do |vertex|
      out_degree = @adjacencyGraph.out_degree(vertex)
      if out_degree.zero?
        # A vertex without successors spreads its rank over all vertices.
        Array.new(num_verts, 1.0 / num_verts)
      else
        row = Array.new(num_verts, 0.0)
        # Every successor is visited; the old bound dropped the last one.
        @adjacencyGraph.adjacent_vertices(vertex).each do |successor|
          idx = position[successor]
          row[idx] = 1.0 / out_degree if idx
        end
        row
      end
    end

    # Rows describe outgoing steps, so the matrix is transposed before it is
    # applied to the rank column: a vertex collects rank from the vertices
    # that point to it, not from the ones it points to.
    transition = Matrix.rows(rows).transpose * delta
    teleport = Matrix.columns([Array.new(num_verts, (1.0 - delta) / num_verts)])
    pr = Matrix.columns([Array.new(num_verts, 1.0 / num_verts)])
    RANK_ITERATIONS.times { pr = teleport + transition * pr }

    scores = pr.column(0).to_a

    # Sort (vertex, index) pairs instead of building a score-keyed hash, which
    # silently merged vertices that ended up with the same score.
    ranked = verts.each_with_index.sort_by { |_vertex, i| [-scores[i], i] }
    ranked.map { |vertex, _i| vertex }.first([thresh + 1, 0].max)
  end

end
94
+
data/lib/SummaUtils.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  class String
2
- def freqkeys
2
+
3
# Removes the punctuation characters . , : ; ( ) ? ! " ' * _ - from the
# receiver in place.
#
# Returns the receiver, or nil when nothing was removed (the String#delete!
# convention).
def removePunctuation!
  # The hyphen must be the last character of the selector: placed between
  # "," and ":" it made String#delete! read ",-:" as a character range,
  # which also stripped every digit and "/".
  delete!(".,:;()?!\"'*_-")
end
6
+
7
+ def frequent
3
8
  @freqCount = {}
4
9
  @stopwords = SummaData.stopwords
5
10
  @mean = 0
data/lib/summa.rb CHANGED
@@ -7,16 +7,12 @@ require 'SummaData'
7
7
  require 'SummaUtils'
8
8
 
9
9
  module Summa
10
- VERSION = '0.0.9'
10
+ VERSION = '0.0.10'
11
11
  end
12
12
 
13
13
  class String
14
- def summarize
15
- puts "Testing version 0.0.9 "
16
- end
17
-
18
- def removePunctuation!
19
- self.delete!(".,-:;()?!\"\'")
14
+ def summary
15
+ puts "10!"
20
16
  end
21
17
  end
22
18
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: summa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - HyLiter.org
@@ -42,9 +42,10 @@ files:
42
42
  - README.rdoc
43
43
  - Rakefile
44
44
  - lib/summa.rb
45
- - lib/FrequencyAnalyzer.rb
46
45
  - lib/SummaUtils.rb
47
46
  - lib/SummaData.rb
47
+ - lib/DocumentGraph.rb
48
+ - lib/FrequencyAnalyzer.rb
48
49
  - script/console
49
50
  - script/destroy
50
51
  - script/generate