summa 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -4,9 +4,10 @@ PostInstall.txt
4
4
  README.rdoc
5
5
  Rakefile
6
6
  lib/summa.rb
7
- lib/FrequencyAnalyzer.rb
8
7
  lib/SummaUtils.rb
9
8
  lib/SummaData.rb
9
+ lib/DocumentGraph.rb
10
+ lib/FrequencyAnalyzer.rb
10
11
  script/console
11
12
  script/destroy
12
13
  script/generate
@@ -0,0 +1,94 @@
1
+ require 'rubygems'
2
+ require 'rgl/adjacency'
3
+ require 'rgl/dot'
4
+ require 'Matrix'
5
+
6
# Builds a directed graph of consecutive non-stop words from a document and
# ranks the words with a PageRank-style iteration to select keywords.
class CGDocumentGraph

  # Number of rank-propagation passes performed by buildKeywords.
  RANK_ITERATIONS = 11

  attr_accessor :adjacencyGraph

  # Collects the document's non-stop words and links each word to the word
  # that follows it.
  #
  # document     - object whose docArray holds the document's words in order.
  # stopWordsDoc - object whose docArray holds the stop words; each cleaned
  #                word is compared against it in lower case.
  def initialize(document, stopWordsDoc)
    @document = document
    @stopWords = stopWordsDoc
    @adjacencyGraph = RGL::DirectedAdjacencyGraph.new
    @wordArray = []

    # Write the non-stop words into the word array. Every entry is visited,
    # including the last one.
    @document.docArray.each do |raw_word|
      next if raw_word.nil?

      word = CGWordOps.removePunctuation(raw_word)
      @wordArray << word unless @stopWords.docArray.include?(word.downcase)
    end

    # One edge per consecutive pair of kept words, including the final pair.
    @wordArray.each_cons(2) do |word1, word2|
      @adjacencyGraph.add_edge(word1, word2)
    end
  end

  # Ranks the graph's vertices and returns the best ones.
  #
  # delta  - damping factor: the fraction of a vertex's rank that is passed
  #          on along its out-links in each pass.
  # thresh - index of the last keyword wanted, so up to thresh + 1 keywords
  #          are returned; clamped to the number of vertices. A negative
  #          value yields an empty array.
  #
  # Returns an Array of vertices in descending rank order. Vertices with
  # equal rank keep the order in which the graph lists them.
  def buildKeywords(delta, thresh)
    verts = @adjacencyGraph.vertices
    total = verts.length
    return [] if total.zero?

    position = {}
    verts.each_with_index { |vertex, i| position[vertex] = i }

    # For every vertex, the indices of all vertices it points to.
    out_links = verts.map do |vertex|
      @adjacencyGraph.adjacent_vertices(vertex).map { |neighbour| position[neighbour] }.compact
    end

    base = (1.0 - delta) / total
    ranks = Array.new(total, 1.0 / total)

    RANK_ITERATIONS.times do
      # A vertex without out-links spreads its rank evenly over all vertices.
      dangling = 0.0
      out_links.each_with_index { |targets, i| dangling += ranks[i] if targets.empty? }

      updated = Array.new(total, base + delta * dangling / total)

      # Rank flows from a source to its targets. Pushing it the other way
      # leaves a uniform start vector unchanged and ranks nothing.
      out_links.each_with_index do |targets, source|
        next if targets.empty?

        share = delta * ranks[source] / targets.size
        targets.each { |target| updated[target] += share }
      end

      ranks = updated
    end

    # Sort indices rather than a rank-keyed hash so that vertices with equal
    # rank are all kept.
    order = (0...total).sort_by { |i| [-ranks[i], i] }
    limit = [[thresh, total - 1].min + 1, 0].max
    order.first(limit).map { |i| verts[i] }
  end

end
94
+
data/lib/SummaUtils.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  class String
2
- def freqkeys
2
+
3
# Removes common punctuation characters from the string in place.
#
# Returns the string itself when at least one character was removed and nil
# when nothing changed (the String#delete! contract).
def removePunctuation!
  # The hyphen is placed last so it is taken literally. Inside the selector,
  # ",-:" is a character range that would also delete digits and "/".
  delete!(".,:;()?!\"'*_-")
end
6
+
7
+ def frequent
3
8
  @freqCount = {}
4
9
  @stopwords = SummaData.stopwords
5
10
  @mean = 0
data/lib/summa.rb CHANGED
@@ -7,16 +7,12 @@ require 'SummaData'
7
7
  require 'SummaUtils'
8
8
 
9
9
  module Summa
10
- VERSION = '0.0.9'
10
+ VERSION = '0.0.10'
11
11
  end
12
12
 
13
13
  class String
14
- def summarize
15
- puts "Testing version 0.0.9 "
16
- end
17
-
18
- def removePunctuation!
19
- self.delete!(".,-:;()?!\"\'")
14
# Placeholder: takes no arguments and writes the literal "10!" to standard output.
+ def summary
15
+ puts "10!"
20
16
  end
21
17
  end
22
18
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: summa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - HyLiter.org
@@ -42,9 +42,10 @@ files:
42
42
  - README.rdoc
43
43
  - Rakefile
44
44
  - lib/summa.rb
45
- - lib/FrequencyAnalyzer.rb
46
45
  - lib/SummaUtils.rb
47
46
  - lib/SummaData.rb
47
+ - lib/DocumentGraph.rb
48
+ - lib/FrequencyAnalyzer.rb
48
49
  - script/console
49
50
  - script/destroy
50
51
  - script/generate