hits 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -2
- data/README +14 -2
- data/Rakefile +1 -1
- data/lib/hits/graph.rb +12 -0
- data/lib/hits/hits.rb +29 -4
- metadata +2 -2
data/LICENSE
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
-
|
1
|
+
(The MIT License)
|
2
2
|
|
3
|
-
|
3
|
+
Copyright © 2001-2008 Integrallis Software, LLC.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
CHANGED
@@ -12,7 +12,7 @@ graph = Hits::Graph.new
|
|
12
12
|
graph.add_edge(:bsbodden, :objo, 1.0)
|
13
13
|
graph.add_edge(:bsbodden, :nusairat, 2.0)
|
14
14
|
graph.add_edge(:bsbodden, :looselytyped, 3.0)
|
15
|
-
graph.add_edge(:bsbodden, :neal4d,
|
15
|
+
graph.add_edge(:bsbodden, :neal4d, 8.5)
|
16
16
|
graph.add_edge(:objo, :nusairat, 2.5)
|
17
17
|
graph.add_edge(:objo, :bsbodden, 1.0)
|
18
18
|
graph.add_edge(:neal4d, :bsbodden, 1.15)
|
@@ -20,12 +20,19 @@ graph.add_edge(:nusairat, :bsbodden, 4.5)
|
|
20
20
|
|
21
21
|
# textual display of the graph
|
22
22
|
puts "graph ==> #{graph}"
|
23
|
+
puts "graph max weight ==> #{graph.max_weight}"
|
24
|
+
puts "graph min weight ==> #{graph.min_weight}"
|
23
25
|
|
24
26
|
# create a HITS for the graph
|
25
27
|
hits = Hits::Hits.new(graph)
|
26
28
|
|
27
29
|
# show the vertexes incoming and outgoing links (inlinks and outlinks)
|
28
|
-
graph.each_vertex
|
30
|
+
graph.each_vertex do |vertex|
|
31
|
+
puts "=== In links for #{vertex} ==="
|
32
|
+
graph.in_links(vertex).each { |in_link| puts in_link }
|
33
|
+
puts "=== Out links for #{vertex} ==="
|
34
|
+
graph.out_links(vertex).each { |out_link| puts out_link }
|
35
|
+
end
|
29
36
|
|
30
37
|
# compute HITS with the default number of iterations
|
31
38
|
hits.compute_hits
|
@@ -40,3 +47,8 @@ puts "=== TOP AUTHORITIES ==="
|
|
40
47
|
hits.top_authority_scores.each do |authority|
|
41
48
|
puts "authority #{authority}"
|
42
49
|
end
|
50
|
+
|
51
|
+
# print all scores
|
52
|
+
graph.each_vertex do |vertex|
|
53
|
+
puts "vertex: #{vertex}, authority: #{hits.authority_scores[vertex]}, hub: #{hits.hub_scores[vertex]}"
|
54
|
+
end
|
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ require 'spec/rake/spectask'
|
|
8
8
|
|
9
9
|
spec = Gem::Specification.new do |s|
|
10
10
|
s.name = 'hits'
|
11
|
-
s.version = '0.0.1'
|
11
|
+
s.version = '0.0.2'
|
12
12
|
s.has_rdoc = true
|
13
13
|
s.extra_rdoc_files = ['README', 'LICENSE']
|
14
14
|
s.summary = "A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)"
|
data/lib/hits/graph.rb
CHANGED
@@ -39,6 +39,18 @@ module Hits
|
|
39
39
|
def weight=(to, from, weight)
|
40
40
|
@edge_weights[[to, from]] = weight if @edge_weights[[to, from]]
|
41
41
|
end
|
42
|
+
|
43
|
+
def max_weight
|
44
|
+
@edge_weights.values.max
|
45
|
+
end
|
46
|
+
|
47
|
+
def min_weight
|
48
|
+
@edge_weights.values.min
|
49
|
+
end
|
50
|
+
|
51
|
+
def weights
|
52
|
+
@edge_weights.values
|
53
|
+
end
|
42
54
|
|
43
55
|
def to_s
|
44
56
|
@graph.edges.to_a.to_s
|
data/lib/hits/hits.rb
CHANGED
@@ -2,8 +2,12 @@ module Hits
|
|
2
2
|
|
3
3
|
class Hits
|
4
4
|
|
5
|
-
|
5
|
+
attr_reader :authority_scores
|
6
|
+
attr_reader :hub_scores
|
7
|
+
|
8
|
+
def initialize(graph, use_weights = true)
|
6
9
|
@graph = graph
|
10
|
+
@use_weights = use_weights
|
7
11
|
@hub_scores = {}
|
8
12
|
@authority_scores = {}
|
9
13
|
@graph.each_vertex do |vertex|
|
@@ -18,17 +22,38 @@ module Hits
|
|
18
22
|
authority_score = @graph.in_links(vertex).inject(0.0) { |sum, vertex| sum + @hub_scores[vertex] } if @graph.in_links(vertex)
|
19
23
|
hub_score = @graph.out_links(vertex).inject(0.0) { |sum, vertex| sum + @authority_scores[vertex] } if @graph.out_links(vertex)
|
20
24
|
@authority_scores[vertex] = authority_score || 0.0
|
21
|
-
@hub_scores[vertex] = hub_score || 0.0
|
25
|
+
@hub_scores[vertex] = hub_score || 0.0
|
22
26
|
end
|
27
|
+
normalize_scores
|
23
28
|
end
|
29
|
+
apply_weighting if @use_weights
|
24
30
|
end
|
25
31
|
|
26
32
|
def top_hub_scores(how_many=5)
|
27
|
-
@hub_scores.sort_by { |k,v| v }.
|
33
|
+
@hub_scores.sort_by { |k,v| v }.map { |v| v[0] }.reverse.first(how_many)
|
28
34
|
end
|
29
35
|
|
30
36
|
def top_authority_scores(how_many=5)
|
31
|
-
@authority_scores.sort_by { |k,v| v }.
|
37
|
+
@authority_scores.sort_by { |k,v| v }.map { |v| v[0] }.reverse.first(how_many)
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def normalize_scores
|
43
|
+
sum_of_squares_for_authorities = @authority_scores.inject(0.0) { |sum, element| sum + element[1]**2 }
|
44
|
+
sum_of_squares_for_hubs = @hub_scores.inject(0.0) { |sum, element| sum + element[1]**2 }
|
45
|
+
@authority_scores.each { |key, value| @authority_scores[key] = value / sum_of_squares_for_authorities }
|
46
|
+
@hub_scores.each { |key, value| @hub_scores[key] = value / sum_of_squares_for_hubs }
|
47
|
+
end
|
48
|
+
|
49
|
+
def apply_weighting
|
50
|
+
sum = @graph.weights.inject(0.0) { |sum, weight| sum + weight }
|
51
|
+
max = @graph.max_weight
|
52
|
+
min = @graph.min_weight
|
53
|
+
@graph.each_vertex do |vertex|
|
54
|
+
@authority_scores[vertex] = (@authority_scores[vertex] / sum) * (max - min) + min
|
55
|
+
@hub_scores[vertex] = (@hub_scores[vertex] / sum) * (max - min) + min
|
56
|
+
end
|
32
57
|
end
|
33
58
|
|
34
59
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Sam-Bodden
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-11-
|
12
|
+
date: 2009-11-15 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|