hits 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +3 -0
- data/README +42 -0
- data/Rakefile +44 -0
- data/lib/hits/graph.rb +48 -0
- data/lib/hits/hits.rb +36 -0
- data/lib/hits.rb +2 -0
- metadata +61 -0
data/LICENSE
ADDED
data/README
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
== hits
|
2
|
+
|
3
|
+
A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities). See http://en.wikipedia.org/wiki/HITS_algorithm
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hits'
|
7
|
+
|
8
|
+
# create a graph
|
9
|
+
graph = Hits::Graph.new
|
10
|
+
|
11
|
+
# add some edges to the graph with weights
|
12
|
+
graph.add_edge(:bsbodden, :objo, 1.0)
|
13
|
+
graph.add_edge(:bsbodden, :nusairat, 2.0)
|
14
|
+
graph.add_edge(:bsbodden, :looselytyped, 3.0)
|
15
|
+
graph.add_edge(:bsbodden, :neal4d, 1.5)
|
16
|
+
graph.add_edge(:objo, :nusairat, 2.5)
|
17
|
+
graph.add_edge(:objo, :bsbodden, 1.0)
|
18
|
+
graph.add_edge(:neal4d, :bsbodden, 1.15)
|
19
|
+
graph.add_edge(:nusairat, :bsbodden, 4.5)
|
20
|
+
|
21
|
+
# textual display of the graph
|
22
|
+
puts "graph ==> #{graph}"
|
23
|
+
|
24
|
+
# create a HITS for the graph
|
25
|
+
hits = Hits::Hits.new(graph)
|
26
|
+
|
27
|
+
# show each vertex's incoming and outgoing links (in-links and out-links)
|
28
|
+
graph.each_vertex { |v| puts "in links for #{v} ==> #{graph.in_links(v)}, out links for #{v} ==> #{graph.out_links(v)}"}
|
29
|
+
|
30
|
+
# compute HITS with the default number of iterations
|
31
|
+
hits.compute_hits
|
32
|
+
|
33
|
+
# print the top HUBS and AUTHORITIES
|
34
|
+
puts "=== TOP HUBS ==="
|
35
|
+
hits.top_hub_scores.each do |hub|
|
36
|
+
puts "hub #{hub}"
|
37
|
+
end
|
38
|
+
|
39
|
+
puts "=== TOP AUTHORITIES ==="
|
40
|
+
hits.top_authority_scores.each do |authority|
|
41
|
+
puts "authority #{authority}"
|
42
|
+
end
|
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
require 'rake/rdoctask'
|
6
|
+
require 'rake/testtask'
|
7
|
+
require 'spec/rake/spectask'
|
8
|
+
|
9
|
+
# Gem specification for the hits gem; consumed by the packaging task
# defined further down in this Rakefile.
spec = Gem::Specification.new do |s|
  s.name = 'hits'
  s.version = '0.0.1'
  # NOTE(review): newer RubyGems releases deprecate this setter; only call it
  # where it is still available so the Rakefile keeps loading either way.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
  s.extra_rdoc_files = ['README', 'LICENSE']
  s.summary = "A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)"
  s.description = s.summary
  s.author = 'Brian Sam-Bodden'
  s.email = 'bsbodden@integrallis.com'
  s.files = %w(LICENSE README Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
  # lib/hits/graph.rb requires rgl/dot, rgl/adjacency and rgl/bidirectional,
  # so RGL must be installed together with this gem.
  s.add_dependency 'rgl'
end
|
22
|
+
|
23
|
+
# Packaging task driven by the gem specification defined above; tar and zip
# archives are requested in addition to the gem itself.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_zip = true
  pkg.need_tar = true
end

# RDoc generation: README is the start page, output goes to doc/rdoc.
Rake::RDocTask.new do |rdoc|
  rdoc.title = "hits Docs"
  rdoc.main = "README"
  rdoc.rdoc_dir = 'doc/rdoc'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.add(['README', 'LICENSE', 'lib/**/*.rb'])
end

# Test::Unit style tests: every Ruby file under test/.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList['test/**/*.rb']
end

# RSpec examples: every Ruby file under spec/.
Spec::Rake::SpecTask.new do |spec_task|
  spec_task.spec_files = FileList['spec/**/*.rb']
end
|
data/lib/hits/graph.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rgl/dot'
|
2
|
+
require 'rgl/adjacency'
|
3
|
+
require 'rgl/bidirectional'
|
4
|
+
|
5
|
+
module Hits

  # Directed graph with per-edge weights.
  #
  # Edges are stored in an RGL::DirectedAdjacencyGraph (exposed through
  # #graph). Incoming links and edge weights are tracked alongside it in plain
  # hashes. Weights are keyed by the pair [to, from], i.e. destination first.
  class Graph
    attr_reader :graph

    def initialize
      @graph = RGL::DirectedAdjacencyGraph.new
      @in_links = {}
      @edge_weights = {}
    end

    # Adds the edge from -> to and records its weight (default 1.0).
    # Adding the same edge again overwrites the stored weight and does not
    # duplicate the in-link entry. Returns the stored weight.
    def add_edge(from, to, weight = 1.0)
      @graph.add_edge(from, to)
      sources = (@in_links[to] ||= [])
      sources << from unless sources.include?(from)
      @edge_weights[[to, from]] = weight
    end

    # Vertices with an edge pointing at +vertex+, in insertion order.
    # Returns nil (not an empty array) when no edge into +vertex+ was added.
    def in_links(vertex)
      @in_links[vertex]
    end

    # Vertices that +vertex+ points at, as reported by the underlying graph.
    def out_links(vertex)
      @graph.adjacent_vertices(vertex)
    end

    # Yields every vertex of the underlying graph.
    def each_vertex(&b)
      @graph.each_vertex(&b)
    end

    # Weight of the edge from -> to (note the argument order: destination
    # first), or nil when that edge was never added.
    def weight(to, from)
      @edge_weights[[to, from]]
    end

    # Updates the weight of an existing edge from -> to (destination first).
    # Edges that were never added are left alone. Returns the new weight, or
    # nil when the edge does not exist.
    def set_weight(to, from, weight)
      key = [to, from]
      @edge_weights[key] = weight if @edge_weights.key?(key)
    end

    # Kept for backward compatibility. A three-argument setter cannot be
    # invoked with assignment syntax (only through +send+), so prefer
    # #set_weight in new code.
    def weight=(to, from, weight)
      set_weight(to, from, weight)
    end

    # Textual form of the edge list of the underlying graph.
    def to_s
      @graph.edges.to_a.to_s
    end

  end
end
|
data/lib/hits/hits.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module Hits

  # Hub and authority scoring (HITS) over a graph object.
  #
  # The graph only needs to respond to +each_vertex+, +in_links(vertex)+ and
  # +out_links(vertex)+; either link method may return nil for "no links".
  # Edge weights are not consulted by this class.
  class Hits
    # Hashes mapping each vertex to its current hub / authority score.
    attr_reader :hub_scores, :authority_scores

    # Seeds every vertex currently in +graph+ with a hub and authority
    # score of 1.0.
    def initialize(graph)
      @graph = graph
      @hub_scores = {}
      @authority_scores = {}
      seed_missing_scores
    end

    # Runs +iterations+ rounds of the HITS update (default 25).
    #
    # Each round first recomputes every authority score as the sum of the hub
    # scores of its in-links, then every hub score as the sum of the fresh
    # authority scores of its out-links. Both score sets are scaled to unit
    # Euclidean length after each step so the values stay bounded no matter
    # how many rounds are run.
    def compute_hits(iterations = 25)
      # Vertices added to the graph after construction start from the seed.
      seed_missing_scores

      (1..iterations).each do
        # Build the new scores in separate hashes so that every vertex is
        # updated from the same snapshot of the previous step.
        authorities = {}
        @graph.each_vertex do |vertex|
          authorities[vertex] = sum_scores(@graph.in_links(vertex), @hub_scores)
        end
        normalize(authorities)

        hubs = {}
        @graph.each_vertex do |vertex|
          hubs[vertex] = sum_scores(@graph.out_links(vertex), authorities)
        end
        normalize(hubs)

        @authority_scores = authorities
        @hub_scores = hubs
      end
    end

    # The +how_many+ vertices with the highest hub scores, best first.
    def top_hub_scores(how_many=5)
      top_ranked(@hub_scores, how_many)
    end

    # The +how_many+ vertices with the highest authority scores, best first.
    def top_authority_scores(how_many=5)
      top_ranked(@authority_scores, how_many)
    end

    private

    # Gives a score of 1.0 to every graph vertex that has none yet.
    def seed_missing_scores
      @graph.each_vertex do |vertex|
        @hub_scores[vertex] ||= 1.0
        @authority_scores[vertex] ||= 1.0
      end
    end

    # Sum of the scores of +neighbours+; nil neighbours count as none and
    # vertices without a score contribute 0.0.
    def sum_scores(neighbours, scores)
      (neighbours || []).inject(0.0) { |sum, neighbour| sum + scores.fetch(neighbour, 0.0) }
    end

    # Scales +scores+ in place to unit Euclidean length. An all-zero set
    # (e.g. a graph without edges) is left untouched.
    def normalize(scores)
      norm = Math.sqrt(scores.values.inject(0.0) { |sum, score| sum + score * score })
      return scores if norm.zero?

      scores.keys.each { |vertex| scores[vertex] = scores[vertex] / norm }
      scores
    end

    # Vertices of +scores+ ordered by descending score, limited to +how_many+.
    def top_ranked(scores, how_many)
      scores.sort_by { |_vertex, score| score }.reverse.first(how_many).map { |vertex, _score| vertex }
    end

  end

end
|
data/lib/hits.rb
ADDED
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hits
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brian Sam-Bodden
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-08 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)
|
17
|
+
email: bsbodden@integrallis.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
- LICENSE
|
25
|
+
files:
|
26
|
+
- LICENSE
|
27
|
+
- README
|
28
|
+
- Rakefile
|
29
|
+
- lib/hits/graph.rb
|
30
|
+
- lib/hits/hits.rb
|
31
|
+
- lib/hits.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage:
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
version:
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.3.5
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)
|
60
|
+
test_files: []
|
61
|
+
|