hits 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (7) hide show
  1. data/LICENSE +3 -0
  2. data/README +42 -0
  3. data/Rakefile +44 -0
  4. data/lib/hits/graph.rb +48 -0
  5. data/lib/hits/hits.rb +36 -0
  6. data/lib/hits.rb +2 -0
  7. metadata +61 -0
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ == hits
2
+
3
+ Put appropriate LICENSE for your project here.
data/README ADDED
@@ -0,0 +1,42 @@
1
+ == hits
2
+
3
+ A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities). See http://en.wikipedia.org/wiki/HITS_algorithm
4
+
5
+ require 'rubygems'
6
+ require 'hits'
7
+
8
+ # create a graph
9
+ graph = Hits::Graph.new
10
+
11
+ # add some edges to the graph with weights
12
+ graph.add_edge(:bsbodden, :objo, 1.0)
13
+ graph.add_edge(:bsbodden, :nusairat, 2.0)
14
+ graph.add_edge(:bsbodden, :looselytyped, 3.0)
15
+ graph.add_edge(:bsbodden, :neal4d, 1.5)
16
+ graph.add_edge(:objo, :nusairat, 2.5)
17
+ graph.add_edge(:objo, :bsbodden, 1.0)
18
+ graph.add_edge(:neal4d, :bsbodden, 1.15)
19
+ graph.add_edge(:nusairat, :bsbodden, 4.5)
20
+
21
+ # textual display of the graph
22
+ puts "graph ==> #{graph}"
23
+
24
+ # create a HITS for the graph
25
+ hits = Hits::Hits.new(graph)
26
+
27
+ # show each vertex's incoming and outgoing links (in-links and out-links)
28
+ graph.each_vertex { |v| puts "in links for #{v} ==> #{graph.in_links(v)}, out links for #{v} ==> #{graph.out_links(v)}"}
29
+
30
+ # compute HITS with the default number of iterations
31
+ hits.compute_hits
32
+
33
+ # print the top HUBS and AUTHORITIES
34
+ puts "=== TOP HUBS ==="
35
+ hits.top_hub_scores.each do |hub|
36
+ puts "hub #{hub}"
37
+ end
38
+
39
+ puts "=== TOP AUTHORITIES ==="
40
+ hits.top_authority_scores.each do |authority|
41
+ puts "authority #{authority}"
42
+ end
data/Rakefile ADDED
@@ -0,0 +1,44 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/rdoctask'
6
+ require 'rake/testtask'
7
+ require 'spec/rake/spectask'
8
+
9
# Gem specification for the hits gem. The same object is handed to the
# packaging task below, so everything the built gem needs at install time
# (files, require path, runtime dependencies) has to be declared here.
spec = Gem::Specification.new do |s|
  s.name = 'hits'
  s.version = '0.0.1'
  # has_rdoc= was deprecated and then dropped in newer RubyGems releases;
  # only set it where the setter still exists so the Rakefile keeps loading.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
  s.extra_rdoc_files = ['README', 'LICENSE']
  s.summary = "A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)"
  s.description = s.summary
  s.author = 'Brian Sam-Bodden'
  s.email = 'bsbodden@integrallis.com'
  s.files = %w(LICENSE README Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
  # lib/hits/graph.rb requires rgl/dot, rgl/adjacency and rgl/bidirectional,
  # so the gem cannot be loaded unless rgl is installed alongside it.
  s.add_dependency 'rgl'
end
22
+
23
# Packaging task for the gem described by +spec+; tar and zip archives are
# requested in addition to the .gem file.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_tar = true
  pkg.need_zip = true
end
28
+
29
# RDoc task: README, LICENSE and every Ruby file under lib/ are documented
# into doc/rdoc, with README as the landing page.
Rake::RDocTask.new do |doc|
  doc.rdoc_files.add(%w[README LICENSE lib/**/*.rb])
  doc.main = "README" # page to start on
  doc.title = "hits Docs"
  doc.rdoc_dir = 'doc/rdoc' # rdoc output folder
  doc.options << '--line-numbers'
end
37
+
38
# Test task: picks up every Ruby file below test/.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList['test/**/*.rb']
end
41
+
42
# Spec task: picks up every Ruby file below spec/.
Spec::Rake::SpecTask.new do |spec_task|
  spec_task.spec_files = FileList['spec/**/*.rb']
end
data/lib/hits/graph.rb ADDED
@@ -0,0 +1,48 @@
1
+ require 'rgl/dot'
2
+ require 'rgl/adjacency'
3
+ require 'rgl/bidirectional'
4
+
5
module Hits

  # Directed graph with per-edge weights, backed by an
  # RGL::DirectedAdjacencyGraph.
  #
  # Alongside the RGL graph it maintains two hashes:
  # * @in_links     - target vertex => array of source vertices
  # * @edge_weights - [to, from] pair => weight
  #
  # Note that weights are keyed target-first ([to, from]), which is the
  # reverse of the add_edge(from, to) argument order.
  class Graph
    # The underlying RGL::DirectedAdjacencyGraph.
    attr_reader :graph

    def initialize
      @graph = RGL::DirectedAdjacencyGraph.new
      @in_links = {}
      @edge_weights = {}
    end

    # Adds a directed edge from +from+ to +to+ and records its weight.
    # Adding the same edge again overwrites the stored weight without
    # duplicating the in-link entry. Returns the weight.
    def add_edge(from, to, weight = 1.0)
      @graph.add_edge(from, to)
      sources = (@in_links[to] ||= [])
      sources << from unless sources.include?(from)
      @edge_weights[[to, from]] = weight
    end

    # Returns the array of vertices with an edge pointing at +vertex+, or
    # nil when no edge targets it.
    def in_links(vertex)
      @in_links[vertex]
    end

    # Returns the vertices that +vertex+ points at, as reported by the
    # underlying RGL graph.
    def out_links(vertex)
      @graph.adjacent_vertices(vertex)
    end

    # Yields every vertex of the underlying RGL graph to the given block.
    def each_vertex(&b)
      @graph.each_vertex(&b)
    end

    # Returns the weight stored for the edge from +from+ to +to+, or nil
    # when that edge was never added.
    def weight(to, from)
      @edge_weights[[to, from]]
    end

    # Updates the weight of an existing edge from +from+ to +to+.
    # Unknown edges are left untouched. Returns the new weight, or nil when
    # the edge does not exist.
    def set_weight(to, from, weight)
      key = [to, from]
      # key? rather than truthiness, so an edge stored with a nil/false
      # weight can still be updated.
      @edge_weights[key] = weight if @edge_weights.key?(key)
    end

    # Kept for backward compatibility only: a setter that takes three
    # arguments cannot be invoked with assignment syntax, so this is only
    # reachable through send. Prefer set_weight.
    def weight=(to, from, weight)
      set_weight(to, from, weight)
    end

    # Textual form of the graph: the string form of its edge list.
    def to_s
      @graph.edges.to_a.to_s
    end

  end
end
data/lib/hits/hits.rb ADDED
@@ -0,0 +1,36 @@
1
module Hits

  # Computes hub and authority scores (Kleinberg's HITS) for a graph.
  #
  # The graph only needs to respond to:
  # * each_vertex        - yields every vertex
  # * in_links(vertex)   - enumerable of vertices pointing at vertex, or nil
  # * out_links(vertex)  - enumerable of vertices vertex points at, or nil
  class Hits
    # Hashes of vertex => current score.
    attr_reader :hub_scores, :authority_scores

    # Every vertex known to +graph+ at construction time starts with a hub
    # and authority score of 1.0.
    def initialize(graph)
      @graph = graph
      @hub_scores = {}
      @authority_scores = {}
      @graph.each_vertex do |vertex|
        @hub_scores[vertex] = 1.0
        @authority_scores[vertex] = 1.0
      end
    end

    # Runs +iterations+ rounds of the HITS update:
    # 1. authority(v) = sum of hub scores of the vertices linking to v
    # 2. hub(v)       = sum of the new authority scores of the vertices v links to
    # 3. both score sets are scaled to unit Euclidean length
    #
    # Each step works on a complete snapshot of the other score set, so the
    # result does not depend on vertex iteration order, and the scaling
    # keeps the values finite however many iterations are requested.
    def compute_hits(iterations = 25)
      (1..iterations).each do
        @authority_scores = normalize(neighbor_sums(@hub_scores) { |vertex| @graph.in_links(vertex) })
        @hub_scores = normalize(neighbor_sums(@authority_scores) { |vertex| @graph.out_links(vertex) })
      end
    end

    # Returns up to +how_many+ vertices, highest hub score first.
    def top_hub_scores(how_many=5)
      top_vertices(@hub_scores, how_many)
    end

    # Returns up to +how_many+ vertices, highest authority score first.
    def top_authority_scores(how_many=5)
      top_vertices(@authority_scores, how_many)
    end

    private

    # Builds vertex => sum of +scores+ over the neighbors the block returns
    # for that vertex. A nil neighbor list counts as "no neighbors".
    def neighbor_sums(scores)
      totals = {}
      @graph.each_vertex do |vertex|
        neighbors = yield(vertex) || []
        # Distinct block-parameter names: reusing +vertex+ here would
        # clobber the outer variable on Ruby 1.8.
        # Neighbors with no recorded score contribute 0.0.
        totals[vertex] = neighbors.inject(0.0) { |sum, neighbor| sum + scores.fetch(neighbor, 0.0) }
      end
      totals
    end

    # Scales +scores+ to unit Euclidean length. An all-zero set is returned
    # unchanged to avoid dividing by zero.
    def normalize(scores)
      norm = Math.sqrt(scores.values.inject(0.0) { |sum, score| sum + score * score })
      return scores if norm.zero?

      scaled = {}
      scores.each { |vertex, score| scaled[vertex] = score / norm }
      scaled
    end

    # Vertices of +scores+ ordered by descending score, limited to +how_many+.
    def top_vertices(scores, how_many)
      ranked = scores.sort_by { |_vertex, score| score }.reverse
      ranked.first(how_many).map { |vertex, _score| vertex }
    end

  end

end
data/lib/hits.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'hits/hits'
2
+ require 'hits/graph'
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hits
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brian Sam-Bodden
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-08 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)
17
+ email: bsbodden@integrallis.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ - LICENSE
25
+ files:
26
+ - LICENSE
27
+ - README
28
+ - Rakefile
29
+ - lib/hits/graph.rb
30
+ - lib/hits/hits.rb
31
+ - lib/hits.rb
32
+ has_rdoc: true
33
+ homepage:
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options: []
38
+
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements: []
54
+
55
+ rubyforge_project:
56
+ rubygems_version: 1.3.5
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)
60
+ test_files: []
61
+