hits 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (7) hide show
  1. data/LICENSE +3 -0
  2. data/README +42 -0
  3. data/Rakefile +44 -0
  4. data/lib/hits/graph.rb +48 -0
  5. data/lib/hits/hits.rb +36 -0
  6. data/lib/hits.rb +2 -0
  7. metadata +61 -0
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ == hits
2
+
3
+ Put appropriate LICENSE for your project here.
data/README ADDED
@@ -0,0 +1,42 @@
1
+ == hits
2
+
3
+ A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities). See http://en.wikipedia.org/wiki/HITS_algorithm
4
+
5
+ require 'rubygems'
6
+ require 'hits'
7
+
8
+ # create a graph
9
+ graph = Hits::Graph.new
10
+
11
+ # add some edges to the graph with weights
12
+ graph.add_edge(:bsbodden, :objo, 1.0)
13
+ graph.add_edge(:bsbodden, :nusairat, 2.0)
14
+ graph.add_edge(:bsbodden, :looselytyped, 3.0)
15
+ graph.add_edge(:bsbodden, :neal4d, 1.5)
16
+ graph.add_edge(:objo, :nusairat, 2.5)
17
+ graph.add_edge(:objo, :bsbodden, 1.0)
18
+ graph.add_edge(:neal4d, :bsbodden, 1.15)
19
+ graph.add_edge(:nusairat, :bsbodden, 4.5)
20
+
21
+ # textual display of the graph
22
+ puts "graph ==> #{graph}"
23
+
24
+ # create a HITS for the graph
25
+ hits = Hits::Hits.new(graph)
26
+
27
+ # show each vertex's incoming and outgoing links (in-links and out-links)
28
+ graph.each_vertex { |v| puts "in links for #{v} ==> #{graph.in_links(v)}, out links for #{v} ==> #{graph.out_links(v)}"}
29
+
30
+ # compute HITS with the default number of iterations
31
+ hits.compute_hits
32
+
33
+ # print the top HUBS and AUTHORITIES
34
+ puts "=== TOP HUBS ==="
35
+ hits.top_hub_scores.each do |hub|
36
+ puts "hub #{hub}"
37
+ end
38
+
39
+ puts "=== TOP AUTHORITIES ==="
40
+ hits.top_authority_scores.each do |authority|
41
+ puts "authority #{authority}"
42
+ end
data/Rakefile ADDED
@@ -0,0 +1,44 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/rdoctask'
6
+ require 'rake/testtask'
7
+ require 'spec/rake/spectask'
8
+
9
# Gem specification shared by the packaging task below.
spec = Gem::Specification.new do |s|
  s.name = 'hits'
  s.version = '0.0.1'
  s.has_rdoc = true
  s.extra_rdoc_files = ['README', 'LICENSE']
  s.summary = "A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)"
  s.description = s.summary
  s.author = 'Brian Sam-Bodden'
  s.email = 'bsbodden@integrallis.com'
  s.files = %w(LICENSE README Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
  # lib/hits/graph.rb requires rgl/dot, rgl/adjacency and rgl/bidirectional at
  # load time, so the gem cannot be used unless rgl is installed alongside it.
  s.add_dependency 'rgl'
end

# Builds the .gem plus tar and zip archives from the specification above.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

# Generates RDoc for the README, LICENSE and library sources into doc/rdoc.
Rake::RDocTask.new do |rdoc|
  files = ['README', 'LICENSE', 'lib/**/*.rb']
  rdoc.rdoc_files.add(files)
  rdoc.main = "README" # page to start on
  rdoc.title = "hits Docs"
  rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
  rdoc.options << '--line-numbers'
end

# Runs every Ruby file under test/.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/**/*.rb']
end

# Runs every Ruby file under spec/.
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*.rb']
end
data/lib/hits/graph.rb ADDED
@@ -0,0 +1,48 @@
1
+ require 'rgl/dot'
2
+ require 'rgl/adjacency'
3
+ require 'rgl/bidirectional'
4
+
5
module Hits

  # Directed graph with a weight per edge.
  #
  # Edges are stored in an RGL::DirectedAdjacencyGraph (exposed through
  # #graph). Inbound links and edge weights are tracked separately in plain
  # hashes maintained by #add_edge.
  #
  # Note that the weight accessors take their arguments as (to, from), the
  # reverse of #add_edge, which takes (from, to).
  class Graph
    # The underlying RGL::DirectedAdjacencyGraph.
    attr_reader :graph

    def initialize
      @graph = RGL::DirectedAdjacencyGraph.new
      @in_links = {}      # vertex => array of vertices that link to it
      @edge_weights = {}  # [to, from] => weight
    end

    # Adds the edge from -> to and records its weight (1.0 by default).
    # Adding the same edge again keeps a single inbound-link entry and
    # overwrites the stored weight. Returns the weight.
    def add_edge(from, to, weight = 1.0)
      @graph.add_edge(from, to)
      sources = (@in_links[to] ||= [])
      sources << from unless sources.include?(from)
      @edge_weights[[to, from]] = weight
    end

    # Returns the vertices with an edge pointing at +vertex+, in the order the
    # edges were added. A vertex without inbound edges yields an empty array
    # (rather than nil) so callers can iterate the result unconditionally.
    def in_links(vertex)
      @in_links.fetch(vertex) { [] }
    end

    # Returns the vertices +vertex+ points at, as reported by the underlying
    # graph's adjacent_vertices.
    def out_links(vertex)
      @graph.adjacent_vertices(vertex)
    end

    # Yields every vertex of the underlying graph.
    def each_vertex(&b)
      @graph.each_vertex(&b)
    end

    # Returns the weight recorded for the edge from -> to, or nil when no such
    # edge has been added. Arguments are given as (to, from).
    def weight(to, from)
      @edge_weights[[to, from]]
    end

    # Replaces the weight of an existing edge from -> to. Edges that were never
    # added are left alone. Returns the new weight, or nil when nothing was
    # updated. Arguments are given as (to, from).
    def set_weight(to, from, weight)
      edge = [to, from]
      # key? rather than a truthiness check, so an edge whose stored weight is
      # nil or false can still be updated.
      @edge_weights[edge] = weight if @edge_weights.key?(edge)
    end

    # Kept for backward compatibility. Because it takes three arguments it
    # cannot be invoked with assignment syntax (only through send); prefer
    # #set_weight.
    def weight=(to, from, weight)
      set_weight(to, from, weight)
    end

    # String form of the underlying graph's edge list.
    def to_s
      @graph.edges.to_a.to_s
    end

  end
end
data/lib/hits/hits.rb ADDED
@@ -0,0 +1,36 @@
1
module Hits

  # Computes hub and authority scores for the vertices of a graph.
  #
  # The graph only needs to respond to each_vertex, in_links(vertex) and
  # out_links(vertex); the latter two may return nil or an empty collection
  # for a vertex without links. Edge weights are not consulted.
  class Hits

    # Seeds every vertex the graph currently yields with a hub score and an
    # authority score of 1.0. Vertices added to the graph afterwards have no
    # seed score.
    def initialize(graph)
      @graph = graph
      @hub_scores = {}
      @authority_scores = {}
      @graph.each_vertex do |vertex|
        @hub_scores[vertex] = 1.0
        @authority_scores[vertex] = 1.0
      end
    end

    # Runs +iterations+ rounds of the HITS update (25 by default).
    #
    # Each round:
    # 1. sets every authority score to the sum of the hub scores of the
    #    vertices linking to it, using the hub scores of the previous round;
    # 2. sets every hub score to the sum of the fresh authority scores of the
    #    vertices it links to;
    # 3. rescales both score sets to unit Euclidean length.
    #
    # Working from a snapshot makes the outcome independent of the order in
    # which the graph enumerates its vertices, and the rescaling keeps scores
    # bounded no matter how many rounds are run.
    def compute_hits(iterations = 25)
      (1..iterations).each do
        authorities = {}
        @graph.each_vertex do |vertex|
          authorities[vertex] = sum_scores(@graph.in_links(vertex), @hub_scores)
        end

        hubs = {}
        @graph.each_vertex do |vertex|
          hubs[vertex] = sum_scores(@graph.out_links(vertex), authorities)
        end

        @authority_scores = normalize(authorities)
        @hub_scores = normalize(hubs)
      end
    end

    # Returns up to +how_many+ vertices, highest hub score first.
    def top_hub_scores(how_many=5)
      rank(@hub_scores, how_many)
    end

    # Returns up to +how_many+ vertices, highest authority score first.
    def top_authority_scores(how_many=5)
      rank(@authority_scores, how_many)
    end

    private

    # Adds up the scores of +neighbours+; nil or empty neighbours give 0.0.
    def sum_scores(neighbours, scores)
      (neighbours || []).inject(0.0) { |total, neighbour| total + scores[neighbour] }
    end

    # Returns a copy of +scores+ divided by its Euclidean norm. An all-zero
    # score set is returned unchanged to avoid dividing by zero.
    def normalize(scores)
      norm = Math.sqrt(scores.values.inject(0.0) { |total, score| total + score * score })
      return scores if norm.zero?

      scaled = {}
      scores.each { |vertex, score| scaled[vertex] = score / norm }
      scaled
    end

    # Vertices of +scores+ ordered by descending score, limited to +how_many+.
    def rank(scores, how_many)
      scores.sort_by { |_vertex, score| score }.reverse.first(how_many).map { |pair| pair[0] }
    end

  end

end
data/lib/hits.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'hits/hits'
2
+ require 'hits/graph'
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hits
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brian Sam-Bodden
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-08 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)
17
+ email: bsbodden@integrallis.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ - LICENSE
25
+ files:
26
+ - LICENSE
27
+ - README
28
+ - Rakefile
29
+ - lib/hits/graph.rb
30
+ - lib/hits/hits.rb
31
+ - lib/hits.rb
32
+ has_rdoc: true
33
+ homepage:
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options: []
38
+
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements: []
54
+
55
+ rubyforge_project:
56
+ rubygems_version: 1.3.5
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: A poor man's implementation of Jon Kleinberg's Hyperlink-Induced Topic Search (HITS) (also known as Hubs and authorities)
60
+ test_files: []
61
+