rankable_graph 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +26 -0
- data/LICENSE +20 -0
- data/README.rdoc +56 -0
- data/Rakefile +47 -0
- data/VERSION +1 -0
- data/benchmark.rb +30 -0
- data/ext/Makefile +181 -0
- data/ext/extconf.rb +8 -0
- data/ext/mkmf.log +5 -0
- data/ext/rankable_graph.c +226 -0
- data/rankable_graph.gemspec +62 -0
- data/spec/rankable_graph_spec.rb +117 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +80 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
22
|
+
ext/*.bundle
|
23
|
+
ext/*.o
|
24
|
+
lib/*.bundle
|
25
|
+
lib/*.o
|
26
|
+
**/*/*.gem
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Daniel Cadenas
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
= rankable_graph
|
2
|
+
|
3
|
+
A Ruby {PageRank}[http://en.wikipedia.org/wiki/PageRank] like implementation.
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
This gem is mostly written in C with a pretty Ruby wrapper.
|
8
|
+
It's intended to be used for big but not huge graphs, as
|
9
|
+
those are better processed with a map-reduce distributed solution.
|
10
|
+
|
11
|
+
== Usage
|
12
|
+
|
13
|
+
rankable_graph = RankableGraph.new
|
14
|
+
|
15
|
+
#First we draw our directed graph using the link method which receives as parameters two identifiers.
|
16
|
+
#The only restriction for the identifiers is that they should be integers.
|
17
|
+
rankable_graph.link(1234, 4312)
|
18
|
+
rankable_graph.link(9876, 4312)
|
19
|
+
rankable_graph.link(4312, 9876)
|
20
|
+
rankable_graph.link(8888, 4312)
|
21
|
+
|
22
|
+
probability_of_following_a_link = 0.85 # The bigger the number, the lower the probability of teleporting to some random node
|
23
|
+
tolerance = 0.0001 # the smaller the number, the more exact the result will be but more CPU cycles will be needed
|
24
|
+
|
25
|
+
rankable_graph.rank(probability_of_following_a_link, tolerance) do |identifier, rank|
|
26
|
+
puts "Node #{identifier} rank is #{rank}"
|
27
|
+
end
|
28
|
+
|
29
|
+
Which outputs
|
30
|
+
|
31
|
+
Node 1234 rank is 0.0375000014901161
|
32
|
+
Node 4312 rank is 0.479941636323929
|
33
|
+
Node 9876 rank is 0.445058345794678
|
34
|
+
Node 8888 rank is 0.0375000014901161
|
35
|
+
|
36
|
+
These ranks represent the probabilities that a certain node will be visited.
|
37
|
+
For more examples please refer to the tests.
|
38
|
+
|
39
|
+
== Requirements
|
40
|
+
|
41
|
+
* Ruby 1.9
|
42
|
+
* {glib2}[http://library.gnome.org/devel/glib/2.22/] >= 2.22.2
|
43
|
+
|
44
|
+
== Note on Patches/Pull Requests
|
45
|
+
|
46
|
+
* Fork the project.
|
47
|
+
* Make your feature addition or bug fix.
|
48
|
+
* Add tests for it. This is important so I don't break it in a
|
49
|
+
future version unintentionally.
|
50
|
+
* Commit, do not mess with rakefile, version, or history.
|
51
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
52
|
+
* Send me a pull request. Bonus points for topic branches.
|
53
|
+
|
54
|
+
== Copyright
|
55
|
+
|
56
|
+
Copyright (c) 2009 {Cubox}[http://cuboxsa.com]. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "rankable_graph"
|
8
|
+
gem.summary = %Q{A Ruby Pagerank implementation}
|
9
|
+
gem.description = %Q{A Ruby Pagerank implementation}
|
10
|
+
gem.email = "dev@cuboxsa.com"
|
11
|
+
gem.homepage = "http://github.com/cubox/rankable_graph"
|
12
|
+
gem.authors = ["Daniel Cadenas"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
+
gem.extensions = ["ext/extconf.rb"]
|
15
|
+
gem.required_ruby_version = '>= 1.9'
|
16
|
+
gem.requirements << 'glib2, v2.22.2 or greater'
|
17
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
|
+
end
|
19
|
+
rescue LoadError
|
20
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
21
|
+
end
|
22
|
+
|
23
|
+
require 'spec/rake/spectask'
|
24
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
25
|
+
spec.libs << 'lib' << 'spec'
|
26
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
27
|
+
end
|
28
|
+
|
29
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
30
|
+
spec.libs << 'lib' << 'spec'
|
31
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
32
|
+
spec.rcov = true
|
33
|
+
end
|
34
|
+
|
35
|
+
task :spec => :check_dependencies
|
36
|
+
|
37
|
+
task :default => :spec
|
38
|
+
|
39
|
+
require 'rake/rdoctask'
|
40
|
+
Rake::RDocTask.new do |rdoc|
|
41
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
42
|
+
|
43
|
+
rdoc.rdoc_dir = 'rdoc'
|
44
|
+
rdoc.title = "rankable_graph #{version}"
|
45
|
+
rdoc.rdoc_files.include('README*')
|
46
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
47
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/benchmark.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Benchmark script for the RankableGraph C extension: builds a random graph
# with one million nodes and times the rank computation on it.
#require 'rubygems'
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'ext'))

# The ext directory was just put on the load path, so the extension is required
# by its bare name; 'ext/rankable_graph' only resolved when the current
# directory happened to be the project root and was itself on the load path.
require 'rankable_graph'

require "benchmark"
include Benchmark

n = 1000000
bmbm(12) do |test|
  r = RankableGraph.new
  # fixed seed so every run builds the same graph
  srand(5)
  n.times do |i|
    #each node has an average of 30 links
    rand(60).times do
      j = rand(n)
      #first three nodes are more linked to than the rest
      r.link(i, (j > 800000 ? rand(3) : j))
    end
  end

  test.report("c:") do
    result = []
    r.rank(0.85, 0.001){|key, val| result << [key, val]}
    # join explicitly: interpolating the bare array prints its inspect form
    first_values = result[0..6].map{|(k,v)| "[#{k}]=#{"%.4f" % (v * 100)}"}
    puts "7 first values are #{first_values.join(", ")}"
  end
end
|
28
|
+
|
29
|
+
|
30
|
+
|
data/ext/Makefile
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = /usr/local/include/ruby19-1.9.1
|
8
|
+
hdrdir = /usr/local/include/ruby19-1.9.1
|
9
|
+
arch_hdrdir = /usr/local/include/ruby19-1.9.1/$(arch)
|
10
|
+
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
11
|
+
prefix = $(DESTDIR)/usr/local
|
12
|
+
exec_prefix = $(prefix)
|
13
|
+
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
14
|
+
sitehdrdir = $(rubyhdrdir)/site_ruby
|
15
|
+
rubyhdrdir = $(includedir)/$(RUBY_INSTALL_NAME)-$(ruby_version)
|
16
|
+
vendordir = $(libdir)/$(RUBY_INSTALL_NAME)/vendor_ruby
|
17
|
+
sitedir = $(libdir)/$(RUBY_INSTALL_NAME)/site_ruby
|
18
|
+
mandir = $(datarootdir)/man
|
19
|
+
localedir = $(datarootdir)/locale
|
20
|
+
libdir = $(exec_prefix)/lib
|
21
|
+
psdir = $(docdir)
|
22
|
+
pdfdir = $(docdir)
|
23
|
+
dvidir = $(docdir)
|
24
|
+
htmldir = $(docdir)
|
25
|
+
infodir = $(datarootdir)/info
|
26
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
27
|
+
oldincludedir = $(DESTDIR)/usr/include
|
28
|
+
includedir = $(prefix)/include
|
29
|
+
localstatedir = $(prefix)/var
|
30
|
+
sharedstatedir = $(prefix)/com
|
31
|
+
sysconfdir = $(prefix)/etc
|
32
|
+
datadir = $(datarootdir)
|
33
|
+
datarootdir = $(prefix)/share
|
34
|
+
libexecdir = $(exec_prefix)/libexec
|
35
|
+
sbindir = $(exec_prefix)/sbin
|
36
|
+
bindir = $(exec_prefix)/bin
|
37
|
+
rubylibdir = $(libdir)/$(ruby_install_name)/$(ruby_version)
|
38
|
+
archdir = $(rubylibdir)/$(arch)
|
39
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
40
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
41
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
42
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
43
|
+
|
44
|
+
CC = gcc
|
45
|
+
CXX = g++
|
46
|
+
LIBRUBY = $(LIBRUBY_SO)
|
47
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
48
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
49
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
|
50
|
+
OUTFLAG = -o
|
51
|
+
COUTFLAG = -o
|
52
|
+
|
53
|
+
RUBY_EXTCONF_H =
|
54
|
+
cflags = $(optflags) $(debugflags) $(warnflags)
|
55
|
+
optflags = -O2
|
56
|
+
debugflags = -g
|
57
|
+
warnflags = -Wall -Wno-parentheses
|
58
|
+
CFLAGS = -fno-common $(cflags) -fno-common -pipe -fno-common -I/opt/local/include/glib-2.0 -I/opt/local/lib/glib-2.0/include -I/opt/local/include
|
59
|
+
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
60
|
+
DEFS =
|
61
|
+
CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
|
62
|
+
CXXFLAGS = $(CFLAGS) $(cxxflags)
|
63
|
+
ldflags = -L. -L/usr/local/lib -L/opt/local/lib
|
64
|
+
dldflags =
|
65
|
+
archflag =
|
66
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
67
|
+
LDSHARED = cc -dynamic -bundle -undefined suppress -flat_namespace
|
68
|
+
LDSHAREDXX = $(LDSHARED)
|
69
|
+
AR = ar
|
70
|
+
EXEEXT =
|
71
|
+
|
72
|
+
RUBY_INSTALL_NAME = ruby19
|
73
|
+
RUBY_SO_NAME = ruby19
|
74
|
+
arch = i386-darwin10.2.0
|
75
|
+
sitearch = i386-darwin10.2.0
|
76
|
+
ruby_version = 1.9.1
|
77
|
+
ruby = /usr/local/bin/ruby19
|
78
|
+
RUBY = $(ruby)
|
79
|
+
RM = rm -f
|
80
|
+
RM_RF = $(RUBY) -run -e rm -- -rf
|
81
|
+
RMDIRS = $(RUBY) -run -e rmdir -- -p
|
82
|
+
MAKEDIRS = mkdir -p
|
83
|
+
INSTALL = /usr/bin/install -c
|
84
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
85
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
86
|
+
COPY = cp
|
87
|
+
|
88
|
+
#### End of system configuration section. ####
|
89
|
+
|
90
|
+
preload =
|
91
|
+
|
92
|
+
libpath = . $(libdir)
|
93
|
+
LIBPATH = -L. -L$(libdir)
|
94
|
+
DEFFILE =
|
95
|
+
|
96
|
+
CLEANFILES = mkmf.log
|
97
|
+
DISTCLEANFILES =
|
98
|
+
DISTCLEANDIRS =
|
99
|
+
|
100
|
+
extout =
|
101
|
+
extout_prefix =
|
102
|
+
target_prefix =
|
103
|
+
LOCAL_LIBS =
|
104
|
+
LIBS = $(LIBRUBYARG_SHARED) -lglib-2.0 -lintl -liconv -lpthread -ldl -lobjc
|
105
|
+
SRCS = rankable_graph.c
|
106
|
+
OBJS = rankable_graph.o
|
107
|
+
TARGET = rankable_graph
|
108
|
+
DLLIB = $(TARGET).bundle
|
109
|
+
EXTSTATIC =
|
110
|
+
STATIC_LIB =
|
111
|
+
|
112
|
+
BINDIR = $(bindir)
|
113
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
114
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
115
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
116
|
+
HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
|
117
|
+
ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
|
118
|
+
|
119
|
+
TARGET_SO = $(DLLIB)
|
120
|
+
CLEANLIBS = $(TARGET).bundle
|
121
|
+
CLEANOBJS = *.o *.bak
|
122
|
+
|
123
|
+
all: $(DLLIB)
|
124
|
+
static: $(STATIC_LIB)
|
125
|
+
|
126
|
+
clean-rb-default::
|
127
|
+
clean-rb::
|
128
|
+
clean-so::
|
129
|
+
clean: clean-so clean-rb-default clean-rb
|
130
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
131
|
+
|
132
|
+
distclean-rb-default::
|
133
|
+
distclean-rb::
|
134
|
+
distclean-so::
|
135
|
+
distclean: clean distclean-so distclean-rb-default distclean-rb
|
136
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
137
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
138
|
+
@-$(RMDIRS) $(DISTCLEANDIRS)
|
139
|
+
|
140
|
+
realclean: distclean
|
141
|
+
install: install-so install-rb
|
142
|
+
|
143
|
+
install-so: $(RUBYARCHDIR)
|
144
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
145
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
146
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
147
|
+
install-rb: pre-install-rb install-rb-default
|
148
|
+
install-rb-default: pre-install-rb-default
|
149
|
+
pre-install-rb: Makefile
|
150
|
+
pre-install-rb-default: Makefile
|
151
|
+
$(RUBYARCHDIR):
|
152
|
+
$(MAKEDIRS) $@
|
153
|
+
|
154
|
+
site-install: site-install-so site-install-rb
|
155
|
+
site-install-so: install-so
|
156
|
+
site-install-rb: install-rb
|
157
|
+
|
158
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
159
|
+
|
160
|
+
.cc.o:
|
161
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
162
|
+
|
163
|
+
.cxx.o:
|
164
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
165
|
+
|
166
|
+
.cpp.o:
|
167
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
168
|
+
|
169
|
+
.C.o:
|
170
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
171
|
+
|
172
|
+
.c.o:
|
173
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
174
|
+
|
175
|
+
$(DLLIB): $(OBJS) Makefile
|
176
|
+
@-$(RM) $(@)
|
177
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
$(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
|
data/ext/extconf.rb
ADDED
data/ext/mkmf.log
ADDED
data/ext/rankable_graph.c
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <glib.h>
|
3
|
+
|
4
|
+
typedef struct {
|
5
|
+
GPtrArray* in_links;
|
6
|
+
GPtrArray* number_out_links;
|
7
|
+
gint current_available_index;
|
8
|
+
GHashTable* key_to_index;
|
9
|
+
GHashTable* index_to_key;
|
10
|
+
} RNStruct;
|
11
|
+
|
12
|
+
|
13
|
+
/*
 * Free callback handed to Data_Wrap_Struct in rankable_graph_allocate.
 * Destroys the four GLib containers referenced by the struct and then
 * releases the struct itself.
 */
static void rankable_graph_free (RNStruct *p){
  g_hash_table_destroy(p->index_to_key);
  g_hash_table_destroy(p->key_to_index);
  /* TRUE releases the pointer storage as well; the elements go through the
     free functions the arrays were created with. */
  g_ptr_array_free(p->number_out_links, TRUE);
  g_ptr_array_free(p->in_links, TRUE);
  /* The struct comes from ALLOC (xmalloc), so it has to be given back with
     xfree; previously it was never released. */
  xfree(p);
}
|
19
|
+
|
20
|
+
static void array_of_arrays_free_func (gpointer array){
|
21
|
+
g_array_free(array, TRUE);
|
22
|
+
}
|
23
|
+
|
24
|
+
/*
 * Allocation function for RankableGraph. Creates the struct with empty GLib
 * containers and wraps it in a Ruby data object whose free callback is
 * rankable_graph_free.
 */
static VALUE rankable_graph_allocate (VALUE klass){
  RNStruct* rn = ALLOC(RNStruct);

  /* each in_links element is a GArray, released through array_of_arrays_free_func */
  rn->in_links = g_ptr_array_new_with_free_func(array_of_arrays_free_func);
  /* each number_out_links element is a heap-allocated gint */
  rn->number_out_links = g_ptr_array_new_with_free_func(g_free);
  /* both tables store plain integers packed into pointers, so no destroy notifiers */
  rn->key_to_index = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
  rn->index_to_key = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
  /* Leave the struct fully initialised even if initialize never runs
     (e.g. a subclass that overrides it without calling super). -1 is the same
     starting value init() assigns. */
  rn->current_available_index = -1;

  return Data_Wrap_Struct(klass, 0, rankable_graph_free, rn);
}
|
34
|
+
|
35
|
+
static VALUE init(VALUE self){
|
36
|
+
RNStruct* rn;
|
37
|
+
Data_Get_Struct(self, RNStruct, rn);
|
38
|
+
rn->current_available_index = -1;
|
39
|
+
return self;
|
40
|
+
}
|
41
|
+
|
42
|
+
static void fill_empty_holes_in_in_links(RNStruct *rn){
|
43
|
+
GArray* in_links_array;
|
44
|
+
const gint in_links_size = rn->in_links->len, size = g_hash_table_size(rn->key_to_index);
|
45
|
+
gint i;
|
46
|
+
if(in_links_size < size){
|
47
|
+
for(i = 0; i < size - in_links_size; i++){
|
48
|
+
in_links_array = g_array_new(FALSE, FALSE, sizeof(gint));
|
49
|
+
g_ptr_array_add(rn->in_links, in_links_array);
|
50
|
+
}
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
static void fill_empty_holes_in_number_out_links(RNStruct *rn){
|
55
|
+
gint i, out_links_size = rn->number_out_links->len;
|
56
|
+
const gint size = g_hash_table_size(rn->key_to_index);
|
57
|
+
gint* zero;
|
58
|
+
if(out_links_size < size){
|
59
|
+
for(i = 0; i < size - out_links_size; i++){
|
60
|
+
zero = g_new(gint, 1);
|
61
|
+
*zero = 0;
|
62
|
+
g_ptr_array_add(rn->number_out_links, zero);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
static void update_in_links(RNStruct *rn, gint from, gint to){
|
68
|
+
fill_empty_holes_in_in_links(rn);
|
69
|
+
GArray* in_links_for_to = g_ptr_array_index(rn->in_links, to);
|
70
|
+
g_array_append_val(in_links_for_to, from);
|
71
|
+
}
|
72
|
+
|
73
|
+
/*
 * Adds one to the out-link counter of the node at array index `from`,
 * first padding number_out_links so that the index exists.
 */
static void update_number_out_links(RNStruct *rn, gint from){
  gint* current_value;

  fill_empty_holes_in_number_out_links(rn);
  current_value = (gint *)g_ptr_array_index(rn->number_out_links, from);
  /* Plain increment. The former `*current_value = (*current_value)++` wrote
     the same object twice without a sequence point, which is undefined
     behaviour and may leave the counter unchanged. */
  (*current_value)++;
}
|
78
|
+
|
79
|
+
static void link_with_indices(RNStruct *rn, gint from, gint to){
|
80
|
+
update_in_links(rn, from, to);
|
81
|
+
update_number_out_links(rn, from);
|
82
|
+
}
|
83
|
+
|
84
|
+
static gint key_as_array_index(RNStruct *rn, VALUE key){
|
85
|
+
gpointer key_as_index_ptr;
|
86
|
+
gint key_as_int = FIX2INT(key);
|
87
|
+
|
88
|
+
if(!g_hash_table_lookup_extended(rn->key_to_index, GINT_TO_POINTER(key_as_int), NULL, &key_as_index_ptr)){
|
89
|
+
key_as_index_ptr = GINT_TO_POINTER(++rn->current_available_index);
|
90
|
+
g_hash_table_insert(rn->key_to_index, GINT_TO_POINTER(key_as_int), key_as_index_ptr);
|
91
|
+
g_hash_table_insert(rn->index_to_key, key_as_index_ptr, GINT_TO_POINTER(key_as_int));
|
92
|
+
}
|
93
|
+
|
94
|
+
return GPOINTER_TO_INT(key_as_index_ptr);
|
95
|
+
}
|
96
|
+
|
97
|
+
/*
 * RankableGraph#link(from, to)
 *
 * Records a directed edge between two integer identifiers, registering each
 * identifier on first use. Returns nil.
 *
 * Raises TypeError when either identifier is not a Fixnum, and RangeError
 * (from FIX2INT, on platforms where int is narrower than long) when one does
 * not fit in a C int.
 */
static VALUE link(VALUE self, VALUE from, VALUE to){
  RNStruct* rn;
  gint from_as_index, to_as_index;

  /* key_as_array_index converts with FIX2INT, which does not check the
     object type; reject anything that is not a Fixnum instead of silently
     linking a garbage key. */
  if(!FIXNUM_P(from) || !FIXNUM_P(to)){
    rb_raise(rb_eTypeError, "node identifiers must be integers");
  }
  /* Run both conversions before the graph is touched, so a RangeError on
     `to` cannot leave `from` registered without its array slots. */
  (void)FIX2INT(from);
  (void)FIX2INT(to);

  Data_Get_Struct(self, RNStruct, rn);

  from_as_index = key_as_array_index(rn, from);
  to_as_index = key_as_array_index(rn, to);

  link_with_indices(rn, from_as_index, to_as_index);
  return Qnil;
}
|
107
|
+
|
108
|
+
static gfloat calculate_change(gfloat *a, gfloat *b, gint size){
|
109
|
+
gint i;
|
110
|
+
gfloat acc = 0;
|
111
|
+
for(i = 0; i < size; i++){
|
112
|
+
acc += ABS(a[i] - b[i]);
|
113
|
+
}
|
114
|
+
return acc;
|
115
|
+
}
|
116
|
+
|
117
|
+
/*
 * Returns a newly allocated GArray of gint holding the array index of every
 * node whose out-link counter is zero. The caller owns the array and must
 * release it with g_array_free.
 */
static GArray * calculate_dangling_nodes(RNStruct *rn){
  GArray* dangling_nodes = g_array_new(FALSE, FALSE, sizeof(gint));
  /* keep the bound in a gint so the loop comparison is not signed/unsigned */
  const gint size = rn->number_out_links->len;
  gint i;

  for(i = 0; i < size; i++){
    if(*(gint *)g_ptr_array_index(rn->number_out_links, i) == 0){
      /* g_array_append_val copies element-size bytes from the address of its
         argument, so the value must be a gint. Appending a gpointer copied
         only the first sizeof(gint) bytes of the pointer, which is the wrong
         half on big-endian 64-bit targets. */
      g_array_append_val(dangling_nodes, i);
    }
  }
  return dangling_nodes;
}
|
129
|
+
|
130
|
+
static gfloat* step(gfloat s, gfloat t_over_size, gfloat *p, RNStruct *rn, GArray *dangling_nodes){
|
131
|
+
const gint size = g_hash_table_size(rn->key_to_index);
|
132
|
+
gint i, j;
|
133
|
+
gfloat inner_product = 0;
|
134
|
+
for(i = 0; i < dangling_nodes->len; i++){
|
135
|
+
inner_product += p[GPOINTER_TO_INT(g_array_index(dangling_nodes, gint, i))];
|
136
|
+
}
|
137
|
+
const gfloat inner_product_over_size = inner_product / (gfloat)size;
|
138
|
+
|
139
|
+
gfloat ksum, vsum = 0;
|
140
|
+
gint index;
|
141
|
+
gfloat* v = g_new0(gfloat, size);
|
142
|
+
GArray* in_links_for_i;
|
143
|
+
for(i = 0; i < size; i++){
|
144
|
+
ksum = 0;
|
145
|
+
in_links_for_i = (GArray *)g_ptr_array_index(rn->in_links, i);
|
146
|
+
for(j = 0; j < in_links_for_i->len; j++){
|
147
|
+
index = GPOINTER_TO_INT(g_array_index(in_links_for_i, gint, j));
|
148
|
+
ksum += p[index] / *((gint *)g_ptr_array_index(rn->number_out_links, index));
|
149
|
+
}
|
150
|
+
|
151
|
+
v[i] = s * (ksum + inner_product_over_size) + t_over_size;
|
152
|
+
vsum += v[i];
|
153
|
+
}
|
154
|
+
|
155
|
+
const gfloat inverse_of_vsum = 1 / vsum;
|
156
|
+
for(i = 0; i < size; i++){
|
157
|
+
v[i] *= inverse_of_vsum;
|
158
|
+
}
|
159
|
+
return v;
|
160
|
+
}
|
161
|
+
|
162
|
+
/*
 * RankableGraph#rank(s, tolerance) { |key, rank| ... }
 *
 * Starts from a uniform vector and applies step() repeatedly until the summed
 * absolute difference between two successive vectors is no longer greater
 * than `tolerance`, then yields each original key together with its rank.
 * Returns nil, and does nothing at all when no block is given.
 *
 *   s         - Numeric, forwarded to step() as its first argument
 *   tolerance - Numeric convergence threshold
 *
 * Raises TypeError (from NUM2DBL) when an argument is not numeric.
 * NOTE: if the block raises or breaks, `p` and `dangling_nodes` are not
 * released.
 */
static VALUE rank(VALUE self, VALUE s, VALUE tolerance){
  /* rb_block_given_p() returns a C truth value, not a Ruby VALUE, so it must
     not be compared against Qtrue. */
  if(!rb_block_given_p()){
    return Qnil;
  }

  RNStruct* rn;
  Data_Get_Struct(self, RNStruct, rn);

  /* Convert the arguments once and before any allocation: NUM2DBL can raise,
     and raising after g_new would leak the buffers. */
  const double s_value = NUM2DBL(s);
  const double tolerance_value = NUM2DBL(tolerance);

  const gint size = g_hash_table_size(rn->key_to_index);
  const gfloat inverse_of_size = 1.0 / size;
  const gfloat t_over_size = (1.0 - s_value) / size;

  /* every registered key must own a slot in both arrays */
  g_assert_cmpuint(rn->in_links->len, ==, size);
  g_assert_cmpuint(rn->number_out_links->len, ==, size);

  GArray* dangling_nodes = calculate_dangling_nodes(rn);
  gfloat* p = g_new(gfloat, size);
  gint i;
  for(i = 0; i < size; i++){
    p[i] = inverse_of_size;
  }

  gfloat* new_p;
  /* start above any sensible tolerance so at least one step runs */
  gfloat change = 2;
  while(change > tolerance_value){
    new_p = step(s_value, t_over_size, p, rn, dangling_nodes);
    change = calculate_change(p, new_p, size);
    g_free(p);
    p = new_p;
  }

  gpointer key;
  for(i = 0; i < size; i++){
    key = g_hash_table_lookup(rn->index_to_key, GINT_TO_POINTER(i));
    /* unpack the stored pointer into an int before boxing it as a Fixnum */
    rb_yield_values(2, INT2FIX(GPOINTER_TO_INT(key)), rb_float_new(p[i]));
  }

  g_free(p);
  g_array_free(dangling_nodes, TRUE);
  return Qnil;
}
|
198
|
+
|
199
|
+
// Copy across state (used by clone and dup)
|
200
|
+
static VALUE rankable_graph_init_copy(VALUE copy, VALUE orig){
|
201
|
+
RNStruct* orig_rn;
|
202
|
+
RNStruct* copy_rn;
|
203
|
+
|
204
|
+
if (copy == orig) return copy;
|
205
|
+
|
206
|
+
if (TYPE(orig) != T_DATA || RDATA(orig)->dfree != (RUBY_DATA_FUNC)rankable_graph_free) {
|
207
|
+
rb_raise(rb_eTypeError, "wrong argument type");
|
208
|
+
}
|
209
|
+
|
210
|
+
Data_Get_Struct(orig, RNStruct, orig_rn);
|
211
|
+
Data_Get_Struct(copy, RNStruct, copy_rn);
|
212
|
+
MEMCPY(copy_rn, orig_rn, RNStruct, 1);
|
213
|
+
return copy;
|
214
|
+
}
|
215
|
+
|
216
|
+
static VALUE rb_cRankableGraph;
|
217
|
+
|
218
|
+
void Init_rankable_graph(){
|
219
|
+
rb_cRankableGraph = rb_define_class("RankableGraph", rb_cObject);
|
220
|
+
rb_define_alloc_func(rb_cRankableGraph, rankable_graph_allocate);
|
221
|
+
rb_define_method(rb_cRankableGraph, "initialize", init, 0);
|
222
|
+
rb_define_method(rb_cRankableGraph, "initialize_copy", rankable_graph_init_copy, 1);
|
223
|
+
rb_define_method(rb_cRankableGraph, "link", link, 2);
|
224
|
+
rb_define_method(rb_cRankableGraph, "rank", rank, 2);
|
225
|
+
}
|
226
|
+
|
data/rankable_graph.gemspec
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{rankable_graph}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Daniel Cadenas"]
|
12
|
+
s.date = %q{2010-02-01}
|
13
|
+
s.description = %q{A Ruby Pagerank implementation}
|
14
|
+
s.email = %q{dev@cuboxsa.com}
|
15
|
+
s.extensions = ["ext/extconf.rb"]
|
16
|
+
s.extra_rdoc_files = [
|
17
|
+
"LICENSE",
|
18
|
+
"README.rdoc"
|
19
|
+
]
|
20
|
+
s.files = [
|
21
|
+
".document",
|
22
|
+
".gitignore",
|
23
|
+
"LICENSE",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"benchmark.rb",
|
28
|
+
"ext/Makefile",
|
29
|
+
"ext/extconf.rb",
|
30
|
+
"ext/mkmf.log",
|
31
|
+
"ext/rankable_graph.c",
|
32
|
+
"rankable_graph.gemspec",
|
33
|
+
"spec/rankable_graph_spec.rb",
|
34
|
+
"spec/spec.opts",
|
35
|
+
"spec/spec_helper.rb"
|
36
|
+
]
|
37
|
+
s.homepage = %q{http://github.com/cubox/rankable_graph}
|
38
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
39
|
+
s.require_paths = ["lib"]
|
40
|
+
s.required_ruby_version = Gem::Requirement.new(">= 1.9")
|
41
|
+
s.requirements = ["glib2, v2.22.2 or greater"]
|
42
|
+
s.rubygems_version = %q{1.3.5}
|
43
|
+
s.summary = %q{A Ruby Pagerank implementation}
|
44
|
+
s.test_files = [
|
45
|
+
"spec/rankable_graph_spec.rb",
|
46
|
+
"spec/spec_helper.rb"
|
47
|
+
]
|
48
|
+
|
49
|
+
if s.respond_to? :specification_version then
|
50
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
51
|
+
s.specification_version = 3
|
52
|
+
|
53
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
54
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
57
|
+
end
|
58
|
+
else
|
59
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
data/spec/rankable_graph_spec.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
class Float
|
4
|
+
# 0.666666666 -> 66.7
|
5
|
+
def to_percentage
|
6
|
+
100 * (self * (10 ** 3)).round / (10 ** 3).to_f
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
def assert_rank(rankable_graph, expected_rank)
|
11
|
+
rankable_graph.rank(0.85, 0.0001){|label, rank| rank.to_percentage.should == expected_rank[label]}
|
12
|
+
end
|
13
|
+
|
14
|
+
describe RankableGraph do
|
15
|
+
it "should return correct results when having a dangling node" do
|
16
|
+
rankable_graph = RankableGraph.new
|
17
|
+
#node 2 is a dangling node because it has no outbound links
|
18
|
+
rankable_graph.link(0, 2)
|
19
|
+
rankable_graph.link(1, 2)
|
20
|
+
|
21
|
+
expected_rank = {
|
22
|
+
0 => 21.3,
|
23
|
+
1 => 21.3,
|
24
|
+
2 => 57.4
|
25
|
+
}
|
26
|
+
|
27
|
+
assert_rank(rankable_graph, expected_rank)
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should return correct results for a star graph" do
|
31
|
+
rankable_graph = RankableGraph.new
|
32
|
+
rankable_graph.link(0, 2)
|
33
|
+
rankable_graph.link(1, 2)
|
34
|
+
rankable_graph.link(2, 2)
|
35
|
+
|
36
|
+
expected_rank = {
|
37
|
+
0 => 5,
|
38
|
+
1 => 5,
|
39
|
+
2 => 90,
|
40
|
+
}
|
41
|
+
|
42
|
+
assert_rank(rankable_graph, expected_rank)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should be uniform for a circular graph" do
|
46
|
+
rankable_graph = RankableGraph.new
|
47
|
+
rankable_graph.link(0, 1)
|
48
|
+
rankable_graph.link(1, 2)
|
49
|
+
rankable_graph.link(2, 3)
|
50
|
+
rankable_graph.link(3, 4)
|
51
|
+
rankable_graph.link(4, 0)
|
52
|
+
|
53
|
+
expected_rank = {
|
54
|
+
0 => 20,
|
55
|
+
1 => 20,
|
56
|
+
2 => 20,
|
57
|
+
3 => 20,
|
58
|
+
4 => 20
|
59
|
+
}
|
60
|
+
|
61
|
+
assert_rank(rankable_graph, expected_rank)
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should return correct results for a converging graph" do
|
65
|
+
rankable_graph = RankableGraph.new
|
66
|
+
rankable_graph.link(0, 1)
|
67
|
+
rankable_graph.link(0, 2)
|
68
|
+
rankable_graph.link(1, 2)
|
69
|
+
rankable_graph.link(2, 2)
|
70
|
+
|
71
|
+
expected_rank = {
|
72
|
+
0 => 5,
|
73
|
+
1 => 7.1,
|
74
|
+
2 => 87.9
|
75
|
+
}
|
76
|
+
|
77
|
+
assert_rank(rankable_graph, expected_rank)
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should correctly reproduce the wikipedia example" do
|
81
|
+
#http://en.wikipedia.org/wiki/File:PageRanks-Example.svg
|
82
|
+
rankable_graph = RankableGraph.new
|
83
|
+
rankable_graph.link(1, 2)
|
84
|
+
rankable_graph.link(2, 1)
|
85
|
+
rankable_graph.link(3, 0)
|
86
|
+
rankable_graph.link(3, 1)
|
87
|
+
rankable_graph.link(4, 3)
|
88
|
+
rankable_graph.link(4, 1)
|
89
|
+
rankable_graph.link(4, 5)
|
90
|
+
rankable_graph.link(5, 4)
|
91
|
+
rankable_graph.link(5, 1)
|
92
|
+
rankable_graph.link(6, 1)
|
93
|
+
rankable_graph.link(6, 4)
|
94
|
+
rankable_graph.link(7, 1)
|
95
|
+
rankable_graph.link(7, 4)
|
96
|
+
rankable_graph.link(8, 1)
|
97
|
+
rankable_graph.link(8, 4)
|
98
|
+
rankable_graph.link(9, 4)
|
99
|
+
rankable_graph.link(10, 4)
|
100
|
+
|
101
|
+
expected_rank = {
|
102
|
+
0 => 3.3, #a
|
103
|
+
1 => 38.4, #b
|
104
|
+
2 => 34.3, #c
|
105
|
+
3 => 3.9, #d
|
106
|
+
4 => 8.1, #e
|
107
|
+
5 => 3.9, #f
|
108
|
+
6 => 1.6, #g
|
109
|
+
7 => 1.6, #h
|
110
|
+
8 => 1.6, #i
|
111
|
+
9 => 1.6, #j
|
112
|
+
10 => 1.6 #k
|
113
|
+
}
|
114
|
+
|
115
|
+
assert_rank(rankable_graph, expected_rank)
|
116
|
+
end
|
117
|
+
end
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
# Shared setup for the specs: puts the spec, lib and ext directories on the
# load path, loads the compiled extension and boots RSpec.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext'))
# ext is on the load path (line above), so the extension is required by its
# bare name; 'ext/rankable_graph' only resolved when the current directory
# happened to be the project root and was itself on the load path.
require 'rankable_graph'
require 'spec'
require 'spec/autorun'

Spec::Runner.configure do |config|
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rankable_graph
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Daniel Cadenas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-01 00:00:00 -02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rspec
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.9
|
24
|
+
version:
|
25
|
+
description: A Ruby Pagerank implementation
|
26
|
+
email: dev@cuboxsa.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions:
|
30
|
+
- ext/extconf.rb
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README.rdoc
|
34
|
+
files:
|
35
|
+
- .document
|
36
|
+
- .gitignore
|
37
|
+
- LICENSE
|
38
|
+
- README.rdoc
|
39
|
+
- Rakefile
|
40
|
+
- VERSION
|
41
|
+
- benchmark.rb
|
42
|
+
- ext/Makefile
|
43
|
+
- ext/extconf.rb
|
44
|
+
- ext/mkmf.log
|
45
|
+
- ext/rankable_graph.c
|
46
|
+
- rankable_graph.gemspec
|
47
|
+
- spec/rankable_graph_spec.rb
|
48
|
+
- spec/spec.opts
|
49
|
+
- spec/spec_helper.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/cubox/rankable_graph
|
52
|
+
licenses: []
|
53
|
+
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options:
|
56
|
+
- --charset=UTF-8
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: "1.9"
|
64
|
+
version:
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: "0"
|
70
|
+
version:
|
71
|
+
requirements:
|
72
|
+
- glib2, v2.22.2 or greater
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 1.3.5
|
75
|
+
signing_key:
|
76
|
+
specification_version: 3
|
77
|
+
summary: A Ruby Pagerank implementation
|
78
|
+
test_files:
|
79
|
+
- spec/rankable_graph_spec.rb
|
80
|
+
- spec/spec_helper.rb
|