google_hash 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +47 -11
- data/VERSION +1 -1
- data/ext/benchmark.rb +42 -4
- data/ext/extconf.rb +62 -5
- data/ext/go.bat +2 -1
- data/ext/scale.rb +6 -0
- data/ext/template/google_hash.cpp.erb +261 -0
- data/ext/template/main.cpp.erb +19 -0
- data/results.txt +10 -20
- data/spec/spec.google_hash.rb +170 -0
- metadata +10 -9
- data/ext/main.cpp +0 -12
- data/ext/template/go.cpp +0 -114
- data/test/spec.go +0 -70
data/README
CHANGED
@@ -1,21 +1,57 @@
|
|
1
|
-
|
1
|
+
Pleased to announce the initial release of the "google_hash" gem.
|
2
2
|
|
3
|
-
|
3
|
+
Its goal: to boldly be faster than any hash has been before (cue the Star Trek: TNG theme).
|
4
4
|
|
5
|
-
a = GoogleHashSmall.new
|
6
|
-
a[3] = 'abc' # only accept integers for keys currently.
|
7
5
|
|
8
|
-
The
|
6
|
+
The goal is basically a better hash, either one that is faster or more space efficient than ruby's default. To attempt this we wrap the google sparse and dense hashes [1].
|
9
7
|
|
10
|
-
|
8
|
+
Speed results (populating 500000 integers):
|
11
9
|
|
12
|
-
|
10
|
+
1.9.1p376 (mingw):
|
13
11
|
|
12
|
+
Hash (Ruby default)
|
13
|
+
0.359375 (populate)
|
14
|
+
1.1875 (each)
|
14
15
|
|
15
|
-
|
16
|
+
GoogleHashDense
|
17
|
+
0.1875 (populate)
|
18
|
+
0.078125 (each)
|
16
19
|
|
17
|
-
|
20
|
+
GoogleHashSparse
|
21
|
+
0.53125 (populate)
|
22
|
+
0.078125 (each)
|
18
23
|
|
19
|
-
|
24
|
+
Usage:
|
20
25
|
|
21
|
-
|
26
|
+
a = GoogleHashDenseRubyToRuby.new # or GoogleHash.new
|
27
|
+
b = GoogleHashDenseLongToRuby.new # :int => Ruby
|
28
|
+
|
29
|
+
a[3] = 4
|
30
|
+
b[4] = 'abc'
|
31
|
+
b['abc'.hash] = 'some complex object'
|
32
|
+
|
33
|
+
a.each{|k, v| ... }
|
34
|
+
|
35
|
+
a.keys => Array
|
36
|
+
a.values => Array
|
37
|
+
|
38
|
+
Installation:
|
39
|
+
|
40
|
+
gem install google_hash (if on Windows, you'll need the DevKit installed)
|
41
|
+
|
42
|
+
Both classes are currently more space efficient than a Ruby Hash because they store keys as "native" ints: the keys no longer affect GC time, and each key uses only 4 bytes instead of 20 (or 8 instead of 40 on 64 bit). This should relieve stress on the GC, and both GoogleHashDense and GoogleHashSparse benefit from it. In terms of total memory usage, GoogleHashDense uses more but is faster; GoogleHashSparse uses less and is much more memory efficient (2 bits per entry, or so I'm told).
|
43
|
+
|
44
|
+
If you have a desired use case let me know and I might well be able to code it up for you.
|
45
|
+
|
46
|
+
For example, it currently uses longs internally instead of ints--if you want ints, chars, or anything else, let me know.
|
47
|
+
|
48
|
+
This is meant to be one more tool in the Rubyist's toolbelt when optimizing for speed. There are plans to expand to more types, and as of this release it at least has an #each method.
|
49
|
+
|
50
|
+
Note that sparse maps can be saved to disk--ask if you want/need this functionality.
|
51
|
+
|
52
|
+
Enjoy.
|
53
|
+
-r
|
54
|
+
|
55
|
+
[1] http://code.google.com/p/google-sparsehash
|
56
|
+
|
57
|
+
If you want to see or hack on the code, run extconf.rb within the ext directory to generate the code it actually uses (from a template).
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/ext/benchmark.rb
CHANGED
@@ -1,9 +1,47 @@
|
|
1
1
|
require './google_hash'
|
2
2
|
require 'benchmark'
|
3
|
+
require 'hitimes'
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
puts Benchmark.realtime { subject.each{|k, v| }}.to_s + " (each)", ''
|
5
|
+
def measure
|
6
|
+
Hitimes::Interval.measure { yield }
|
7
|
+
end
|
8
8
|
|
9
|
+
def meas string
|
10
|
+
puts "% -23s" % string + measure { yield }.to_s
|
9
11
|
end
|
12
|
+
|
13
|
+
def go num
|
14
|
+
puts num
|
15
|
+
# get all existing
|
16
|
+
all = [Hash] + Object.constants.grep(/Goog/).reject{|n| n == :GoogleHash}.map{|n| eval n}
|
17
|
+
|
18
|
+
for name in all do
|
19
|
+
GC.start
|
20
|
+
subject = name.new
|
21
|
+
puts
|
22
|
+
puts name
|
23
|
+
|
24
|
+
subject = name.new
|
25
|
+
meas( "populate string ") { num.times {|n| subject['abc'] = 4 } } rescue nil
|
26
|
+
subject = name.new
|
27
|
+
meas( "populate symbol") { num.times {|n| subject[:abc] = 4} } rescue nil
|
28
|
+
|
29
|
+
meas( "populate int") { num.times {|n| subject[n] = 4}}
|
30
|
+
meas("each") { subject.each{|k, v| } }
|
31
|
+
|
32
|
+
begin
|
33
|
+
subject = name.new
|
34
|
+
subject[3] = 4
|
35
|
+
meas("lookup int") { num.times {|n| subject[3]}}
|
36
|
+
subject['abc'] = 3
|
37
|
+
subject[:abc] = 3
|
38
|
+
|
39
|
+
meas("lookup string") { num.times {|n| subject['abc']}}
|
40
|
+
meas( "lookup symbol" ) { num.times {|n| subject[:abc]}}
|
41
|
+
rescue
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
num = 200_000
|
47
|
+
go num if $0 ==__FILE__
|
data/ext/extconf.rb
CHANGED
@@ -2,7 +2,7 @@ require 'mkmf'
|
|
2
2
|
require 'erb'
|
3
3
|
require 'rubygems'
|
4
4
|
require 'sane'
|
5
|
-
|
5
|
+
puts 'got', xsystem("ls")
|
6
6
|
# build google's lib locally...
|
7
7
|
|
8
8
|
dir = Dir.pwd
|
@@ -23,11 +23,68 @@ end
|
|
23
23
|
|
24
24
|
# create our files...
|
25
25
|
# currently we're int only...hmm...
|
26
|
-
# ltodo 64 bit
|
26
|
+
# ltodo if I am using longs, this 31 needs to be a 63 on 64 bit machines...
|
27
|
+
# if I ever use longs :)
|
28
|
+
|
29
|
+
# my goal is...ruby friendly hashers
|
27
30
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
if OS.bits == 32
|
32
|
+
unreachable_int = 31
|
33
|
+
else
|
34
|
+
unreachable_int = 63
|
31
35
|
end
|
32
36
|
|
37
|
+
ruby_key = {:convert_keys_from_ruby => "", :convert_keys_to_ruby => "", :key_type => "VALUE", :unreachable_key => "current_instance"} # TODO NULL is false here?
|
38
|
+
long_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2LONG",
|
39
|
+
:convert_keys_to_ruby => "LONG2FIX", :key_type => "long", :unreachable_key => "1<<#{unreachable_int}"}
|
40
|
+
int_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2INT",
|
41
|
+
:convert_keys_to_ruby => "INT2FIX", :key_type => "int", :unreachable_key => "1<<#{unreachable_int}"}
|
42
|
+
|
43
|
+
|
44
|
+
ruby_value = {:value_type => "VALUE"}
|
45
|
+
long_value = {:assert_value_type => 'T_FIXNUM', :convert_values_from_ruby => "FIX2LONG",
|
46
|
+
:convert_values_to_ruby => "LONG2FIX", :value_type => "long"}
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
int_to_ruby = long_key.merge(ruby_value)
|
51
|
+
ruby_to_ruby = ruby_key.merge(ruby_value)
|
52
|
+
|
53
|
+
init_funcs = []
|
54
|
+
|
55
|
+
for options in [int_to_ruby, ruby_to_ruby] do
|
56
|
+
for type in ['sparse', 'dense'] do
|
57
|
+
raise unless options[:value_type] && options[:key_type]
|
58
|
+
|
59
|
+
# create local variables so that the template can look cleaner
|
60
|
+
unreachable_key = options[:unreachable_key]
|
61
|
+
convert_keys_from_ruby = options[:convert_keys_from_ruby]
|
62
|
+
convert_keys_to_ruby = options[:convert_keys_to_ruby]
|
63
|
+
key_type = options[:key_type]
|
64
|
+
value_type = options[:value_type]
|
65
|
+
english_key_type = options[:key_type] == 'VALUE' ? 'ruby' : options[:key_type]
|
66
|
+
english_value_type = options[:value_type] == 'VALUE' ? 'ruby' : options[:value_type]
|
67
|
+
|
68
|
+
assert_key_type = options[:assert_key_type]
|
69
|
+
convert_values_from_ruby = options[:convert_values_from_ruby]
|
70
|
+
convert_values_to_ruby = options[:convert_values_to_ruby]
|
71
|
+
|
72
|
+
|
73
|
+
if options[:key_type] == 'VALUE'
|
74
|
+
extra_hash_params = ", hashrb, eqrb" # use these methods for comparison
|
75
|
+
# ltodo is that the right hash -- is is type_t
|
76
|
+
end
|
77
|
+
|
78
|
+
template = ERB.new(File.read('template/google_hash.cpp.erb'))
|
79
|
+
descriptor = type + '_' + english_key_type + '_to_' + english_value_type;
|
80
|
+
File.write(descriptor + '.cpp', template.result(binding))
|
81
|
+
init_funcs << "init_" + descriptor
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# write our Init method
|
86
|
+
|
87
|
+
template = ERB.new(File.read('template/main.cpp.erb'))
|
88
|
+
File.write 'main.cpp', template.result(binding)
|
89
|
+
|
33
90
|
create_makefile('google_hash')
|
data/ext/go.bat
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
|
1
|
+
rm *.cpp
|
2
|
+
ruby extconf.rb && make clean && make && ruby ..\spec\spec.google_hash.rb
|
data/ext/scale.rb
ADDED
data/ext/template/google_hash.cpp.erb
ADDED
@@ -0,0 +1,261 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <google/<%= type %>_hash_map>
|
3
|
+
#include <ruby.h>
|
4
|
+
|
5
|
+
// much code stolen shamelessly from lourens' cb gem
|
6
|
+
|
7
|
+
using google::<%= type %>_hash_map; // namespace where class lives by default
|
8
|
+
using std::cout;
|
9
|
+
using std::endl;
|
10
|
+
<% if OS.linux? %>
|
11
|
+
#include <ext/hash_set>
|
12
|
+
<% end %>
|
13
|
+
using __gnu_cxx::hash; // or __gnu_cxx::hash, or maybe tr1::hash, depending on your OS
|
14
|
+
extern "C" {
|
15
|
+
|
16
|
+
// some helpers
|
17
|
+
|
18
|
+
struct eqstr
|
19
|
+
{
|
20
|
+
bool operator()(const char* s1, const char* s2) const
|
21
|
+
{
|
22
|
+
return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
|
23
|
+
}
|
24
|
+
};
|
25
|
+
|
26
|
+
static hash<const char*> H;
|
27
|
+
|
28
|
+
// hashing it is like hash<const char*>
|
29
|
+
|
30
|
+
static ID id_eql, id_hash;
|
31
|
+
|
32
|
+
struct eqrb
|
33
|
+
{
|
34
|
+
bool operator()(const VALUE s1, const VALUE s2) const
|
35
|
+
{
|
36
|
+
// speeds up populate int 18/11
|
37
|
+
// slows down string 21/22
|
38
|
+
// ltodo
|
39
|
+
if(s1 == s2) {
|
40
|
+
return true;
|
41
|
+
}
|
42
|
+
|
43
|
+
// this line from object.c's rb_eql
|
44
|
+
// lookup 0.278 -> 0.26
|
45
|
+
|
46
|
+
return RTEST(rb_funcall(s1, id_eql, 1, s2));
|
47
|
+
}
|
48
|
+
};
|
49
|
+
|
50
|
+
struct hashrb
|
51
|
+
{
|
52
|
+
size_t operator()(VALUE hash_me) const
|
53
|
+
{
|
54
|
+
// stolen from hash.c populate -> 0.64 0.625
|
55
|
+
|
56
|
+
// hmm
|
57
|
+
// use our own custom hash function for well known types
|
58
|
+
// to avoid a function call
|
59
|
+
// this speeds up ints 29/44
|
60
|
+
// and speeds up string 26/22
|
61
|
+
// though I suppose we could
|
62
|
+
|
63
|
+
switch (TYPE(hash_me)) {
|
64
|
+
// ltodo does this help?
|
65
|
+
// if so more types?
|
66
|
+
case T_FIXNUM:
|
67
|
+
case T_FLOAT:
|
68
|
+
case T_SYMBOL:
|
69
|
+
// ltodo
|
70
|
+
return hash_me;
|
71
|
+
<% unless RUBY_VERSION < '1.9' %>
|
72
|
+
case T_BIGNUM:
|
73
|
+
return LONG2FIX(((long*)(RBIGNUM_DIGITS(hash_me)))[0]); // its first digit...I'm thinkin'
|
74
|
+
<% end %>
|
75
|
+
case T_STRING:
|
76
|
+
//return H(StringValueCStr(hash_me)); // populate/lookup 0.26 -> 0.23 [core is 0.16 somehow]
|
77
|
+
// perhaps they cache?
|
78
|
+
return H(RSTRING_PTR(hash_me)); // 0.23 -> -.22
|
79
|
+
}
|
80
|
+
|
81
|
+
VALUE hval = rb_funcall(hash_me, id_hash, 0);
|
82
|
+
|
83
|
+
retry:
|
84
|
+
switch (TYPE(hval)) {
|
85
|
+
case T_FIXNUM:
|
86
|
+
return hval;
|
87
|
+
<% unless RUBY_VERSION < '1.9' %>
|
88
|
+
case T_BIGNUM:
|
89
|
+
return LONG2FIX(((long*)(RBIGNUM_DIGITS(hval)))[0]);
|
90
|
+
<% end %>
|
91
|
+
|
92
|
+
default:
|
93
|
+
hval = rb_to_int(hval);
|
94
|
+
goto retry;
|
95
|
+
}
|
96
|
+
|
97
|
+
}
|
98
|
+
};
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
/* we end up not needing this...or at least not using it, I don't know if it would be faster than using the default or not
|
103
|
+
struct eqint
|
104
|
+
{
|
105
|
+
inline bool operator()(int s1, int s2) const
|
106
|
+
{
|
107
|
+
return s1 == s2;
|
108
|
+
}
|
109
|
+
};
|
110
|
+
*/
|
111
|
+
|
112
|
+
|
113
|
+
typedef struct {
|
114
|
+
<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> > *hash_map;
|
115
|
+
} RCallback;
|
116
|
+
|
117
|
+
|
118
|
+
static void mark_hash_map_values(RCallback *incoming) {
|
119
|
+
<% if value_type == 'VALUE' || key_type == 'VALUE' %>
|
120
|
+
|
121
|
+
for(<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
122
|
+
|
123
|
+
<% if value_type == 'VALUE' %>
|
124
|
+
rb_gc_mark(it->second);
|
125
|
+
<% end %>
|
126
|
+
|
127
|
+
<% if key_type == 'VALUE' %>
|
128
|
+
rb_gc_mark(it->first);
|
129
|
+
<% end %>
|
130
|
+
}
|
131
|
+
<% end %>
|
132
|
+
}
|
133
|
+
|
134
|
+
static void free_hash_callback(RCallback* cb) {
|
135
|
+
// delete cb->hash_map;
|
136
|
+
}
|
137
|
+
|
138
|
+
static VALUE callback_alloc _((VALUE)); // what does this line do?
|
139
|
+
|
140
|
+
static VALUE
|
141
|
+
callback_alloc( VALUE klass )
|
142
|
+
{
|
143
|
+
RCallback* cbs;
|
144
|
+
VALUE current_instance = Data_Make_Struct(klass, RCallback, mark_hash_map_values, free_hash_callback, cbs);
|
145
|
+
|
146
|
+
cbs->hash_map = new <%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >();
|
147
|
+
<% if unreachable_key && type == 'dense' %>
|
148
|
+
cbs->hash_map->set_empty_key(<%= unreachable_key %>);
|
149
|
+
<% end %>
|
150
|
+
|
151
|
+
return current_instance;
|
152
|
+
}
|
153
|
+
|
154
|
+
|
155
|
+
#define GetCallbackStruct(obj) (Check_Type(obj, T_DATA), (RCallback*)DATA_PTR(obj))
|
156
|
+
|
157
|
+
|
158
|
+
static VALUE
|
159
|
+
rb_mri_hash_new(VALUE freshly_created) {
|
160
|
+
|
161
|
+
// we don't actually have anything special to do here...
|
162
|
+
// unless someone subclassed us or something [?]
|
163
|
+
// ltodo test
|
164
|
+
return freshly_created;
|
165
|
+
}
|
166
|
+
|
167
|
+
|
168
|
+
static VALUE rb_ghash_set(VALUE cb, VALUE set_this, VALUE to_this) {
|
169
|
+
<% if assert_key_type %>
|
170
|
+
if(!(TYPE(set_this) == <%= assert_key_type %>)) {
|
171
|
+
rb_raise(rb_eTypeError, "not valid value #{assert_key_type}");
|
172
|
+
}
|
173
|
+
<% end %>
|
174
|
+
RCallback* cbs = GetCallbackStruct(cb);
|
175
|
+
(*cbs->hash_map)[ <%= convert_keys_from_ruby %>(set_this)] = <%= convert_values_from_ruby %>(to_this);
|
176
|
+
return to_this; // ltodo test that it returns value...
|
177
|
+
}
|
178
|
+
|
179
|
+
static VALUE rb_ghash_get(VALUE cb, VALUE get_this) {
|
180
|
+
// TODO optionally not assert [?]
|
181
|
+
<% if assert_key_type %>
|
182
|
+
if(!(TYPE(get_this) == <%= assert_key_type %>)) {
|
183
|
+
rb_raise(rb_eTypeError, "not valid value #{assert_key_type}");
|
184
|
+
}
|
185
|
+
<% end %>
|
186
|
+
RCallback* cbs = GetCallbackStruct(cb);
|
187
|
+
|
188
|
+
<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator out = cbs->hash_map->find(<%= convert_keys_from_ruby %>(get_this));
|
189
|
+
|
190
|
+
if(out == cbs->hash_map->end()) { // not found...hmm...is this False, though?
|
191
|
+
return Qnil;
|
192
|
+
} else {
|
193
|
+
return <%= convert_values_to_ruby %>(out->second);
|
194
|
+
}
|
195
|
+
}
|
196
|
+
|
197
|
+
static VALUE rb_ghash_each(VALUE cb) {
|
198
|
+
RCallback* incoming = GetCallbackStruct(cb);
|
199
|
+
for(<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
200
|
+
rb_yield_values(2, <%= convert_keys_to_ruby %>(it->first), <%= convert_values_to_ruby %>(it->second));
|
201
|
+
}
|
202
|
+
return cb;
|
203
|
+
}
|
204
|
+
|
205
|
+
static VALUE rb_ghash_values(VALUE cb) {
|
206
|
+
RCallback* incoming = GetCallbackStruct(cb);
|
207
|
+
VALUE out = rb_ary_new2(incoming->hash_map->size());
|
208
|
+
for(<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
209
|
+
rb_ary_push(out, <%= convert_values_to_ruby %>(it->second));
|
210
|
+
}
|
211
|
+
return out;
|
212
|
+
}
|
213
|
+
|
214
|
+
static VALUE rb_ghash_keys(VALUE cb) {
|
215
|
+
RCallback* incoming = GetCallbackStruct(cb);
|
216
|
+
VALUE out = rb_ary_new2(incoming->hash_map->size());
|
217
|
+
for(<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
218
|
+
rb_ary_push(out, <%= convert_keys_to_ruby %>(it->first));
|
219
|
+
}
|
220
|
+
return out;
|
221
|
+
}
|
222
|
+
|
223
|
+
|
224
|
+
// only yields for now :)
|
225
|
+
|
226
|
+
static VALUE rb_ghash_combination_2(VALUE cb) {
|
227
|
+
RCallback* incoming = GetCallbackStruct(cb);
|
228
|
+
|
229
|
+
for(<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
230
|
+
|
231
|
+
<%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator next = it;
|
232
|
+
++next; // advance it
|
233
|
+
|
234
|
+
while(next != incoming->hash_map->end()) {
|
235
|
+
rb_yield_values(2, <%= convert_keys_to_ruby %>(it->first), <%= convert_keys_to_ruby %>(next->first));
|
236
|
+
++next;
|
237
|
+
}
|
238
|
+
}
|
239
|
+
return cb;
|
240
|
+
}
|
241
|
+
|
242
|
+
|
243
|
+
void init_<%= type %>_<%= english_key_type %>_to_<%= english_value_type %>() {
|
244
|
+
|
245
|
+
VALUE rb_cGoogleHashLocal;
|
246
|
+
rb_cGoogleHashLocal = rb_define_class("GoogleHash<%= type.capitalize %><%= english_key_type.capitalize %>To<%= english_value_type.capitalize %>", rb_cObject);
|
247
|
+
|
248
|
+
rb_define_alloc_func(rb_cGoogleHashLocal, callback_alloc); // I guess it calls this for us, pre initialize...
|
249
|
+
|
250
|
+
rb_define_method(rb_cGoogleHashLocal, "initialize", RUBY_METHOD_FUNC(rb_mri_hash_new), 0);
|
251
|
+
rb_define_method(rb_cGoogleHashLocal, "[]=", RUBY_METHOD_FUNC(rb_ghash_set), 2);
|
252
|
+
rb_define_method(rb_cGoogleHashLocal, "[]", RUBY_METHOD_FUNC(rb_ghash_get), 1);
|
253
|
+
rb_define_method(rb_cGoogleHashLocal, "each", RUBY_METHOD_FUNC(rb_ghash_each), 0);
|
254
|
+
rb_define_method(rb_cGoogleHashLocal, "values", RUBY_METHOD_FUNC(rb_ghash_values), 0);
|
255
|
+
rb_define_method(rb_cGoogleHashLocal, "keys", RUBY_METHOD_FUNC(rb_ghash_keys), 0);
|
256
|
+
rb_define_method(rb_cGoogleHashLocal, "keys_combination_2", RUBY_METHOD_FUNC(rb_ghash_combination_2), 0);
|
257
|
+
id_eql = rb_intern("eql?");
|
258
|
+
id_hash = rb_intern("hash");
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
data/ext/template/main.cpp.erb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
extern "C" {
|
3
|
+
#include <ruby.h>
|
4
|
+
// like extern void init_dense_ruby_to_ruby_;
|
5
|
+
<% init_funcs.each do |func| %>
|
6
|
+
<%= "extern void #{func}();" %>
|
7
|
+
<% end %>
|
8
|
+
|
9
|
+
void Init_google_hash() {
|
10
|
+
|
11
|
+
<% init_funcs.each do |func| %>
|
12
|
+
<%= func %>();
|
13
|
+
<% end %>
|
14
|
+
|
15
|
+
rb_eval_string("GoogleHash = GoogleHashDenseRubyToRuby"); // give a default
|
16
|
+
}
|
17
|
+
|
18
|
+
}
|
19
|
+
|
data/results.txt
CHANGED
@@ -1,33 +1,23 @@
|
|
1
|
-
1.9 mingw
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
GoogleHashDense
|
8
|
-
0.1875 (populate)
|
9
|
-
0.078125 (each)
|
10
|
-
|
11
|
-
Hash
|
12
|
-
0.359375 (populate)
|
13
|
-
1.1875 (each)
|
14
|
-
|
15
|
-
|
1
|
+
Here are some 1.9 mingw results:
|
2
|
+
|
3
|
+
http://pastie.org/752318
|
4
|
+
|
5
|
+
|
6
|
+
|
16
7
|
ruby 1.8.6 mingw:
|
17
|
-
|
8
|
+
|
18
9
|
GoogleHashSparse
|
19
10
|
0.625 (populate)
|
20
11
|
0.546875(each)
|
21
|
-
|
12
|
+
|
22
13
|
GoogleHashDense
|
23
14
|
0.234375(populate)
|
24
15
|
0.421875(each)
|
25
|
-
|
26
|
-
Hash
|
16
|
+
|
17
|
+
Hash
|
27
18
|
0.5 (populate)
|
28
19
|
0.53125 (each)
|
29
20
|
|
30
|
-
|
31
21
|
1.9.2 linux:
|
32
22
|
|
33
23
|
GoogleHashSparse
|
data/spec/spec.google_hash.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
require 'rubygems' if RUBY_VERSION < '1.9'
|
2
|
+
require 'sane'
|
3
|
+
require_rel '../ext/google_hash.so'
|
4
|
+
require 'spec/autorun'
|
5
|
+
|
6
|
+
describe "google_hash" do
|
7
|
+
|
8
|
+
before do
|
9
|
+
@subject = GoogleHashSparseLongToRuby.new
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should be instantiable" do
|
13
|
+
# nothing
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should allow you to set a key" do
|
17
|
+
@subject[33] = 'abc'
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should allow you to retrieve a key" do
|
21
|
+
@subject[33] = 'abc'
|
22
|
+
@subject[33].should == 'abc'
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should allow you to iterate" do
|
26
|
+
@subject[33] = 'abc'
|
27
|
+
@subject[44] = 'def'
|
28
|
+
all_got = []
|
29
|
+
@subject.each{|k, v|
|
30
|
+
all_got << v
|
31
|
+
}
|
32
|
+
assert all_got.sort == ['abc', 'def']
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should have all the methods desired" do
|
36
|
+
pending "need"
|
37
|
+
# guess these could all be tests, themselves...
|
38
|
+
@subject.each_key {}
|
39
|
+
@subject.each_value{}
|
40
|
+
@subject.each{}
|
41
|
+
@subject[33] = 'abc'
|
42
|
+
@subject.length.should == 1
|
43
|
+
@subject.delete(33).should == 'abc'
|
44
|
+
@subject.clear
|
45
|
+
@subject.length.should == 0
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should not leak" do
|
49
|
+
pending "testing if it leaks"
|
50
|
+
end
|
51
|
+
|
52
|
+
def populate(a)
|
53
|
+
a['abc'] = 'def'
|
54
|
+
a['bbc'] = 'yoyo'
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should not die with GC" do
|
58
|
+
a = GoogleHashSparseRubyToRuby.new
|
59
|
+
populate(a)
|
60
|
+
a['abc'].should == 'def'
|
61
|
+
a['bbc'].should == 'yoyo'
|
62
|
+
GC.start
|
63
|
+
a.keys.each{|k| k}
|
64
|
+
a.values.each{|v| v}
|
65
|
+
end
|
66
|
+
|
67
|
+
|
68
|
+
it "should work with value => value" do
|
69
|
+
a = GoogleHashSparseRubyToRuby.new
|
70
|
+
a['abc'] = 'def'
|
71
|
+
a['abc'].should == 'def'
|
72
|
+
a = GoogleHashDenseRubyToRuby.new
|
73
|
+
a['abc'] = 'def'
|
74
|
+
a['abc'].should == 'def'
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should have better namespace" do
|
78
|
+
pending do
|
79
|
+
GoogleHash::Sparse
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should disallow non numeric keys" do
|
84
|
+
lambda { @subject['33']}.should raise_error(TypeError)
|
85
|
+
end
|
86
|
+
|
87
|
+
# it "should allow for non numeric keys" do
|
88
|
+
# todo instantiate new type here...
|
89
|
+
# todo allow for floats, ints, symbols, strings [freeze 'em]
|
90
|
+
# wait are any of those actually useful tho?
|
91
|
+
# @subject['33'] = 33
|
92
|
+
# @subject['33'].should == 33
|
93
|
+
# end
|
94
|
+
|
95
|
+
it "should return nil if key is absent" do
|
96
|
+
@subject[33].should be_nil
|
97
|
+
end
|
98
|
+
|
99
|
+
it "should work with 0's" do
|
100
|
+
@subject[0] = 'abc'
|
101
|
+
@subject[0].should == 'abc'
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should do BigNums" do
|
105
|
+
pending "if necessary"
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should do longs eventually" do
|
109
|
+
pending "caring about 64 bit"
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should do 63 bit thingy for longs on 64 bit" do
|
113
|
+
pending "caring about 64 bit"
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should have sets"
|
117
|
+
it "should have Set#each"
|
118
|
+
|
119
|
+
it "Set should have #combination calls" do
|
120
|
+
@subject[33] = 34
|
121
|
+
@subject[36] = 37
|
122
|
+
@subject.keys_combination_2{|a, b|
|
123
|
+
assert a == 33
|
124
|
+
assert b == 36
|
125
|
+
}
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
it "Set should have #combination calls with more than one" do
|
130
|
+
@subject[1] = 34
|
131
|
+
@subject[2] = 37
|
132
|
+
@subject[3]= 39
|
133
|
+
sum = 0
|
134
|
+
count = 0
|
135
|
+
@subject.keys_combination_2{|a, b|
|
136
|
+
sum += a
|
137
|
+
sum += b
|
138
|
+
count += 1
|
139
|
+
}
|
140
|
+
assert count == 3
|
141
|
+
assert sum == 1 + 2 + 1 + 3 + 2 + 3
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
it "should have an Array for values, keys" do
|
146
|
+
@subject[33] = 34
|
147
|
+
@subject.keys.should == [33]
|
148
|
+
@subject.values.should == [34]
|
149
|
+
end
|
150
|
+
|
151
|
+
it "should work with all Longs" do
|
152
|
+
a = GoogleHashDenseLongToLong.new
|
153
|
+
a[3] = 4
|
154
|
+
a[3].should == 4
|
155
|
+
end
|
156
|
+
|
157
|
+
it "should raise on errant values" do
|
158
|
+
a = GoogleHashDenseLongToLong.new
|
159
|
+
proc { a[3] = 4}.should raise_error
|
160
|
+
end
|
161
|
+
|
162
|
+
it "should have an Enumerator for values, keys, on demand"
|
163
|
+
|
164
|
+
it "should have a block for values, keys, on demand"
|
165
|
+
|
166
|
+
it "should have real sets"
|
167
|
+
|
168
|
+
it "should skip GC when native to native"
|
169
|
+
|
170
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_hash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- rogerdpack
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-21 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -26,8 +26,8 @@ description: Ruby wrappers to the google hash library
|
|
26
26
|
email: rogerdpack@gmail.com
|
27
27
|
executables: []
|
28
28
|
|
29
|
-
extensions:
|
30
|
-
|
29
|
+
extensions: []
|
30
|
+
|
31
31
|
extra_rdoc_files:
|
32
32
|
- README
|
33
33
|
files:
|
@@ -37,7 +37,7 @@ files:
|
|
37
37
|
- ext/benchmark.rb
|
38
38
|
- ext/extconf.rb
|
39
39
|
- ext/go.bat
|
40
|
-
- ext/
|
40
|
+
- ext/scale.rb
|
41
41
|
- ext/sparsehash-1.5.2/AUTHORS
|
42
42
|
- ext/sparsehash-1.5.2/COPYING
|
43
43
|
- ext/sparsehash-1.5.2/ChangeLog
|
@@ -115,9 +115,10 @@ files:
|
|
115
115
|
- ext/sparsehash-1.5.2/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj
|
116
116
|
- ext/sparsehash-1.5.2/vsprojects/time_hash_map/time_hash_map.vcproj
|
117
117
|
- ext/sparsehash-1.5.2/vsprojects/type_traits_unittest/type_traits_unittest.vcproj
|
118
|
-
- ext/template/
|
118
|
+
- ext/template/google_hash.cpp.erb
|
119
|
+
- ext/template/main.cpp.erb
|
119
120
|
- results.txt
|
120
|
-
-
|
121
|
+
- spec/spec.google_hash.rb
|
121
122
|
has_rdoc: true
|
122
123
|
homepage: http://github.com/rdp/ruby_google_hash
|
123
124
|
licenses: []
|
@@ -146,5 +147,5 @@ rubygems_version: 1.3.5
|
|
146
147
|
signing_key:
|
147
148
|
specification_version: 3
|
148
149
|
summary: Ruby wrappers to the google hash library
|
149
|
-
test_files:
|
150
|
-
|
150
|
+
test_files:
|
151
|
+
- spec/spec.google_hash.rb
|
data/ext/main.cpp
DELETED
data/ext/template/go.cpp
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
#include <iostream>
|
2
|
-
#include <google/<%= type %>_hash_map>
|
3
|
-
#include <ruby.h>
|
4
|
-
|
5
|
-
using google::<%= type %>_hash_map; // namespace where class lives by default
|
6
|
-
using std::cout;
|
7
|
-
using std::endl;
|
8
|
-
<% if !OS.windows? %>
|
9
|
-
#include <ext/hash_set>
|
10
|
-
<% end %>
|
11
|
-
using __gnu_cxx::hash; // or __gnu_cxx::hash, or maybe tr1::hash, depending on your OS
|
12
|
-
extern "C" {
|
13
|
-
|
14
|
-
struct eqstr
|
15
|
-
{
|
16
|
-
bool operator()(const char* s1, const char* s2) const
|
17
|
-
{
|
18
|
-
return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
|
19
|
-
}
|
20
|
-
};
|
21
|
-
|
22
|
-
|
23
|
-
struct eqint
|
24
|
-
{
|
25
|
-
inline bool operator()(int s1, int s2) const
|
26
|
-
{
|
27
|
-
return s1 == s2;
|
28
|
-
}
|
29
|
-
};
|
30
|
-
|
31
|
-
typedef struct {
|
32
|
-
<%= type %>_hash_map<int, VALUE> *hash_map;
|
33
|
-
} RCallback;
|
34
|
-
|
35
|
-
static VALUE rb_cGoogleHash<%= type %>;
|
36
|
-
|
37
|
-
|
38
|
-
static void mark_hash_map_values(RCallback *incoming) {
|
39
|
-
for(<%= type %>_hash_map<int, VALUE>::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
40
|
-
rb_gc_mark(it->second);
|
41
|
-
}
|
42
|
-
}
|
43
|
-
|
44
|
-
static void free_hash_callback(RCallback* cb) {
|
45
|
-
// delete cb->hash_map;
|
46
|
-
}
|
47
|
-
|
48
|
-
static VALUE callback_alloc _((VALUE)); // what does this line do?
|
49
|
-
|
50
|
-
static VALUE
|
51
|
-
callback_alloc( VALUE klass )
|
52
|
-
{
|
53
|
-
VALUE cb;
|
54
|
-
RCallback* cbs;
|
55
|
-
cb = Data_Make_Struct(klass, RCallback, mark_hash_map_values, free_hash_callback, cbs);
|
56
|
-
cbs->hash_map = new <%= type %>_hash_map<int, VALUE>();
|
57
|
-
<% if setup_code %>
|
58
|
-
cbs->hash_map-><%= setup_code %>;
|
59
|
-
<% end %>
|
60
|
-
return cb;
|
61
|
-
}
|
62
|
-
|
63
|
-
#define GetCallbackStruct(obj) (Check_Type(obj, T_DATA), (RCallback*)DATA_PTR(obj))
|
64
|
-
|
65
|
-
static VALUE
|
66
|
-
rb_mri_hash_new(VALUE freshly_created) {
|
67
|
-
|
68
|
-
// we don't actually have anything special to do here...
|
69
|
-
return freshly_created;
|
70
|
-
}
|
71
|
-
|
72
|
-
|
73
|
-
static VALUE rb_ghash_set(VALUE cb, VALUE set_this, VALUE to_this) {
|
74
|
-
if(!(TYPE(set_this) == T_FIXNUM)) {
|
75
|
-
rb_raise(rb_eTypeError, "not valid value");
|
76
|
-
}
|
77
|
-
RCallback* cbs = GetCallbackStruct(cb);
|
78
|
-
(*cbs->hash_map)[FIX2INT(set_this)] = to_this;
|
79
|
-
return to_this; // ltodo test that it returns value...
|
80
|
-
}
|
81
|
-
|
82
|
-
static VALUE rb_ghash_get(VALUE cb, VALUE get_this) {
|
83
|
-
if(!(TYPE(get_this) == T_FIXNUM)) {
|
84
|
-
rb_raise(rb_eTypeError, "not valid value");
|
85
|
-
}
|
86
|
-
RCallback* cbs = GetCallbackStruct(cb);
|
87
|
-
VALUE out = (*cbs->hash_map)[FIX2INT(get_this)];
|
88
|
-
// todo if out == 0 return Qnil
|
89
|
-
return out;
|
90
|
-
}
|
91
|
-
|
92
|
-
static VALUE rb_ghash_each(VALUE cb) {
|
93
|
-
RCallback* incoming = GetCallbackStruct(cb);
|
94
|
-
// TODO assert block given
|
95
|
-
for(<%= type %>_hash_map<int, VALUE>::iterator it = incoming->hash_map->begin(); it != incoming->hash_map->end(); ++it) {
|
96
|
-
rb_yield_values(2, INT2FIX(it->first), it->second);
|
97
|
-
}
|
98
|
-
return cb;
|
99
|
-
|
100
|
-
}
|
101
|
-
|
102
|
-
void init_<%= type %>() {
|
103
|
-
rb_cGoogleHash<%= type %> = rb_define_class("GoogleHash<%= type.capitalize %>", rb_cObject);
|
104
|
-
|
105
|
-
rb_define_alloc_func(rb_cGoogleHash<%= type %>, callback_alloc); // I guess it calls this for us, pre initialize...
|
106
|
-
|
107
|
-
rb_define_method(rb_cGoogleHash<%= type %>, "initialize", RUBY_METHOD_FUNC(rb_mri_hash_new), 0);
|
108
|
-
rb_define_method(rb_cGoogleHash<%= type %>, "[]=", RUBY_METHOD_FUNC(rb_ghash_set), 2);
|
109
|
-
rb_define_method(rb_cGoogleHash<%= type %>, "[]", RUBY_METHOD_FUNC(rb_ghash_get), 1);
|
110
|
-
rb_define_method(rb_cGoogleHash<%= type %>, "each", RUBY_METHOD_FUNC(rb_ghash_each), 0);
|
111
|
-
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
data/test/spec.go
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
require 'sane'
|
2
|
-
require_rel '../ext/google_hash.so'
|
3
|
-
require 'spec/autorun'
|
4
|
-
|
5
|
-
describe GoogleHashSmall do
|
6
|
-
|
7
|
-
before do
|
8
|
-
@subject = GoogleHashSmall.new
|
9
|
-
end
|
10
|
-
|
11
|
-
it "should be instantiable" do
|
12
|
-
# nothing
|
13
|
-
end
|
14
|
-
|
15
|
-
it "should allow you to set a key" do
|
16
|
-
@subject[33] = 'abc'
|
17
|
-
end
|
18
|
-
|
19
|
-
it "should allow you to retrieve a key" do
|
20
|
-
@subject[33] = 'abc'
|
21
|
-
@subject[33].should == 'abc'
|
22
|
-
end
|
23
|
-
|
24
|
-
it "should allow you to iterate" do
|
25
|
-
@subject[33] = 'abc'
|
26
|
-
@subject[44] = 'def'
|
27
|
-
all_got = []
|
28
|
-
@subject.each{|k, v|
|
29
|
-
all_got << v
|
30
|
-
}
|
31
|
-
assert all_got.sort == ['abc', 'def']
|
32
|
-
end
|
33
|
-
|
34
|
-
it "should have all the methods desired" do
|
35
|
-
# guess these could all be tests, themselves...
|
36
|
-
@subject.each_key {}
|
37
|
-
@subject.each_value{}
|
38
|
-
@subject.each{}
|
39
|
-
@subject.delete(33)
|
40
|
-
@subject.clear
|
41
|
-
@subject.length.should == 0
|
42
|
-
end
|
43
|
-
|
44
|
-
it "should not leak" do
|
45
|
-
raise 'not done'
|
46
|
-
end
|
47
|
-
|
48
|
-
it "should have better namespace" do
|
49
|
-
GoogleHash::Space
|
50
|
-
end
|
51
|
-
|
52
|
-
it "should disallow non numeric keys" do
|
53
|
-
@subject['33'].should raise_exception
|
54
|
-
end
|
55
|
-
|
56
|
-
it "should allow for non numeric keys" do
|
57
|
-
# todo instantiate new type here...
|
58
|
-
# todo allow for floats, ints, symbols, strings [freeze 'em]
|
59
|
-
# wait are any of those actually useful tho?
|
60
|
-
@subject['33'] = 33
|
61
|
-
@subject['33'].should == 33
|
62
|
-
end
|
63
|
-
|
64
|
-
# todo do the non sparse, too...
|
65
|
-
|
66
|
-
it "should return nil if key is absent" do
|
67
|
-
@subject[33].should be_nil
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|