google_hash 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +25 -13
- data/VERSION +1 -1
- data/ext/extconf.rb +46 -38
- data/ext/template/google_hash.cpp.erb +37 -17
- data/spec/spec.google_hash.rb +1 -1
- data/types.txt +18 -0
- metadata +3 -2
data/README
CHANGED
@@ -2,7 +2,6 @@ Pleased to announce the initial release of the "google_hash" gem.
|
|
2
2
|
|
3
3
|
Its goal: to boldly be faster than any hash has been before (cue the Star Trek: TNG theme).
|
4
4
|
|
5
|
-
|
6
5
|
The goal is basically a better hash, either one that is faster or more space efficient than ruby's default. To attempt this we wrap the google sparse and dense hashes [1].
|
7
6
|
|
8
7
|
Speed results (populating 500000 integers):
|
@@ -13,50 +12,63 @@ Hash (Ruby default)
|
|
13
12
|
0.359375 (populate)
|
14
13
|
1.1875 (each)
|
15
14
|
|
16
|
-
|
15
|
+
GoogleHashDenseIntToInt
|
17
16
|
0.1875 (populate)
|
18
17
|
0.078125 (each)
|
19
18
|
|
20
|
-
|
19
|
+
GoogleHashSparseIntToInt
|
21
20
|
0.53125 (populate)
|
22
21
|
0.078125 (each)
|
23
22
|
|
24
23
|
Usage:
|
25
24
|
|
26
|
-
a = GoogleHashDenseRubyToRuby.new
|
27
|
-
b =
|
28
|
-
|
25
|
+
a = GoogleHashDenseRubyToRuby.new
|
26
|
+
b = GoogleHashDenseIntToRuby.new # :int => Ruby
|
27
|
+
c = GoogleHashSparseIntToInt.new # :int => :int
|
28
|
+
d = GoogleHashSparseLongToLong.new # :long => :long (longs are 8 byte ints on 64-bit machines, so can store larger numbers)
|
29
29
|
|
30
30
|
a[3] = 4
|
31
31
|
b[4] = 'abc'
|
32
|
-
b['abc'
|
32
|
+
a['abc'] = 'some complex object'
|
33
|
+
c[3] = 4 # all you can use are ints
|
33
34
|
|
34
35
|
a.each{|k, v| ... }
|
35
36
|
|
36
37
|
a.keys => Array
|
37
38
|
a.values => Array
|
38
39
|
|
40
|
+
For a complete list, see types.txt, but it's all the various combinations of Ruby, Int, and Long.
|
41
|
+
|
39
42
|
To learn if sparse or dense is right for you, check their documentation:
|
40
43
|
|
41
44
|
http://google-sparsehash.googlecode.com/svn/trunk/doc/index.html
|
45
|
+
Dense is faster, sparse uses less memory.
|
42
46
|
|
43
47
|
Installation:
|
44
48
|
|
45
|
-
gem install
|
49
|
+
gem install os # install a dependency
|
50
|
+
gem install sane # install a dependency
|
51
|
+
gem install google_hash (if on Windows, you'll also first need the DevKit installed)
|
46
52
|
|
47
|
-
|
53
|
+
Note that if you use a type other than "Ruby", the values are saved away as native values, so they use less space than a typical Ruby object: 4 bytes instead of 20 (or instead of 40, in the case of 64-bit machines). In addition to taking less memory, they are also stored separately from Ruby's heap, meaning that they should relieve pressure on the GC, if you are spending much time in GC.
|
48
54
|
|
49
|
-
If you have a desired use case let me know and I might well be able to code it up for you.
|
55
|
+
If you have a desired use case that's not covered, let me know and I might well be able to code it up for you and add it.
|
50
56
|
|
51
57
|
ex: currently it uses longs internally instead of ints--if you want ints or chars or what not, let me know.
|
52
58
|
|
53
|
-
|
59
|
+
if you want it to remember insertion order, I could do that, too, or native "store away" strings/bignums, whatever.
|
60
|
+
|
54
61
|
|
55
|
-
|
62
|
+
This is meant to be one more tool in the Rubyist's toolbelt when trying to optimize for speed. It plans to expand to more types, but at least with this release it has an #each method.
|
56
63
|
|
57
64
|
Enjoy.
|
65
|
+
|
58
66
|
-r
|
59
67
|
|
60
68
|
[1] http://code.google.com/p/google-sparsehash
|
61
69
|
|
62
|
-
If you want to see the code/hack on it, run extconf.rb within the ext directory, to create the code it actually uses (from a template).
|
70
|
+
If you want to see the code/hack on it, run extconf.rb within the ext directory, to create the code it actually uses (from a template).
|
71
|
+
|
72
|
+
Related:
|
73
|
+
|
74
|
+
judy http://groups.google.com/group/ruby-talk-google/browse_thread/thread/05ed587925526a7f/314375891d12b672?lnk=raot
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/ext/extconf.rb
CHANGED
@@ -17,8 +17,8 @@ end
|
|
17
17
|
$CFLAGS += " -I./local_installed/include "
|
18
18
|
|
19
19
|
if RUBY_VERSION < '1.9'
|
20
|
-
|
21
|
-
|
20
|
+
# appears to link using gcc on 1.8 [mingw at least]
|
21
|
+
$LDFLAGS += " -lstdc++ "
|
22
22
|
end
|
23
23
|
|
24
24
|
# create our files...
|
@@ -35,56 +35,64 @@ else
|
|
35
35
|
end
|
36
36
|
|
37
37
|
ruby_key = {:convert_keys_from_ruby => "", :convert_keys_to_ruby => "", :key_type => "VALUE", :unreachable_key => "current_instance"} # TODO NULL is false here?
|
38
|
-
long_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2LONG",
|
39
|
-
|
40
|
-
int_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2INT",
|
41
|
-
|
38
|
+
long_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2LONG",
|
39
|
+
:convert_keys_to_ruby => "LONG2FIX", :key_type => "long", :unreachable_key => "1<<#{unreachable_int}"}
|
40
|
+
int_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2INT",
|
41
|
+
:convert_keys_to_ruby => "INT2FIX", :key_type => "int", :unreachable_key => "1<<#{unreachable_int}"}
|
42
42
|
|
43
43
|
|
44
44
|
ruby_value = {:value_type => "VALUE"}
|
45
|
-
long_value = {:assert_value_type => 'T_FIXNUM', :convert_values_from_ruby => "FIX2LONG",
|
46
|
-
|
45
|
+
long_value = {:assert_value_type => 'T_FIXNUM', :convert_values_from_ruby => "FIX2LONG",
|
46
|
+
:convert_values_to_ruby => "LONG2FIX", :value_type => "long"}
|
47
|
+
int_value = {:assert_value_type => 'T_FIXNUM', :convert_values_from_ruby => "FIX2INT",
|
48
|
+
:convert_values_to_ruby => "INT2FIX", :value_type => "int"}
|
47
49
|
|
48
50
|
|
49
|
-
|
50
|
-
int_to_ruby = long_key.merge(ruby_value)
|
51
|
+
long_to_ruby = long_key.merge(ruby_value)
|
51
52
|
ruby_to_ruby = ruby_key.merge(ruby_value)
|
52
53
|
|
54
|
+
long_to_long = long_key.merge(long_value)
|
55
|
+
|
53
56
|
init_funcs = []
|
57
|
+
require 'sane'
|
54
58
|
|
55
|
-
for
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
59
|
+
for key in [ruby_key, long_key, int_key] do
|
60
|
+
for value in [ruby_value, long_value, int_value] do
|
61
|
+
options = key.merge(value)
|
62
|
+
for type in ['sparse', 'dense'] do
|
63
|
+
|
64
|
+
|
65
|
+
# create local variables so that the template can look cleaner
|
66
|
+
unreachable_key = options[:unreachable_key]
|
67
|
+
convert_keys_from_ruby = options[:convert_keys_from_ruby]
|
68
|
+
convert_keys_to_ruby = options[:convert_keys_to_ruby]
|
69
|
+
key_type = options[:key_type]
|
70
|
+
value_type = options[:value_type]
|
71
|
+
english_key_type = options[:key_type] == 'VALUE' ? 'ruby' : options[:key_type]
|
72
|
+
english_value_type = options[:value_type] == 'VALUE' ? 'ruby' : options[:value_type]
|
73
|
+
|
74
|
+
assert_key_type = options[:assert_key_type]
|
75
|
+
convert_values_from_ruby = options[:convert_values_from_ruby]
|
76
|
+
convert_values_to_ruby = options[:convert_values_to_ruby]
|
77
|
+
assert_value_type = options[:assert_value_type]
|
78
|
+
|
79
|
+
if options[:key_type] == 'VALUE'
|
80
|
+
extra_hash_params = ", hashrb, eqrb"
|
81
|
+
else
|
82
|
+
extra_hash_params = nil
|
83
|
+
end
|
84
|
+
|
85
|
+
template = ERB.new(File.read('template/google_hash.cpp.erb'))
|
86
|
+
descriptor = type + '_' + english_key_type + '_to_' + english_value_type;
|
87
|
+
File.write(descriptor + '.cpp', template.result(binding))
|
88
|
+
init_funcs << "init_" + descriptor
|
89
|
+
end
|
76
90
|
end
|
77
|
-
|
78
|
-
template = ERB.new(File.read('template/google_hash.cpp.erb'))
|
79
|
-
descriptor = type + '_' + english_key_type + '_to_' + english_value_type;
|
80
|
-
File.write(descriptor + '.cpp', template.result(binding))
|
81
|
-
init_funcs << "init_" + descriptor
|
82
|
-
end
|
83
91
|
end
|
84
92
|
|
85
93
|
# write our Init method
|
86
94
|
|
87
|
-
template = ERB.new(File.read('template/main.cpp.erb'))
|
95
|
+
template = ERB.new(File.read('template/main.cpp.erb'))
|
88
96
|
File.write 'main.cpp', template.result(binding)
|
89
97
|
|
90
98
|
create_makefile('google_hash')
|
@@ -15,6 +15,10 @@ extern "C" {
|
|
15
15
|
|
16
16
|
// some helpers
|
17
17
|
|
18
|
+
<%
|
19
|
+
# not yet used...
|
20
|
+
if false
|
21
|
+
%>
|
18
22
|
struct eqstr
|
19
23
|
{
|
20
24
|
bool operator()(const char* s1, const char* s2) const
|
@@ -23,12 +27,24 @@ struct eqstr
|
|
23
27
|
}
|
24
28
|
};
|
25
29
|
|
26
|
-
|
30
|
+
struct eqint
|
31
|
+
{
|
32
|
+
inline bool operator()(int s1, int s2) const
|
33
|
+
{
|
34
|
+
return s1 == s2;
|
35
|
+
}
|
36
|
+
};
|
37
|
+
|
38
|
+
<% end %>
|
27
39
|
|
28
|
-
// hashing it is like hash<const char*>
|
29
40
|
|
30
41
|
static ID id_eql, id_hash;
|
31
42
|
|
43
|
+
<% if key_type == 'VALUE' %>
|
44
|
+
static hash<const char*> H;
|
45
|
+
|
46
|
+
// hashing it is like hash<const char*>
|
47
|
+
|
32
48
|
struct eqrb
|
33
49
|
{
|
34
50
|
bool operator()(const VALUE s1, const VALUE s2) const
|
@@ -47,6 +63,10 @@ struct eqrb
|
|
47
63
|
}
|
48
64
|
};
|
49
65
|
|
66
|
+
#ifndef RBIGNUM_DIGITS
|
67
|
+
# define RBIGNUM_DIGITS(a) RBIGNUM(a)->digits
|
68
|
+
#endif
|
69
|
+
|
50
70
|
struct hashrb
|
51
71
|
{
|
52
72
|
size_t operator()(VALUE hash_me) const
|
@@ -68,14 +88,13 @@ struct hashrb
|
|
68
88
|
case T_SYMBOL:
|
69
89
|
// ltodo
|
70
90
|
return hash_me;
|
71
|
-
<% unless RUBY_VERSION < '1.9' %>
|
72
91
|
case T_BIGNUM:
|
73
|
-
|
74
|
-
|
75
|
-
|
92
|
+
return LONG2FIX(((long*)(RBIGNUM_DIGITS(hash_me)))[0]); // its first digit...I'm thinkin'
|
93
|
+
// not sure if this is faster or not...
|
94
|
+
//case T_STRING:
|
76
95
|
//return H(StringValueCStr(hash_me)); // populate/lookup 0.26 -> 0.23 [core is 0.16 somehow]
|
77
96
|
// perhaps they cache?
|
78
|
-
return H(RSTRING_PTR(hash_me)); // 0.23 -> -.22
|
97
|
+
//return H(RSTRING_PTR(hash_me)); // 0.23 -> -.22
|
79
98
|
}
|
80
99
|
|
81
100
|
VALUE hval = rb_funcall(hash_me, id_hash, 0);
|
@@ -97,16 +116,9 @@ struct hashrb
|
|
97
116
|
}
|
98
117
|
};
|
99
118
|
|
100
|
-
|
119
|
+
<% end %>
|
101
120
|
|
102
121
|
/* we end up not needing this...or at least not using it, I don't know if it would be faster than using the default or not
|
103
|
-
struct eqint
|
104
|
-
{
|
105
|
-
inline bool operator()(int s1, int s2) const
|
106
|
-
{
|
107
|
-
return s1 == s2;
|
108
|
-
}
|
109
|
-
};
|
110
122
|
*/
|
111
123
|
|
112
124
|
|
@@ -168,9 +180,17 @@ rb_mri_hash_new(VALUE freshly_created) {
|
|
168
180
|
static VALUE rb_ghash_set(VALUE cb, VALUE set_this, VALUE to_this) {
|
169
181
|
<% if assert_key_type %>
|
170
182
|
if(!(TYPE(set_this) == <%= assert_key_type %>)) {
|
171
|
-
rb_raise(rb_eTypeError, "not valid
|
183
|
+
rb_raise(rb_eTypeError, "not valid key #{assert_key_type}");
|
172
184
|
}
|
173
185
|
<% end %>
|
186
|
+
|
187
|
+
<% if assert_value_type %>
|
188
|
+
if(!(TYPE(to_this) == <%= assert_value_type %>)) {
|
189
|
+
rb_raise(rb_eTypeError, "not valid value #{assert_value_type}");
|
190
|
+
}
|
191
|
+
<% end %>
|
192
|
+
|
193
|
+
|
174
194
|
RCallback* cbs = GetCallbackStruct(cb);
|
175
195
|
(*cbs->hash_map)[ <%= convert_keys_from_ruby %>(set_this)] = <%= convert_values_from_ruby %>(to_this);
|
176
196
|
return to_this; // ltodo test that it returns value...
|
@@ -180,7 +200,7 @@ static VALUE rb_ghash_get(VALUE cb, VALUE get_this) {
|
|
180
200
|
// TODO optionally not assert [?]
|
181
201
|
<% if assert_key_type %>
|
182
202
|
if(!(TYPE(get_this) == <%= assert_key_type %>)) {
|
183
|
-
rb_raise(rb_eTypeError, "not valid
|
203
|
+
rb_raise(rb_eTypeError, "not valid key #{assert_key_type}");
|
184
204
|
}
|
185
205
|
<% end %>
|
186
206
|
RCallback* cbs = GetCallbackStruct(cb);
|
data/spec/spec.google_hash.rb
CHANGED
@@ -156,7 +156,7 @@ describe "google_hash" do
|
|
156
156
|
|
157
157
|
it "should raise on errant values" do
|
158
158
|
a = GoogleHashDenseLongToLong.new
|
159
|
-
proc { a[3] =
|
159
|
+
proc { a[3] = 'abc'}.should raise_error
|
160
160
|
end
|
161
161
|
|
162
162
|
it "should have an Enumerator for values, keys, on demand"
|
data/types.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
GoogleHashSparseRubyToRuby
|
2
|
+
GoogleHashDenseRubyToRuby
|
3
|
+
GoogleHashSparseRubyToLong
|
4
|
+
GoogleHashDenseRubyToLong
|
5
|
+
GoogleHashSparseRubyToInt
|
6
|
+
GoogleHashDenseRubyToInt
|
7
|
+
GoogleHashSparseLongToRuby
|
8
|
+
GoogleHashDenseLongToRuby
|
9
|
+
GoogleHashSparseLongToLong
|
10
|
+
GoogleHashDenseLongToLong
|
11
|
+
GoogleHashSparseLongToInt
|
12
|
+
GoogleHashDenseLongToInt
|
13
|
+
GoogleHashSparseIntToRuby
|
14
|
+
GoogleHashDenseIntToRuby
|
15
|
+
GoogleHashSparseIntToLong
|
16
|
+
GoogleHashDenseIntToLong
|
17
|
+
GoogleHashSparseIntToInt
|
18
|
+
GoogleHashDenseIntToInt
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_hash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- rogerdpack
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-26 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- ext/template/main.cpp.erb
|
120
120
|
- results.txt
|
121
121
|
- spec/spec.google_hash.rb
|
122
|
+
- types.txt
|
122
123
|
has_rdoc: true
|
123
124
|
homepage: http://github.com/rdp/ruby_google_hash
|
124
125
|
licenses: []
|