google_hash 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -21,18 +21,27 @@ populate integer 0.114
21
21
  lookup int 0.080
22
22
 
23
23
 
24
- GoogleHashSparseIntToInt # said to be much more memory efficient
24
+ GoogleHashSparseIntToInt # said to be more memory efficient
25
25
  populate integer 0.242
26
26
  #each 0.046
27
27
  lookup int 0.099
28
28
 
29
- These also use significantly less memory, because (if you specify IntToInt, it stores only 4 bytes per int, instead of Ruby's
30
- usual 20 bytes). This also frees up Ruby so it doesn't hvae to garbage collect as much. Yea!
29
+ These also use less memory, because (if you specify IntToInt, it stores only 4 bytes per int, instead of Ruby's
30
+ usual 20 bytes), and usually less overall RAM for the hash store.
31
+ This also frees up Ruby so it doesn't have to garbage collect as much. Yea!
32
+
33
+ For instance, with 1M ints, doing a GC takes this long, comparatively:
34
+ GoogleHashDenseIntToInt "dense took 0.002"
35
+ "ruby hash took 0.103"
36
+
37
+ per GC. Garbage collections happen all the time, so this is meant to speed them up.
31
38
 
32
39
  See also the results.txt file for more OS benchmark results.
33
40
 
34
41
  You can also run your own benchmarks to see how much faster it would be for you ("try before you buy"), see spec/benchmark.rb file.
35
42
 
43
+ The best benchmark, of course, is to integrate and run it in your own app.
44
+
36
45
  Here is how it performs, if used as a "replacement" for the Ruby standard Hash:
37
46
 
38
47
  Ruby Standard Hash
@@ -45,7 +54,7 @@ lookup string 0.642
45
54
  lookup symbol 0.082
46
55
 
47
56
  GoogleHashDenseRubyToRuby
48
- populate string 0.312
57
+ populate string 0.312 # slower here
49
58
  populate symbol 0.136
50
59
  populate integer 0.172
51
60
  #each 0.077
data/TODO CHANGED
@@ -2,4 +2,7 @@ could be faster:
2
2
 
3
3
  http://code.google.com/p/ulib/wiki/AlignedHashingPerformance
4
4
 
5
- some specs fail [?]
5
+ some specs fail [?]
6
+
7
+ faster install, yikes!
8
+ single file for C code...remove unit test junk from sparsehash code?
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.0
1
+ 0.8.0
data/changelog CHANGED
@@ -1,3 +1,8 @@
1
+ 0.8.0:
2
+ add a delete method for the Sparse** classes
3
+ actually delete the hashmaps when the object is collected by Ruby, instead of leaking
4
+ hopefully work with more versions of GCC
5
+
1
6
  0.7.0:
2
7
  fix building in linux with newer GCC's, fix building in windows with broken system command (?)
3
8
  bump internal google_hash version to 0.8.2
@@ -0,0 +1,4 @@
1
+ rm *.cpp
2
+ rm *.o
3
+ rm *.so
4
+ rm *.def
@@ -37,7 +37,8 @@ else
37
37
  end
38
38
 
39
39
  ruby_key = {:convert_keys_from_ruby => "", :convert_keys_to_ruby => "", :key_type => "VALUE", :english_key_type => "ruby",
40
- :extra_hash_params => ", hashrb, eqrb", :unreachable_key => "current_instance"} # TODO NULL is false here?
40
+ :extra_hash_params => ", hashrb, eqrb", :unreachable_key => "current_instance"}
41
+
41
42
  int_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2INT",
42
43
  :convert_keys_to_ruby => "INT2FIX", :key_type => "int", :unreachable_key => "1<<#{unreachable_int}"}
43
44
 
@@ -46,9 +47,11 @@ int_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2INT",
46
47
  long_key = {:assert_key_type => 'T_FIXNUM', :convert_keys_from_ruby => "FIX2LONG",
47
48
  :convert_keys_to_ruby => "LONG2FIX", :key_type => "long", :unreachable_key => "1<<#{unreachable_long}"}
48
49
 
50
+ # currently "big numbers" we handle by storing them as a double
51
+ # TODO floats [does ruby do real doubles underneath?] too
49
52
  bignum_as_double_key = {:assert_key_type => ['T_BIGNUM', 'T_FIXNUM'], :convert_keys_from_ruby => "rb_big2dbl",
50
53
  :convert_keys_to_ruby => "rb_dbl2big", :key_type => "double", :unreachable_key => "1<<#{unreachable_long}", # LODO is this a bignum value though? LODO TEST this key on 64 bit!
51
- :extra_hash_params => ", hashdouble, eqdouble",
54
+ #:extra_hash_params => ", hashdouble, eqdouble", # these methods provided natively these days?
52
55
  :extra_set_code => "if(TYPE(set_this) == T_FIXNUM)\nset_this = rb_int2big(FIX2INT(set_this));",
53
56
  :extra_get_code => "if(TYPE(get_this) == T_FIXNUM) \n get_this = rb_int2big(FIX2INT(get_this));"
54
57
  }
@@ -63,8 +66,7 @@ bignum_as_double_value = {:assert_value_type => ['T_BIGNUM', 'T_FIXNUM'], :conve
63
66
  :convert_values_to_ruby => "rb_dbl2big", :value_type => "double",
64
67
  :extra_set_code2 => "if(TYPE(to_this) == T_FIXNUM)\nto_this = rb_int2big(FIX2INT(to_this));"
65
68
  }
66
-
67
-
69
+
68
70
  init_funcs = []
69
71
 
70
72
  for key in [ruby_key, int_key, bignum_as_double_key, long_key] do
data/ext/go.bat CHANGED
@@ -1,6 +1,4 @@
1
- rm *.cpp
2
- rm *.o
3
- rm *.so
1
+ call clean.bat
4
2
  ruby extconf.rb
5
3
  call make
6
4
  spec.bat
@@ -0,0 +1 @@
1
+ ruby -C ../spec spec.google_hash.rb
@@ -170,35 +170,36 @@ static void mark_hash_map_values(RCallback *incoming) {
170
170
  }
171
171
 
172
172
  static void free_hash_callback(RCallback* cb) {
173
- // delete cb->hash_map;
173
+ delete cb->hash_map; // I had this line commented out one? huh?
174
174
  }
175
175
 
176
- static VALUE callback_alloc _((VALUE)); // what does this line do?
177
-
178
176
  static VALUE
179
177
  callback_alloc( VALUE klass )
180
178
  {
181
179
  RCallback* cbs;
182
- VALUE current_instance = Data_Make_Struct(klass, RCallback, mark_hash_map_values, free_hash_callback, cbs);
180
+ VALUE current_instance = Data_Make_Struct(klass, RCallback, mark_hash_map_values, free_hash_callback, cbs); // XXXX the last parameter is just a pointer? huh?
183
181
 
184
182
  cbs->hash_map = new <%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >();
185
- <% if unreachable_key && type == 'dense' %>
186
- cbs->hash_map->set_empty_key(<%= unreachable_key %>);
183
+ <% if type == 'dense' %>
184
+ // cbs->hash_map->set_deleted_key(<%= unreachable_key %>);
185
+ // also needs another one ?
186
+ cbs->hash_map->set_empty_key(<%= unreachable_key %>);
187
187
  <% end %>
188
188
 
189
+ <% if type == 'sparse' %>
190
+ cbs->hash_map->set_deleted_key(<%= unreachable_key %>);
191
+ <% end %>
192
+
189
193
  return current_instance;
190
194
  }
191
195
 
192
-
193
196
  #define GetCallbackStruct(obj) (Check_Type(obj, T_DATA), (RCallback*)DATA_PTR(obj))
194
197
 
195
-
196
198
  static VALUE
197
199
  rb_mri_hash_new(VALUE freshly_created) {
198
-
199
200
  // we don't actually have anything special to do here...
200
201
  // unless someone subclassed us or something [?]
201
- // ltodo test
202
+ // XXXX test
202
203
  return freshly_created;
203
204
  }
204
205
 
@@ -227,8 +228,8 @@ static VALUE rb_ghash_set(VALUE cb, VALUE set_this, VALUE to_this) {
227
228
  return to_this; // ltodo test that it returns value...
228
229
  }
229
230
 
230
- static VALUE rb_ghash_get(VALUE cb, VALUE get_this, int just_check_for_presence) {
231
- // TODO optionally not type check assert anywhere [?]
231
+ static VALUE rb_ghash_get(VALUE cb, VALUE get_this, int just_check_for_presence, int delete_it) {
232
+ // TODO optionally not type check assert anymore [if it slows down computationally, that is...]
232
233
  <% if assert_key_type %>
233
234
  if(!(TYPE(get_this) == <%= assert_key_type %>)) {
234
235
  <%= "if(!(TYPE(get_this) == #{assert_key_type2}))" if assert_key_type2 %>
@@ -240,26 +241,38 @@ static VALUE rb_ghash_get(VALUE cb, VALUE get_this, int just_check_for_presence)
240
241
 
241
242
  <%= type %>_hash_map< <%= key_type %>, <%= value_type %> <%= extra_hash_params %> >::iterator out = cbs->hash_map->find(<%= convert_keys_from_ruby %>(get_this));
242
243
 
243
- if(out == cbs->hash_map->end()) { // not found...hmm...is this False, though?
244
+ if(out == cbs->hash_map->end()) { // key not found in hashmap
244
245
  if(just_check_for_presence)
245
246
  return Qfalse;
246
- else
247
- return Qnil;
247
+ else {
248
+ // key not found, or delete requested and key not found, return nil
249
+ return Qnil;
250
+ }
248
251
  } else {
249
252
  if(just_check_for_presence)
250
253
  return Qtrue;
251
- else
252
- return <%= convert_values_to_ruby %>(out->second);
254
+ else {
255
+ VALUE out2 = <%= convert_values_to_ruby %>(out->second);
256
+ if(delete_it) {
257
+ cbs->hash_map->erase(out);
258
+ // still return it
259
+ }
260
+ return out2;
261
+ }
253
262
  }
254
263
 
255
264
  }
256
265
 
257
266
  static VALUE rb_ghash_get_value(VALUE cb, VALUE get_this) {
258
- return rb_ghash_get(cb, get_this, 0);
267
+ return rb_ghash_get(cb, get_this, 0, 0);
259
268
  }
260
269
 
261
270
  static VALUE rb_ghash_get_present(VALUE cb, VALUE get_this) {
262
- return rb_ghash_get(cb, get_this, 1);
271
+ return rb_ghash_get(cb, get_this, 1, 0);
272
+ }
273
+
274
+ static VALUE rb_ghash_delete(VALUE cb, VALUE delete_this) {
275
+ return rb_ghash_get(cb, delete_this, 0, 1);
263
276
  }
264
277
 
265
278
  static VALUE rb_ghash_size(VALUE cb) {
@@ -267,6 +280,11 @@ static VALUE rb_ghash_size(VALUE cb) {
267
280
  return INT2FIX(incoming->hash_map->size());
268
281
  }
269
282
 
283
+ static VALUE rb_ghash_clear(VALUE cb) {
284
+ RCallback* incoming = GetCallbackStruct(cb);
285
+ incoming->hash_map->clear();
286
+ return cb;
287
+ }
270
288
 
271
289
  static VALUE rb_ghash_each(VALUE cb) {
272
290
  RCallback* incoming = GetCallbackStruct(cb);
@@ -325,10 +343,14 @@ void init_<%= type %>_<%= english_key_type %>_to_<%= english_value_type %>() {
325
343
  rb_define_method(rb_cGoogleHashLocal, "initialize", RUBY_METHOD_FUNC(rb_mri_hash_new), 0);
326
344
  rb_define_method(rb_cGoogleHashLocal, "[]=", RUBY_METHOD_FUNC(rb_ghash_set), 2);
327
345
  rb_define_method(rb_cGoogleHashLocal, "[]", RUBY_METHOD_FUNC(rb_ghash_get_value), 1);
328
- rb_define_method(rb_cGoogleHashLocal, "each", RUBY_METHOD_FUNC(rb_ghash_each), 0);
346
+ rb_define_method(rb_cGoogleHashLocal, "each", RUBY_METHOD_FUNC(rb_ghash_each), 0);
329
347
  rb_define_method(rb_cGoogleHashLocal, "values", RUBY_METHOD_FUNC(rb_ghash_values), 0);
330
348
  rb_define_method(rb_cGoogleHashLocal, "keys", RUBY_METHOD_FUNC(rb_ghash_keys), 0);
331
349
  rb_define_method(rb_cGoogleHashLocal, "has_key?", RUBY_METHOD_FUNC(rb_ghash_get_present), 1);
350
+ <% if type == 'sparse' %> // only ones its safe on for now, till I get it figured out...
351
+ rb_define_method(rb_cGoogleHashLocal, "delete", RUBY_METHOD_FUNC(rb_ghash_delete), 1);
352
+ rb_define_method(rb_cGoogleHashLocal, "clear", RUBY_METHOD_FUNC(rb_ghash_clear), 0);
353
+ <% end %>
332
354
  rb_define_method(rb_cGoogleHashLocal, "key?", RUBY_METHOD_FUNC(rb_ghash_get_present), 1);
333
355
  rb_define_method(rb_cGoogleHashLocal, "member?", RUBY_METHOD_FUNC(rb_ghash_get_present), 1);
334
356
  rb_define_method(rb_cGoogleHashLocal, "include?", RUBY_METHOD_FUNC(rb_ghash_get_present), 1);
@@ -0,0 +1,20 @@
1
+
2
+ require 'google_hash'
3
+ require 'sane'
4
+ require 'benchmark'
5
+ sparse = false
6
+ dense = false
7
+ ruby = true
8
+ if sparse
9
+ a = GoogleHashSparseIntToInt.new
10
+ p 'sparse'
11
+ elsif dense
12
+ p 'dense'
13
+ a = GoogleHashDenseIntToInt.new
14
+ else
15
+ p 'ruby'
16
+ a = []
17
+ end
18
+ took = Benchmark.realtime { 200_00000.times {|i| a[i] = i} }
19
+
20
+ p 'took', took.group_digits, OS.rss_bytes.group_digits, Benchmark.realtime { GC.start}.group_digits, ObjectSpace.count_objects
@@ -49,24 +49,44 @@ describe "google_hash" do
49
49
 
50
50
  it "should have all the methods desired" do
51
51
  # guess these could all be tests, themselves...
52
- @subject.each{}
53
- @subject[33] = 'abc'
52
+ @subject.each{|k, v| raise}
53
+ @subject[33] = 34
54
54
  @subject.length.should == 1
55
- @subject.each{}
55
+ sum = 0
56
+ @subject.each{|k, v| sum += k; sum += v}
57
+ sum.should == (33+34)
58
+ @subject[33] = 'abc'
56
59
  @subject.each{|k, v|
57
60
  k.should == 33
58
61
  v.should == 'abc'
59
62
  }
60
- @subject.delete(33).should == 'abc' # guess we don't do delete yet [?]
61
- @subject.length.should == 0
62
- @subject[33] = 'abc'
63
- @subject.length.should == 1
63
+
64
64
  @subject.clear
65
+ @subject.length.should == 0
66
+ @subject.keys.should == []
67
+ @subject[33] = 'abc'
68
+ @subject.delete(33).should == 'abc' # we don't actually have these methods yet :)
65
69
  @subject.length.should == 0
70
+ @subject[33] = 'def'
71
+ @subject[33].should == 'def'
66
72
  end
67
73
 
68
- it 'should not be able to set the absent key for double' do
69
- fail
74
+ pending "they should all have a clear method" do
75
+ for kls in get_all_classes
76
+ kls.new.clear
77
+ end
78
+ end
79
+
80
+ it 'should not be able to set the absent key for double' do
81
+ if OS.bits == 32
82
+ unreachable_int = 31
83
+ unreachable_long = 31
84
+ else
85
+ unreachable_int = 31
86
+ unreachable_long = 63
87
+ end
88
+ proc { GoogleHashSparseIntToInt[1<<unreachable_int] = 3 } # should raise...
89
+ proc { GoogleHashSparseLongToInt[1<<unreachable_long] = 3 }
70
90
  end
71
91
 
72
92
  def populate(a)
@@ -122,11 +142,10 @@ describe "google_hash" do
122
142
  end
123
143
 
124
144
  if OS.bits == 64
125
- it "should disallow keys like 1<<40 for ints on 64 bit"
145
+ it "should disallow keys like 1<<40 for ints on 64 bit, since they'll be lost"
126
146
  end
127
147
 
128
- it "should have sets"
129
- it "should have Set#each"
148
+ it "should have sets, Set#each, etc."
130
149
 
131
150
  it "Set should have #combination calls" do
132
151
  @subject[33] = 34
@@ -176,29 +195,34 @@ describe "google_hash" do
176
195
  a[10000000000000000000].should == 1
177
196
  end
178
197
 
179
- it "should not leak" do
180
- pending 'something that might leak'
181
- a = GoogleHashDenseIntToInt.new
198
+ it "should not leak [?]" do
199
+ a = GoogleHashSparseIntToInt.new
182
200
  100_000.times {
183
201
  a[1] = 1
184
202
  a[1]
185
203
  a.each{|k, v|}
186
204
  a.delete(1) rescue nil
187
205
  }
206
+ a.length.should == 0
188
207
  OS.rss_bytes.should be < 25_000_000
189
208
  end
190
209
 
210
+ it "should do delete from dense" do
211
+ GoogleHashDenseDoubleToInt.new.delete('a').should == nil
212
+ end
213
+
191
214
  it "should do int values as doubles" do
192
215
  a = GoogleHashDenseDoubleToInt.new
193
216
  a[1] = 1
194
217
  a[1].should == 1
195
218
  end
196
219
 
197
- it "should do float values as doubles" do
198
- pending "interest in floats"
199
- a = GoogleHashDenseDoubleToInt.new
200
- a[1.0] = 1
201
- a[1.0].should == 1
220
+ it "should do float values as doubles, too, not just big numbers" do
221
+ pending "request" do
222
+ a = GoogleHashDenseDoubleToInt.new
223
+ a[1.0] = 1
224
+ a[1.0].should == 1
225
+ end
202
226
  end
203
227
 
204
228
  it "should do bignum to doubles et al" do
@@ -220,25 +244,16 @@ describe "google_hash" do
220
244
  a[10000000000000000000] = 'abc'
221
245
  end
222
246
 
223
- it 'should be able to delete bignums without leaking' do
224
- pending
225
- a = GoogleHashDenseBignumToBignum.new
226
- 100_000.times {
227
- a[10000000000000000000] = 1
228
- a.size.should == 1
229
- a.delete[10000000000000000000]
230
- a.size.should == 0
231
- }
232
- assert OS.rss_bytes < 100_000
233
- end
234
-
235
- it "should have an Enumerator for values, keys, an on demand, getNext enumerator object..."
247
+ it "should have an Enumerator return for values, keys [?] instead of an array?"
236
248
 
237
- it "should have a block access for values, keys" do
238
- pending "interest"
239
- @a[3] = 4
240
- a.each_value {}
241
- a.each_key {}
249
+ it "should have a block access for just values, or just keys" do
250
+ pending "interest" do
251
+ @subject[3] = 4
252
+ sum = 0
253
+ @subject.each_value {|v| sum += v}
254
+ @subject.each_key {|k| sum += k}
255
+ sum.should == 7
256
+ end
242
257
  end
243
258
 
244
259
  it "should have nice inspect" do
@@ -248,24 +263,29 @@ describe "google_hash" do
248
263
  a.inspect.should == "GoogleHashSparseIntToRuby {3=>4,4=>5}"
249
264
  end
250
265
 
251
- it "should have sets, too, not just hashes"
252
-
253
266
  it "should skip GC when native to native" do
254
- # tough to test...
267
+ pending 'caring, get from gc_bench.rb'
268
+ end
269
+
270
+ def get_all_classes
271
+ Object.constants.grep(/googlehash/i).map{|c| Object.const_get(c) }
255
272
  end
256
273
 
257
274
  it "should allow for setting the right keys" do
258
- all_classes = Object.constants.grep(/googlehash/i).map{|c| Object.const_get(c) }
259
- all_classes.select{|c| c.to_s =~ /(int|long)to/i}.each{|c|
275
+ all_classes = get_all_classes
276
+ all_classes.select{|c| c.to_s =~ /(int|long|double)to/i}.each{|c|
260
277
  p c
261
278
  keys = [0, 1, -1, 1<<29]
262
279
  if OS.bits == 64
263
- keys << (1<<61)
280
+ keys << (1<<61)
264
281
  end
265
282
  keys.each{|k|
266
283
  instance = c.new
284
+ instance[k].should == nil
267
285
  instance[k] = 0
286
+ instance[k-1] = 2
268
287
  instance[k].should == 0
288
+ instance[k-1].should == 2
269
289
  }
270
290
  }
271
291
  end
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_hash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-10 00:00:00.000000000 Z
12
+ date: 2013-01-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sane
@@ -58,6 +58,7 @@ files:
58
58
  - TODO
59
59
  - VERSION
60
60
  - changelog
61
+ - ext/clean.bat
61
62
  - ext/extconf.rb
62
63
  - ext/go.bat
63
64
  - ext/sparsehash-1.8.1/AUTHORS
@@ -142,12 +143,15 @@ files:
142
143
  - ext/sparsehash-1.8.1/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj
143
144
  - ext/sparsehash-1.8.1/vsprojects/time_hash_map/time_hash_map.vcproj
144
145
  - ext/sparsehash-1.8.1/vsprojects/type_traits_unittest/type_traits_unittest.vcproj
146
+ - ext/spec.bat
145
147
  - ext/template/google_hash.cpp.erb
146
148
  - ext/template/main.cpp.erb
147
149
  - results.txt
150
+ - spec/bench_gc.rb
148
151
  - spec/benchmark.rb
149
152
  - spec/scale.rb
150
153
  - spec/spec.google_hash.rb
154
+ - to_build_locally_run_ext_go_bat
151
155
  homepage: http://github.com/rdp/ruby_google_hash
152
156
  licenses: []
153
157
  post_install_message:
@@ -168,7 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
172
  version: '0'
169
173
  requirements: []
170
174
  rubyforge_project:
171
- rubygems_version: 1.8.23
175
+ rubygems_version: 1.8.24
172
176
  signing_key:
173
177
  specification_version: 3
174
178
  summary: Ruby wrappers to the google hash library