murmurhash3 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -109,6 +109,26 @@ getblock64(const uint64_t * p, int i)
109
109
  #define getblock64(p, i) (p[i])
110
110
  #endif
111
111
 
112
+ static const char hex[] =
113
+ "000102030405060708090a0b0c0d0e0f" \
114
+ "101112131415161718191a1b1c1d1e1f" \
115
+ "202122232425262728292a2b2c2d2e2f" \
116
+ "303132333435363738393a3b3c3d3e3f" \
117
+ "404142434445464748494a4b4c4d4e4f" \
118
+ "505152535455565758595a5b5c5d5e5f" \
119
+ "606162636465666768696a6b6c6d6e6f" \
120
+ "707172737475767778797a7b7c7d7e7f" \
121
+ "808182838485868788898a8b8c8d8e8f" \
122
+ "909192939495969798999a9b9c9d9e9f" \
123
+ "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" \
124
+ "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" \
125
+ "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" \
126
+ "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" \
127
+ "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" \
128
+ "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
129
+ static const char base64[] =
130
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
131
+
112
132
  /* Finalization mix - force all bits of a hash block to avalanche */
113
133
 
114
134
  static inline FORCE_INLINE uint32_t
@@ -161,7 +181,7 @@ MurmurHash3_x86_32 ( const void * key, long len, uint32_t seed)
161
181
  for(i = -nblocks; i; i++)
162
182
  {
163
183
  h1 ^= mmix32(getblock32(blocks, i));
164
- h1 = ROTL32(h1,13);
184
+ h1 = ROTL32(h1,13);
165
185
  h1 = h1*5+0xe6546b64;
166
186
  }
167
187
 
@@ -184,7 +204,7 @@ MurmurHash3_x86_32 ( const void * key, long len, uint32_t seed)
184
204
  h1 = fmix32(h1);
185
205
 
186
206
  return h1;
187
- }
207
+ }
188
208
 
189
209
  #define C1_128 BIG_CONSTANT(0x87c37b91114253d5)
190
210
  #define C2_128 BIG_CONSTANT(0x4cf5ad432745937f)
@@ -297,11 +317,10 @@ rb_fmix64(VALUE self, VALUE integer)
297
317
  #endif
298
318
  }
299
319
 
300
- static VALUE
301
- rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
320
+ static uint32_t
321
+ rb_murmur3_32_hash(int argc, VALUE* argv, VALUE self)
302
322
  {
303
323
  VALUE rstr;
304
- uint32_t result;
305
324
 
306
325
  if (argc == 0 || argc > 2) {
307
326
  rb_raise(rb_eArgError, "accept 1 or 2 arguments: (string[, seed])");
@@ -309,15 +328,90 @@ rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
309
328
  rstr = argv[0];
310
329
  StringValue(rstr);
311
330
 
312
- result = MurmurHash3_x86_32(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]));
331
+ return MurmurHash3_x86_32(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]));
332
+ }
313
333
 
314
- return UINT2NUM(result);
334
+ static VALUE
335
+ rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
336
+ {
337
+ return UINT2NUM(rb_murmur3_32_hash(argc, argv, self));
338
+ }
339
+
340
+ #define SWAP_32_INT(t) do { \
341
+ (t) = ((t) >> 24) | (((t) >> 8) & 0xff00) | \
342
+ (((t) & 0xff00) << 8) | ((t) << 24); \
343
+ } while (0)
344
+ static VALUE
345
+ rb_murmur3_32_str_digest(int argc, VALUE* argv, VALUE self)
346
+ {
347
+ union {
348
+ uint32_t result;
349
+ char res[4];
350
+ } r;
351
+
352
+ r.result = rb_murmur3_32_hash(argc, argv, self);
353
+ #if WORDS_BIGENDIAN
354
+ SWAP_32_INT(r.result);
355
+ #endif
356
+
357
+ return rb_str_new(r.res, 4);
358
+ }
359
+
360
+ static VALUE
361
+ rb_murmur3_32_str_hexdigest(int argc, VALUE* argv, VALUE self)
362
+ {
363
+ union {
364
+ uint32_t result;
365
+ unsigned char res[4];
366
+ } r;
367
+ char out[8];
368
+ int i;
369
+
370
+ r.result = rb_murmur3_32_hash(argc, argv, self);
371
+ #if WORDS_BIGENDIAN
372
+ SWAP_32_INT(r.result);
373
+ #endif
374
+ for(i = 0; i<4; i++) {
375
+ out[i*2] = hex[r.res[i]*2];
376
+ out[i*2+1] = hex[r.res[i]*2+1];
377
+ }
378
+
379
+ return rb_str_new(out, 8);
380
+ }
381
+
382
+ static VALUE
383
+ rb_murmur3_32_str_base64digest(int argc, VALUE *argv, VALUE self)
384
+ {
385
+ union {
386
+ uint32_t result;
387
+ unsigned char res[6];
388
+ } r;
389
+ char out[8];
390
+ int i;
391
+ r.result = rb_murmur3_32_hash(argc, argv, self);
392
+ #if WORDS_BIGENDIAN
393
+ SWAP_32_INT(r.result);
394
+ #endif
395
+ r.res[4] = 0;
396
+ r.res[5] = 0;
397
+ for(i = 0; i<2; i++) {
398
+ uint32_t b64 =
399
+ ((uint32_t)r.res[i*3+0] << 16) |
400
+ ((uint32_t)r.res[i*3+1] << 8) |
401
+ (uint32_t)r.res[i*3+2];
402
+ out[i*4+0] = base64[(b64 >> 18) & 0x3f];
403
+ out[i*4+1] = base64[(b64 >> 12) & 0x3f];
404
+ out[i*4+2] = base64[(b64 >> 6) & 0x3f];
405
+ out[i*4+3] = base64[(b64 >> 0) & 0x3f];
406
+ }
407
+ out[6] = '=';
408
+ out[7] = '=';
409
+ return rb_str_new(out, sizeof(out));
315
410
  }
316
411
 
317
412
  static VALUE
318
413
  rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
319
414
  {
320
- VALUE rint;
321
415
  uint32_t _int;
322
416
  uint32_t result;
323
417
 
@@ -334,7 +428,6 @@ rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
334
428
  static VALUE
335
429
  rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
336
430
  {
337
- VALUE rint;
338
431
  uint64_t _int;
339
432
  uint32_t result;
340
433
 
@@ -366,6 +459,20 @@ rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
366
459
  result[3] = tmp; \
367
460
  } while (0)
368
461
 
462
+ #define SWAP_128_BIT_BYTE() do { \
463
+ uint32_t tmp; \
464
+ tmp = r.result[0]; \
465
+ SWAP_32_INT(tmp); \
466
+ SWAP_32_INT(r.result[1]); \
467
+ r.result[0] = r.result[1]; \
468
+ r.result[1] = tmp; \
469
+ tmp = r.result[2]; \
470
+ SWAP_32_INT(tmp); \
471
+ SWAP_32_INT(r.result[3]); \
472
+ r.result[2] = r.result[3]; \
473
+ r.result[3] = tmp; \
474
+ } while (0)
475
+
369
476
  #define RETURN_128_BIT() \
370
477
  ar_result = rb_ary_new2(4); \
371
478
  rb_ary_push(ar_result, UINT2NUM(result[0])); \
@@ -374,11 +481,10 @@ rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
374
481
  rb_ary_push(ar_result, UINT2NUM(result[3])); \
375
482
  return ar_result
376
483
 
377
- static VALUE
378
- rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
484
+ static void
485
+ rb_murmur3_128_hash(int argc, VALUE* argv, VALUE self, uint32_t result[4])
379
486
  {
380
- VALUE rstr, ar_result;
381
- uint32_t result[4];
487
+ VALUE rstr;
382
488
 
383
489
  if (argc == 0 || argc > 2) {
384
490
  rb_raise(rb_eArgError, "accept 1 or 2 arguments: (string[, seed])");
@@ -387,12 +493,84 @@ rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
387
493
  StringValue(rstr);
388
494
 
389
495
  MurmurHash3_x64_128(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]), result);
496
+ }
497
+
498
+ static VALUE
499
+ rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
500
+ {
501
+ VALUE ar_result;
502
+ uint32_t result[4];
503
+ rb_murmur3_128_hash(argc, argv, self, result);
390
504
  #if WORDS_BIGENDIAN
391
505
  SWAP_128_BIT();
392
506
  #endif
393
507
  RETURN_128_BIT();
394
508
  }
395
509
 
510
+ static VALUE
511
+ rb_murmur3_128_str_digest(int argc, VALUE *argv, VALUE self)
512
+ {
513
+ union {
514
+ uint32_t result[4];
515
+ char res[16];
516
+ } r;
517
+ rb_murmur3_128_hash(argc, argv, self, r.result);
518
+ #if WORDS_BIGENDIAN
519
+ SWAP_128_BIT_BYTE();
520
+ #endif
521
+ return rb_str_new(r.res, sizeof(r.res));
522
+ }
523
+
524
+ static VALUE
525
+ rb_murmur3_128_str_hexdigest(int argc, VALUE *argv, VALUE self)
526
+ {
527
+ union {
528
+ uint32_t result[4];
529
+ unsigned char res[16];
530
+ } r;
531
+ char out[32];
532
+ int i;
533
+ rb_murmur3_128_hash(argc, argv, self, r.result);
534
+ #if WORDS_BIGENDIAN
535
+ SWAP_128_BIT_BYTE();
536
+ #endif
537
+ for(i = 0; i<16; i++) {
538
+ out[i*2] = hex[r.res[i]*2];
539
+ out[i*2+1] = hex[r.res[i]*2+1];
540
+ }
541
+ return rb_str_new(out, sizeof(out));
542
+ }
543
+
544
+ static VALUE
545
+ rb_murmur3_128_str_base64digest(int argc, VALUE *argv, VALUE self)
546
+ {
547
+ union {
548
+ uint32_t result[4];
549
+ unsigned char res[18];
550
+ } r;
551
+ char out[24];
552
+ int i;
553
+ rb_murmur3_128_hash(argc, argv, self, r.result);
554
+ #if WORDS_BIGENDIAN
555
+ SWAP_128_BIT_BYTE();
556
+ #endif
557
+ r.res[16] = 0;
558
+ r.res[17] = 0;
559
+ for(i = 0; i<6; i++) {
560
+ uint32_t b64 =
561
+ ((uint32_t)r.res[i*3+0] << 16) |
562
+ ((uint32_t)r.res[i*3+1] << 8) |
563
+ (uint32_t)r.res[i*3+2];
564
+ out[i*4+0] = base64[(b64 >> 18) & 0x3f];
565
+ out[i*4+1] = base64[(b64 >> 12) & 0x3f];
566
+ out[i*4+2] = base64[(b64 >> 6) & 0x3f];
567
+ out[i*4+3] = base64[(b64 >> 0) & 0x3f];
568
+ }
569
+ out[22] = '=';
570
+ out[23] = '=';
571
+ return rb_str_new(out, sizeof(out));
572
+ }
573
+
396
574
  static VALUE
397
575
  rb_murmur3_128_int32_hash(int argc, VALUE* argv, VALUE self)
398
576
  {
@@ -441,6 +619,9 @@ Init_native_murmur() {
441
619
 
442
620
  rb_define_method(mod_murmur32, "murmur3_32_fmix", rb_fmix32, 1);
443
621
  rb_define_method(mod_murmur32, "murmur3_32_str_hash", rb_murmur3_32_str_hash, -1);
622
+ rb_define_method(mod_murmur32, "murmur3_32_str_digest", rb_murmur3_32_str_digest, -1);
623
+ rb_define_method(mod_murmur32, "murmur3_32_str_hexdigest", rb_murmur3_32_str_hexdigest, -1);
624
+ rb_define_method(mod_murmur32, "murmur3_32_str_base64digest", rb_murmur3_32_str_base64digest, -1);
444
625
  rb_define_method(mod_murmur32, "murmur3_32_int32_hash", rb_murmur3_32_int32_hash, -1);
445
626
  rb_define_method(mod_murmur32, "murmur3_32_int64_hash", rb_murmur3_32_int64_hash, -1);
446
627
 
@@ -448,12 +629,18 @@ Init_native_murmur() {
448
629
  singleton = rb_singleton_class(mod_murmur32);
449
630
  rb_define_alias(singleton, "fmix", "murmur3_32_fmix");
450
631
  rb_define_alias(singleton, "str_hash", "murmur3_32_str_hash");
632
+ rb_define_alias(singleton, "str_digest", "murmur3_32_str_digest");
633
+ rb_define_alias(singleton, "str_hexdigest", "murmur3_32_str_hexdigest");
634
+ rb_define_alias(singleton, "str_base64digest", "murmur3_32_str_base64digest");
451
635
  rb_define_alias(singleton, "int32_hash", "murmur3_32_int32_hash");
452
636
  rb_define_alias(singleton, "int64_hash", "murmur3_32_int64_hash");
453
637
 
454
638
 
455
639
  rb_define_method(mod_murmur128, "murmur3_128_fmix", rb_fmix64, 1);
456
640
  rb_define_method(mod_murmur128, "murmur3_128_str_hash", rb_murmur3_128_str_hash, -1);
641
+ rb_define_method(mod_murmur128, "murmur3_128_str_digest", rb_murmur3_128_str_digest, -1);
642
+ rb_define_method(mod_murmur128, "murmur3_128_str_hexdigest", rb_murmur3_128_str_hexdigest, -1);
643
+ rb_define_method(mod_murmur128, "murmur3_128_str_base64digest", rb_murmur3_128_str_base64digest, -1);
457
644
  rb_define_method(mod_murmur128, "murmur3_128_int32_hash", rb_murmur3_128_int32_hash, -1);
458
645
  rb_define_method(mod_murmur128, "murmur3_128_int64_hash", rb_murmur3_128_int64_hash, -1);
459
646
 
@@ -461,6 +648,9 @@ Init_native_murmur() {
461
648
  singleton = rb_singleton_class(mod_murmur128);
462
649
  rb_define_alias(singleton, "fmix", "murmur3_128_fmix");
463
650
  rb_define_alias(singleton, "str_hash", "murmur3_128_str_hash");
651
+ rb_define_alias(singleton, "str_digest", "murmur3_128_str_digest");
652
+ rb_define_alias(singleton, "str_hexdigest", "murmur3_128_str_hexdigest");
653
+ rb_define_alias(singleton, "str_base64digest", "murmur3_128_str_base64digest");
464
654
  rb_define_alias(singleton, "int32_hash", "murmur3_128_int32_hash");
465
655
  rb_define_alias(singleton, "int64_hash", "murmur3_128_int64_hash");
466
656
 
@@ -53,10 +53,25 @@ module MurmurHash3
53
53
  str_hash([i].pack("Q<"), seed)
54
54
  end
55
55
 
56
+ def murmur3_32_str_digest(str, seed=0)
57
+ [str_hash(str, seed)].pack("V")
58
+ end
59
+
60
+ def murmur3_32_str_hexdigest(str, seed=0)
61
+ [str_hash(str, seed)].pack("V").unpack("H*")[0]
62
+ end
63
+
64
+ def murmur3_32_str_base64digest(str, seed=0)
65
+ [[str_hash(str, seed)].pack("V")].pack("m").chomp!
66
+ end
67
+
56
68
  class << self
57
69
  include MurmurHash3::PureRuby32
58
70
  alias fmix murmur3_32_fmix
59
71
  alias str_hash murmur3_32_str_hash
72
+ alias str_digest murmur3_32_str_digest
73
+ alias str_hexdigest murmur3_32_str_hexdigest
74
+ alias str_base64digest murmur3_32_str_base64digest
60
75
  alias int32_hash murmur3_32_int32_hash
61
76
  alias int64_hash murmur3_32_int64_hash
62
77
  end
@@ -144,10 +159,25 @@ module MurmurHash3
144
159
  str_hash([i].pack("Q<"), seed)
145
160
  end
146
161
 
162
+ def murmur3_128_str_digest(str, seed=0)
163
+ str_hash(str, seed).pack("V4")
164
+ end
165
+
166
+ def murmur3_128_str_hexdigest(str, seed=0)
167
+ str_hash(str, seed).pack("V4").unpack("H*")[0]
168
+ end
169
+
170
+ def murmur3_128_str_base64digest(str, seed=0)
171
+ [str_hash(str, seed).pack("V4")].pack('m').chomp!
172
+ end
173
+
147
174
  class << self
148
175
  include MurmurHash3::PureRuby128
149
176
  alias fmix murmur3_128_fmix
150
177
  alias str_hash murmur3_128_str_hash
178
+ alias str_digest murmur3_128_str_digest
179
+ alias str_hexdigest murmur3_128_str_hexdigest
180
+ alias str_base64digest murmur3_128_str_base64digest
151
181
  alias int32_hash murmur3_128_int32_hash
152
182
  alias int64_hash murmur3_128_int64_hash
153
183
  end
@@ -1,3 +1,3 @@
1
1
  module MurmurHash3
2
- VERSION = "0.1.3.1"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: murmurhash3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3.1
5
4
  prerelease:
5
+ version: 0.1.4
6
6
  platform: ruby
7
7
  authors:
8
8
  - Sokolov Yura 'funny-falcon'
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-09 00:00:00.000000000 Z
12
+ date: 2014-01-15 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: implementation of murmur3 hashing function
15
15
  email:
@@ -34,20 +34,20 @@ require_paths:
34
34
  - lib
35
35
  - ext
36
36
  required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
37
38
  requirements:
38
39
  - - ! '>='
39
40
  - !ruby/object:Gem::Version
40
41
  version: '0'
41
- none: false
42
42
  required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
43
44
  requirements:
44
45
  - - ! '>='
45
46
  - !ruby/object:Gem::Version
46
47
  version: '0'
47
- none: false
48
48
  requirements: []
49
49
  rubyforge_project:
50
- rubygems_version: 1.8.25
50
+ rubygems_version: 1.8.23
51
51
  signing_key:
52
52
  specification_version: 3
53
53
  summary: implements murmur3 hashing function