RubyGems - mammoth-hasher - Versions diffs - 0.1.0 → 0.2.0 - Mend

mammoth-hasher 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5c93a9f01f7da01607911a7f7c5df3acc1ff0957
-  data.tar.gz: 1704ea09e5aeb7422ed007826d66363c2ec6737d
+  metadata.gz: e8a53d70781a6348d58af444f42747ffcb700b64
+  data.tar.gz: caa0fff7b6d9053006daf8b1bcb2cd4e4f0e5c5f
 SHA512:
-  metadata.gz: b6040ec90679e24ecd5e1bbdf08a6ac719ac1a0621ec4fa8767a092b1864b448aa47a0bc52c1940fa5060f825392bc060030d25b0ff14b17c3e0ea0827729cc2
-  data.tar.gz: 9c5802cf32e2743a6f99448f62e108880c3ad1963b6052de8442b52aee504c7e02237d088879beb2819854caa0c4d0ccc305f84d5dd0e936fb4f54cd1f35614e
+  metadata.gz: 4dcad29b2156bbe8343a2f5fd93c637b1ec447ffdb2f062b5fc6eb5dab3cc9857ab7c325aa29a3fe862c2ee2622f8f211e04d24f8e3da2204747bba2f2531947
+  data.tar.gz: 7b224f29d208466247a2a89b7e0e60190ab42a4b088bd01a72bbc982cdefbd6389f0369e3304c70838a6f5f6b591667e189f03cf4b73a3d6a45c91befb489a1d

data/Rakefile ADDED

@@ -0,0 +1,8 @@
+require 'rake/testtask'
+Rake::TestTask.new do |t|
+  t.libs << 'test'
+end
+desc "Run tests"
+task :default => :test

data/lib/mammoth-hasher.rb CHANGED

@@ -13,7 +13,7 @@ class MammothHasher
     # algorithm parameters
     # WARNING: if you change them, the resulting hash will be different !
     number_of_chunks = 100
-    length_of_chunks = 100
+    length_of_chunks = 4
     # we get the file size (in bytes), used as PRNG (Pseudo Random Number Generator)
     filesize = File.size filename
@@ -22,18 +22,22 @@ class MammothHasher
     # the MD5 of the whole file than to apply our custom algorithm
     if filesize <= number_of_chunks*length_of_chunks
       file = File.open(filename, 'r')
-      final_hash = Digest::MD5.file(file).hexdigest
+      hash = Digest::MD5.file(file).hexdigest
       file.close
       puts (Time.now - time_start).to_s + " seconds" if debug
-      return final_hash
+      return hash
     end
     # we initialize the PRNG
     prng = Random.new filesize
-    # we get 1000 numbers between 0 and filesize-size_of_chunk
-    offsets = []
-    for i in 0..number_of_chunks
+    # we always get a chunk at the offset 0 (beginning of file)
+    # because that's where the magic number indicating the file type is
+    # so making sure that it's still the same may prevent from some attacks
+    offsets = [0]
+    # we get 99 other offsets between 0 and (filesize - length_of_chunk)
+    for i in 1..(number_of_chunks-1)
       offsets << prng.rand(filesize - length_of_chunks)
     end
@@ -41,24 +45,17 @@ class MammothHasher
     # (in order to optimize the way the file will be read (in only one direction))
     offsets.sort
-    # we compute the hashes of several parts of the file
-    hashes = ""
-    # first, we compute the hash of the first bytes of the file,
-    # because that's where the magic number indicating the file type is
-    # so making sure that it's still the same may be safer
-    hashes << Digest::MD5.new.hexdigest(File.read(filename, 100))
-    # for each offset, we compute the hash of the following bytes
-    # and we concatenate these hashes
+    # we concatenate all the bytes from all the chunks at the offset we choose
+    bytes = ""
     for offset in offsets
-      hashes += Digest::MD5.new.hexdigest(File.read(filename, length_of_chunks, offset))
+      bytes = "#{bytes}#{File.read(filename, length_of_chunks, offset)}"
     end
-    # we compute the final hash, which is the hash of the concatenation
-    # of the previous hashes
-    final_hash = Digest::MD5.new.hexdigest hashes
+    # we compute the final hash, which is the hash of the concatenation of all chunks
+    hash = Digest::MD5.new.hexdigest bytes
     puts (Time.now - time_start).to_s + " seconds" if debug
-    return final_hash
+    return hash
   end
 end

data/test/test_mammoth-hasher.rb ADDED

@@ -0,0 +1,24 @@
+require 'test/unit'
+require 'digest'
+require 'mammoth-hasher'
+class MammothHasherTest < Test::Unit::TestCase
+  # for small files, instead of using our custom algorithm,
+  # it's simpler to use the md5 hash directly,
+  # so here we test that MammothHasher hash is the same than md5 hash
+  def test_small_file_hash
+    filename = "test/fixtures/small.txt"
+    file = File.open(filename, 'r')
+    assert_equal MammothHasher.hash(filename), Digest::MD5.file(file).hexdigest
+  end
+  def test_hash_size
+    filename = "test/fixtures/large.txt"
+    assert_equal MammothHasher.hash(filename).length, 32
+  end
+  def test_hash_result
+    filename = "test/fixtures/large.txt"
+    assert_equal MammothHasher.hash(filename), "d5d198a347f02adafa6e1749ad594340"
+  end
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: mammoth-hasher
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Vincent Marquet
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-02-21 00:00:00.000000000 Z
+date: 2015-06-07 00:00:00.000000000 Z
 dependencies: []
 description: A library to compute fingerprints for big files, when runnning usual
   algorithms as MD5 is too long.
@@ -17,10 +17,12 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- Rakefile
 - lib/mammoth-hasher.rb
+- test/test_mammoth-hasher.rb
 homepage: http://github.com/vmarquet/ruby-mammoth-hasher
 licenses:
-- WTFPL
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -38,8 +40,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: A library to compute fingerprints for big files.
-test_files: []
+test_files:
+- test/test_mammoth-hasher.rb