mammoth-hasher 0.1.0 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 5c93a9f01f7da01607911a7f7c5df3acc1ff0957
- data.tar.gz: 1704ea09e5aeb7422ed007826d66363c2ec6737d
+ metadata.gz: e8a53d70781a6348d58af444f42747ffcb700b64
+ data.tar.gz: caa0fff7b6d9053006daf8b1bcb2cd4e4f0e5c5f
  SHA512:
- metadata.gz: b6040ec90679e24ecd5e1bbdf08a6ac719ac1a0621ec4fa8767a092b1864b448aa47a0bc52c1940fa5060f825392bc060030d25b0ff14b17c3e0ea0827729cc2
- data.tar.gz: 9c5802cf32e2743a6f99448f62e108880c3ad1963b6052de8442b52aee504c7e02237d088879beb2819854caa0c4d0ccc305f84d5dd0e936fb4f54cd1f35614e
+ metadata.gz: 4dcad29b2156bbe8343a2f5fd93c637b1ec447ffdb2f062b5fc6eb5dab3cc9857ab7c325aa29a3fe862c2ee2622f8f211e04d24f8e3da2204747bba2f2531947
+ data.tar.gz: 7b224f29d208466247a2a89b7e0e60190ab42a4b088bd01a72bbc982cdefbd6389f0369e3304c70838a6f5f6b591667e189f03cf4b73a3d6a45c91befb489a1d
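For reference, these entries are plain hex digests of the two archives bundled inside the packaged .gem (a .gem file is an ordinary tar containing metadata.gz, data.tar.gz and checksums.yaml.gz). A minimal sketch of recomputing such a digest for an extracted archive; the path is illustrative:

    require 'digest'

    path = 'data.tar.gz'   # extracted from the packaged .gem; path is illustrative
    puts Digest::SHA1.file(path).hexdigest
    puts Digest::SHA512.file(path).hexdigest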
data/Rakefile ADDED
@@ -0,0 +1,8 @@
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+ t.libs << 'test'
+ end
+
+ desc "Run tests"
+ task :default => :test
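Since the task above doesn't set a file pattern, Rake::TestTask falls back to its default of test/test*.rb, which matches the test file added further down, so the suite can typically be run from the gem root with:

    $ rake        # runs :test via the :default dependency
    $ rake test   # or invoke the test task directly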
lib/mammoth-hasher.rb CHANGED
@@ -13,7 +13,7 @@ class MammothHasher
  # algorithm parameters
  # WARNING: if you change them, the resulting hash will be different !
  number_of_chunks = 100
- length_of_chunks = 100
+ length_of_chunks = 4

  # we get the file size (in bytes), used as PRNG (Pseudo Random Number Generator)
  filesize = File.size filename
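A quick sanity check on the parameter change above (illustrative arithmetic, not part of the gem): the algorithm now reads 100 chunks of 4 bytes, i.e. 400 bytes per file instead of the 10,000 bytes sampled by 0.1.0, and the same product is the cutoff below which the next hunk falls back to a plain MD5 of the whole file.

    number_of_chunks = 100
    length_of_chunks = 4
    number_of_chunks * length_of_chunks   # => 400 bytes sampled, also the MD5-fallback cutoff
    100 * 100                             # => 10_000 bytes, the 0.1.0 equivalent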
@@ -22,18 +22,22 @@ class MammothHasher
  # the MD5 of the whole file than to apply our custom algorithm
  if filesize <= number_of_chunks*length_of_chunks
  file = File.open(filename, 'r')
- final_hash = Digest::MD5.file(file).hexdigest
+ hash = Digest::MD5.file(file).hexdigest
  file.close
  puts (Time.now - time_start).to_s + " seconds" if debug
- return final_hash
+ return hash
  end

  # we initialize the PRNG
  prng = Random.new filesize

- # we get 1000 numbers between 0 and filesize-size_of_chunk
- offsets = []
- for i in 0..number_of_chunks
+ # we always get a chunk at the offset 0 (beginning of file)
+ # because that's where the magic number indicating the file type is
+ # so making sure that it's still the same may prevent from some attacks
+ offsets = [0]
+
+ # we get 99 other offsets between 0 and (filesize - length_of_chunk)
+ for i in 1..(number_of_chunks-1)
  offsets << prng.rand(filesize - length_of_chunks)
  end

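Because the PRNG above is seeded with the file size, the chosen offsets are fully deterministic: hashing the same file twice (or any two files of the same size) samples the same positions. A small illustration of that determinism, independent of the gem:

    seed = 1_048_576                        # e.g. the size of a 1 MiB file
    a, b = Random.new(seed), Random.new(seed)
    offsets_a = Array.new(5) { a.rand(seed - 4) }
    offsets_b = Array.new(5) { b.rand(seed - 4) }
    offsets_a == offsets_b                  # => true: same seed, same offsets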
@@ -41,24 +45,17 @@ class MammothHasher
  # (in order to optimize the way the file will be read (in only one direction))
  offsets.sort

- # we compute the hashes of several parts of the file
- hashes = ""
- # first, we compute the hash of the first bytes of the file,
- # because that's where the magic number indicating the file type is
- # so making sure that it's still the same may be safer
- hashes << Digest::MD5.new.hexdigest(File.read(filename, 100))
- # for each offset, we compute the hash of the following bytes
- # and we concatenate these hashes
+ # we concatenate all the bytes from all the chunks at the offset we choose
+ bytes = ""
  for offset in offsets
- hashes += Digest::MD5.new.hexdigest(File.read(filename, length_of_chunks, offset))
+ bytes = "#{bytes}#{File.read(filename, length_of_chunks, offset)}"
  end

- # we compute the final hash, which is the hash of the concatenation
- # of the previous hashes
- final_hash = Digest::MD5.new.hexdigest hashes
+ # we compute the final hash, which is the hash of the concatenation of all chunks
+ hash = Digest::MD5.new.hexdigest bytes

  puts (Time.now - time_start).to_s + " seconds" if debug

- return final_hash
+ return hash
  end
  end
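For reference, a hedged usage sketch of the public method shown above, matching how the tests below call it; the path is illustrative:

    require 'mammoth-hasher'

    # files of at most 400 bytes (number_of_chunks * length_of_chunks) fall back to a
    # plain MD5 of the whole file; larger files are hashed from 100 sampled chunks
    puts MammothHasher.hash('/path/to/large-video.mkv')   # => 32-character MD5 hex string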
test/test_mammoth-hasher.rb ADDED
@@ -0,0 +1,24 @@
+ require 'test/unit'
+ require 'digest'
+ require 'mammoth-hasher'
+
+ class MammothHasherTest < Test::Unit::TestCase
+ # for small files, instead of using our custom algorithm,
+ # it's simpler to use the md5 hash directly,
+ # so here we test that MammothHasher hash is the same than md5 hash
+ def test_small_file_hash
+ filename = "test/fixtures/small.txt"
+ file = File.open(filename, 'r')
+ assert_equal MammothHasher.hash(filename), Digest::MD5.file(file).hexdigest
+ end
+
+ def test_hash_size
+ filename = "test/fixtures/large.txt"
+ assert_equal MammothHasher.hash(filename).length, 32
+ end
+
+ def test_hash_result
+ filename = "test/fixtures/large.txt"
+ assert_equal MammothHasher.hash(filename), "d5d198a347f02adafa6e1749ad594340"
+ end
+ end
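Note that the small-file test above depends on test/fixtures/small.txt being no larger than the 400-byte cutoff, so the MD5 fallback path is exercised; the same equivalence can be checked by hand (a sketch, path illustrative):

    require 'digest'
    require 'mammoth-hasher'

    path = 'test/fixtures/small.txt'   # any file of at most 400 bytes works here
    MammothHasher.hash(path) == Digest::MD5.file(path).hexdigest   # => true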
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: mammoth-hasher
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.2.0
  platform: ruby
  authors:
  - Vincent Marquet
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-02-21 00:00:00.000000000 Z
+ date: 2015-06-07 00:00:00.000000000 Z
  dependencies: []
  description: A library to compute fingerprints for big files, when runnning usual
  algorithms as MD5 is too long.
@@ -17,10 +17,12 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
+ - Rakefile
  - lib/mammoth-hasher.rb
+ - test/test_mammoth-hasher.rb
  homepage: http://github.com/vmarquet/ruby-mammoth-hasher
  licenses:
- - WTFPL
+ - MIT
  metadata: {}
  post_install_message:
  rdoc_options: []
@@ -38,8 +40,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.4.5
+ rubygems_version: 2.4.6
  signing_key:
  specification_version: 4
  summary: A library to compute fingerprints for big files.
- test_files: []
+ test_files:
+ - test/test_mammoth-hasher.rb
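With the version, files and license updated in the gemspec above, the new release would typically be installed with the standard gem command:

    $ gem install mammoth-hasher -v 0.2.0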