mammoth-hasher 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c93a9f01f7da01607911a7f7c5df3acc1ff0957
4
- data.tar.gz: 1704ea09e5aeb7422ed007826d66363c2ec6737d
3
+ metadata.gz: e8a53d70781a6348d58af444f42747ffcb700b64
4
+ data.tar.gz: caa0fff7b6d9053006daf8b1bcb2cd4e4f0e5c5f
5
5
  SHA512:
6
- metadata.gz: b6040ec90679e24ecd5e1bbdf08a6ac719ac1a0621ec4fa8767a092b1864b448aa47a0bc52c1940fa5060f825392bc060030d25b0ff14b17c3e0ea0827729cc2
7
- data.tar.gz: 9c5802cf32e2743a6f99448f62e108880c3ad1963b6052de8442b52aee504c7e02237d088879beb2819854caa0c4d0ccc305f84d5dd0e936fb4f54cd1f35614e
6
+ metadata.gz: 4dcad29b2156bbe8343a2f5fd93c637b1ec447ffdb2f062b5fc6eb5dab3cc9857ab7c325aa29a3fe862c2ee2622f8f211e04d24f8e3da2204747bba2f2531947
7
+ data.tar.gz: 7b224f29d208466247a2a89b7e0e60190ab42a4b088bd01a72bbc982cdefbd6389f0369e3304c70838a6f5f6b591667e189f03cf4b73a3d6a45c91befb489a1d
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rake/testtask'
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ end
6
+
7
+ desc "Run tests"
8
+ task :default => :test
@@ -13,7 +13,7 @@ class MammothHasher
13
13
  # algorithm parameters
14
14
  # WARNING: if you change them, the resulting hash will be different !
15
15
  number_of_chunks = 100
16
- length_of_chunks = 100
16
+ length_of_chunks = 4
17
17
 
18
18
  # we get the file size (in bytes), used as PRNG (Pseudo Random Number Generator)
19
19
  filesize = File.size filename
@@ -22,18 +22,22 @@ class MammothHasher
22
22
  # the MD5 of the whole file than to apply our custom algorithm
23
23
  if filesize <= number_of_chunks*length_of_chunks
24
24
  file = File.open(filename, 'r')
25
- final_hash = Digest::MD5.file(file).hexdigest
25
+ hash = Digest::MD5.file(file).hexdigest
26
26
  file.close
27
27
  puts (Time.now - time_start).to_s + " seconds" if debug
28
- return final_hash
28
+ return hash
29
29
  end
30
30
 
31
31
  # we initialize the PRNG
32
32
  prng = Random.new filesize
33
33
 
34
- # we get 1000 numbers between 0 and filesize-size_of_chunk
35
- offsets = []
36
- for i in 0..number_of_chunks
34
+ # we always get a chunk at the offset 0 (beginning of file)
35
+ # because that's where the magic number indicating the file type is
36
+ # so making sure that it's still the same may prevent from some attacks
37
+ offsets = [0]
38
+
39
+ # we get 99 other offsets between 0 and (filesize - length_of_chunk)
40
+ for i in 1..(number_of_chunks-1)
37
41
  offsets << prng.rand(filesize - length_of_chunks)
38
42
  end
39
43
 
@@ -41,24 +45,17 @@ class MammothHasher
41
45
  # (in order to optimize the way the file will be read (in only one direction))
42
46
  offsets.sort
43
47
 
44
- # we compute the hashes of several parts of the file
45
- hashes = ""
46
- # first, we compute the hash of the first bytes of the file,
47
- # because that's where the magic number indicating the file type is
48
- # so making sure that it's still the same may be safer
49
- hashes << Digest::MD5.new.hexdigest(File.read(filename, 100))
50
- # for each offset, we compute the hash of the following bytes
51
- # and we concatenate these hashes
48
+ # we concatenate all the bytes from all the chunks at the offset we choose
49
+ bytes = ""
52
50
  for offset in offsets
53
- hashes += Digest::MD5.new.hexdigest(File.read(filename, length_of_chunks, offset))
51
+ bytes = "#{bytes}#{File.read(filename, length_of_chunks, offset)}"
54
52
  end
55
53
 
56
- # we compute the final hash, which is the hash of the concatenation
57
- # of the previous hashes
58
- final_hash = Digest::MD5.new.hexdigest hashes
54
+ # we compute the final hash, which is the hash of the concatenation of all chunks
55
+ hash = Digest::MD5.new.hexdigest bytes
59
56
 
60
57
  puts (Time.now - time_start).to_s + " seconds" if debug
61
58
 
62
- return final_hash
59
+ return hash
63
60
  end
64
61
  end
@@ -0,0 +1,24 @@
1
+ require 'test/unit'
2
+ require 'digest'
3
+ require 'mammoth-hasher'
4
+
5
+ class MammothHasherTest < Test::Unit::TestCase
6
+ # for small files, instead of using our custom algorithm,
7
+ # it's simpler to use the md5 hash directly,
8
+ # so here we test that MammothHasher hash is the same than md5 hash
9
+ def test_small_file_hash
10
+ filename = "test/fixtures/small.txt"
11
+ file = File.open(filename, 'r')
12
+ assert_equal MammothHasher.hash(filename), Digest::MD5.file(file).hexdigest
13
+ end
14
+
15
+ def test_hash_size
16
+ filename = "test/fixtures/large.txt"
17
+ assert_equal MammothHasher.hash(filename).length, 32
18
+ end
19
+
20
+ def test_hash_result
21
+ filename = "test/fixtures/large.txt"
22
+ assert_equal MammothHasher.hash(filename), "d5d198a347f02adafa6e1749ad594340"
23
+ end
24
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mammoth-hasher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vincent Marquet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-21 00:00:00.000000000 Z
11
+ date: 2015-06-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library to compute fingerprints for big files, when runnning usual
14
14
  algorithms as MD5 is too long.
@@ -17,10 +17,12 @@ executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - Rakefile
20
21
  - lib/mammoth-hasher.rb
22
+ - test/test_mammoth-hasher.rb
21
23
  homepage: http://github.com/vmarquet/ruby-mammoth-hasher
22
24
  licenses:
23
- - WTFPL
25
+ - MIT
24
26
  metadata: {}
25
27
  post_install_message:
26
28
  rdoc_options: []
@@ -38,8 +40,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
40
  version: '0'
39
41
  requirements: []
40
42
  rubyforge_project:
41
- rubygems_version: 2.4.5
43
+ rubygems_version: 2.4.6
42
44
  signing_key:
43
45
  specification_version: 4
44
46
  summary: A library to compute fingerprints for big files.
45
- test_files: []
47
+ test_files:
48
+ - test/test_mammoth-hasher.rb