mammoth-hasher 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/Rakefile +8 -0
- data/lib/mammoth-hasher.rb +16 -19
- data/test/test_mammoth-hasher.rb +24 -0
- metadata +8 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e8a53d70781a6348d58af444f42747ffcb700b64
+  data.tar.gz: caa0fff7b6d9053006daf8b1bcb2cd4e4f0e5c5f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4dcad29b2156bbe8343a2f5fd93c637b1ec447ffdb2f062b5fc6eb5dab3cc9857ab7c325aa29a3fe862c2ee2622f8f211e04d24f8e3da2204747bba2f2531947
+  data.tar.gz: 7b224f29d208466247a2a89b7e0e60190ab42a4b088bd01a72bbc982cdefbd6389f0369e3304c70838a6f5f6b591667e189f03cf4b73a3d6a45c91befb489a1d
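The values above are the SHA1 and SHA512 digests of the metadata.gz and data.tar.gz archives packed inside the .gem file (which is itself a tar archive). A minimal sketch of how the new values could be recomputed, assuming the two archives have already been extracted from mammoth-hasher-0.2.0.gem into the current directory:

require 'digest'

# Recompute the digests recorded in checksums.yaml. The file names below
# are assumptions: metadata.gz and data.tar.gz must have been extracted
# from the .gem archive beforehand.
%w[metadata.gz data.tar.gz].each do |name|
  puts "SHA1   #{name}: #{Digest::SHA1.file(name).hexdigest}"
  puts "SHA512 #{name}: #{Digest::SHA512.file(name).hexdigest}"
end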
data/Rakefile
ADDED
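The contents of the added Rakefile are collapsed in this view. Given the test suite added below, it most likely just wires up the tests; a minimal sketch under that assumption (not the author's actual file) could be:

require 'rake/testtask'

# Run the unit tests in test/ via `rake test` (hypothetical sketch).
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/test_*.rb'
end

task default: :test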
data/lib/mammoth-hasher.rb
CHANGED
@@ -13,7 +13,7 @@ class MammothHasher
     # algorithm parameters
     # WARNING: if you change them, the resulting hash will be different !
     number_of_chunks = 100
-    length_of_chunks =
+    length_of_chunks = 4
 
     # we get the file size (in bytes), used as PRNG (Pseudo Random Number Generator)
     filesize = File.size filename
@@ -22,18 +22,22 @@ class MammothHasher
     # the MD5 of the whole file than to apply our custom algorithm
     if filesize <= number_of_chunks*length_of_chunks
       file = File.open(filename, 'r')
-
+      hash = Digest::MD5.file(file).hexdigest
       file.close
       puts (Time.now - time_start).to_s + " seconds" if debug
-      return
+      return hash
     end
 
     # we initialize the PRNG
     prng = Random.new filesize
 
-    # we get
-
-
+    # we always get a chunk at the offset 0 (beginning of file)
+    # because that's where the magic number indicating the file type is
+    # so making sure that it's still the same may prevent from some attacks
+    offsets = [0]
+
+    # we get 99 other offsets between 0 and (filesize - length_of_chunk)
+    for i in 1..(number_of_chunks-1)
       offsets << prng.rand(filesize - length_of_chunks)
     end
 
@@ -41,24 +45,17 @@ class MammothHasher
     # (in order to optimize the way the file will be read (in only one direction))
     offsets.sort
 
-    # we
-
-    # first, we compute the hash of the first bytes of the file,
-    # because that's where the magic number indicating the file type is
-    # so making sure that it's still the same may be safer
-    hashes << Digest::MD5.new.hexdigest(File.read(filename, 100))
-    # for each offset, we compute the hash of the following bytes
-    # and we concatenate these hashes
+    # we concatenate all the bytes from all the chunks at the offset we choose
+    bytes = ""
     for offset in offsets
-
+      bytes = "#{bytes}#{File.read(filename, length_of_chunks, offset)}"
     end
 
-    # we compute the final hash, which is the hash of the concatenation
-
-    final_hash = Digest::MD5.new.hexdigest hashes
+    # we compute the final hash, which is the hash of the concatenation of all chunks
+    hash = Digest::MD5.new.hexdigest bytes
 
     puts (Time.now - time_start).to_s + " seconds" if debug
 
-    return
+    return hash
   end
 end
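Putting the diff together: the hasher now samples 100 chunks of 4 bytes each at offsets drawn from a PRNG seeded with the file size, always including the chunk at offset 0 (where the file's magic number lives), concatenates the raw bytes and returns their MD5; files of 400 bytes or less fall back to a plain MD5 of the whole file. A minimal usage sketch, assuming only the MammothHasher.hash class method exercised by the tests (the file path is illustrative):

require 'mammoth-hasher'

# Fingerprint a large file without reading it entirely: 100 chunks of
# 4 bytes are read at deterministic pseudo-random offsets, concatenated
# and MD5-hashed.
fingerprint = MammothHasher.hash("path/to/big_file.iso")
puts fingerprint   # => 32-character hex digest

Because the offsets depend only on the file size, the sampling is deterministic, but bytes that fall outside the sampled chunks do not affect the fingerprint.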
data/test/test_mammoth-hasher.rb
ADDED
@@ -0,0 +1,24 @@
+require 'test/unit'
+require 'digest'
+require 'mammoth-hasher'
+
+class MammothHasherTest < Test::Unit::TestCase
+  # for small files, instead of using our custom algorithm,
+  # it's simpler to use the md5 hash directly,
+  # so here we test that MammothHasher hash is the same than md5 hash
+  def test_small_file_hash
+    filename = "test/fixtures/small.txt"
+    file = File.open(filename, 'r')
+    assert_equal MammothHasher.hash(filename), Digest::MD5.file(file).hexdigest
+  end
+
+  def test_hash_size
+    filename = "test/fixtures/large.txt"
+    assert_equal MammothHasher.hash(filename).length, 32
+  end
+
+  def test_hash_result
+    filename = "test/fixtures/large.txt"
+    assert_equal MammothHasher.hash(filename), "d5d198a347f02adafa6e1749ad594340"
+  end
+end
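The tests rely on two fixtures, test/fixtures/small.txt and test/fixtures/large.txt, which are not shown in this diff. A hedged sketch of how comparable fixtures could be generated (the files actually shipped with the gem may differ, and regenerating large.txt would change the digest expected by test_hash_result):

require 'fileutils'

FileUtils.mkdir_p 'test/fixtures'

# Below number_of_chunks * length_of_chunks (400 bytes): exercises the
# plain-MD5 fallback path checked by test_small_file_hash.
File.write('test/fixtures/small.txt', 'a' * 100)

# Well above 400 bytes: exercises the chunk-sampling path.
File.write('test/fixtures/large.txt', 'a' * 1_000_000)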
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: mammoth-hasher
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Vincent Marquet
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-06-07 00:00:00.000000000 Z
 dependencies: []
 description: A library to compute fingerprints for big files, when runnning usual
   algorithms as MD5 is too long.
@@ -17,10 +17,12 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- Rakefile
 - lib/mammoth-hasher.rb
+- test/test_mammoth-hasher.rb
 homepage: http://github.com/vmarquet/ruby-mammoth-hasher
 licenses:
-- 
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -38,8 +40,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: A library to compute fingerprints for big files.
-test_files:
+test_files:
+- test/test_mammoth-hasher.rb
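This metadata is generated by RubyGems from the gemspec; a sketch of what mammoth-hasher.gemspec might contain to produce the 0.2.0 metadata above (reconstructed from the fields shown here, not the author's actual file):

Gem::Specification.new do |s|
  s.name        = 'mammoth-hasher'
  s.version     = '0.2.0'
  s.date        = '2015-06-07'
  s.summary     = 'A library to compute fingerprints for big files.'
  s.description = 'A library to compute fingerprints for big files, ' \
                  'when running usual algorithms as MD5 is too long.'
  s.authors     = ['Vincent Marquet']
  s.homepage    = 'http://github.com/vmarquet/ruby-mammoth-hasher'
  s.license     = 'MIT'
  s.files       = ['Rakefile', 'lib/mammoth-hasher.rb', 'test/test_mammoth-hasher.rb']
  s.test_files  = ['test/test_mammoth-hasher.rb']
end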