mammoth-hasher 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/mammoth-hasher.rb +64 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5c93a9f01f7da01607911a7f7c5df3acc1ff0957
|
4
|
+
data.tar.gz: 1704ea09e5aeb7422ed007826d66363c2ec6737d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b6040ec90679e24ecd5e1bbdf08a6ac719ac1a0621ec4fa8767a092b1864b448aa47a0bc52c1940fa5060f825392bc060030d25b0ff14b17c3e0ea0827729cc2
|
7
|
+
data.tar.gz: 9c5802cf32e2743a6f99448f62e108880c3ad1963b6052de8442b52aee504c7e02237d088879beb2819854caa0c4d0ccc305f84d5dd0e936fb4f54cd1f35614e
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'digest' # needed for the md5 hash algorithm
|
2
|
+
|
3
|
+
# Computes a quick fingerprint for big files by hashing a deterministic
# sample of the file instead of its whole content.
#
# Files of at most NUMBER_OF_CHUNKS * LENGTH_OF_CHUNKS bytes are hashed in
# full with MD5. Bigger files are fingerprinted from their first
# HEADER_LENGTH bytes plus a set of chunks located at pseudo-random offsets;
# the PRNG is seeded with the file size, so the same offsets are selected
# every time for a file of a given size.
#
# NOTE: the fingerprint of a big file only covers the sampled bytes, so a
# modification that touches none of them (and keeps the size) is not detected.
class MammothHasher
  # Algorithm parameters.
  # WARNING: if you change them, the resulting hashes will be different!
  NUMBER_OF_CHUNKS = 100
  LENGTH_OF_CHUNKS = 100

  # Number of leading bytes always included in the fingerprint of a big file
  # (that is where the magic number indicating the file type usually is).
  HEADER_LENGTH = 100

  # Returns the fingerprint of a file as a 32-character hexadecimal string.
  #
  # NOTE: this class method shadows the generic Object#hash for the
  # MammothHasher class object itself.
  #
  # @param filename [String] path of the file (expanded with File.expand_path)
  # @param debug [Boolean] when true, prints the elapsed time to stdout
  # @return [String] MD5 hex digest (whole file or sampled, depending on size)
  # @raise [ArgumentError] if filename is nil, is not a String, or does not exist
  def self.hash(filename, debug = false)
    time_start = Time.now if debug

    # we check that the file exists
    raise ArgumentError, "give the filename as a parameter (got nil)" if filename.nil?
    raise ArgumentError, "filename must be a string" unless filename.is_a?(String)

    filename = File.expand_path(filename)
    raise ArgumentError, "#{filename} does not exist" unless File.exist?(filename)

    # the file size (in bytes) selects the algorithm and seeds the PRNG
    filesize = File.size(filename)

    final_hash =
      if filesize <= NUMBER_OF_CHUNKS * LENGTH_OF_CHUNKS
        # if the file is not a big file, it's quicker to compute the MD5 of
        # the whole file than to apply the sampling algorithm.
        # Digest::MD5.file opens and closes the file by itself, so no handle
        # is leaked if reading fails.
        Digest::MD5.file(filename).hexdigest
      else
        sampled_hash(filename, filesize)
      end

    puts "#{Time.now - time_start} seconds" if debug

    final_hash
  end

  # Fingerprints a big file: MD5 of the concatenated MD5 hex digests of the
  # file header and of every sampled chunk.
  #
  # Compatibility notes (do not "fix" without bumping the fingerprint format):
  # * NUMBER_OF_CHUNKS + 1 offsets are drawn, as in release 0.1.0 which
  #   iterated over the inclusive range 0..number_of_chunks;
  # * the offsets are used in the order the PRNG produced them; release 0.1.0
  #   called `offsets.sort` but discarded the result, so the chunks were never
  #   actually hashed in ascending order.
  # Changing either point would change every fingerprint of a big file.
  def self.sampled_hash(filename, filesize)
    prng = Random.new(filesize)

    # offsets are drawn in 0...(filesize - LENGTH_OF_CHUNKS), so every chunk
    # lies entirely inside the file
    offsets = Array.new(NUMBER_OF_CHUNKS + 1) { prng.rand(filesize - LENGTH_OF_CHUNKS) }

    # a single binary-mode handle is used for all the reads (instead of
    # reopening the file for each chunk); the block form closes it even if
    # an exception is raised
    digests = File.open(filename, 'rb') do |io|
      header_digest = Digest::MD5.hexdigest(io.read(HEADER_LENGTH))

      chunk_digests = offsets.map do |offset|
        io.seek(offset)
        Digest::MD5.hexdigest(io.read(LENGTH_OF_CHUNKS))
      end

      [header_digest] + chunk_digests
    end

    # the final hash is the hash of the concatenation of the previous hashes
    Digest::MD5.hexdigest(digests.join)
  end
  private_class_method :sampled_hash
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mammoth-hasher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Vincent Marquet
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-21 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A library to compute fingerprints for big files, when running usual
|
14
|
+
algorithms such as MD5 takes too long.
|
15
|
+
email:
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/mammoth-hasher.rb
|
21
|
+
homepage: http://github.com/vmarquet/ruby-mammoth-hasher
|
22
|
+
licenses:
|
23
|
+
- WTFPL
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.4.5
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: A library to compute fingerprints for big files.
|
45
|
+
test_files: []
|