sampling-hash 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MDAwYWQ2MTFmMWQ4ZTdlNDYxY2RhZDhjZGQ4M2MyODBjMTViMmJiOQ==
5
+ data.tar.gz: !binary |-
6
+ MTk2ZGJjOGYxYjIyM2QwZmI4ZGE1NWNiNjE3M2ZlMjcxYTU0YjVhNQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MjRjZDZjMGVjZTE1ZDczMDA1Y2NkYmJiMmY4NjkyODk0Y2I3MjdhMGU0ODFi
10
+ ODRkMmYzMTBiNDk5ZjVlMDVmZmIwY2M2MmY2Y2IzMWZmYzBiNjY5Mjg1NzBi
11
+ ZDlmZTJmMjg3YzllOGI1NTk0M2I5OTVkNGRkMTdmYzU5MTRlOTQ=
12
+ data.tar.gz: !binary |-
13
+ MjNmNThiZTRhNGQxNDMxOGJjOGMxZGJhYzBhNTE1NmY1MjkwNDg2NGIyYzk4
14
+ ZTg5YTAzNGJlYjVkY2Y1ZDE3YzdkMzZhNmM2OTBkZDMzZWJlYmE1NjYzNjI2
15
+ NjdlNDNjNWZkYWFlZTM5MjU2YjNmNzMzNzY1NjRhYTIxZDU1MTk=
@@ -0,0 +1,42 @@
1
module SamplingHash
  # Hands out consecutive fixed-size chunks from the beginning of an IO,
  # stopping once a size-dependent chunk budget has been used up.
  #
  # For files smaller than 3000 bytes the budget covers the whole file. For
  # larger files the budget is derived from the natural logarithm of the
  # file size, so it grows only slowly with the file size.
  class SamplingIO
    # Number of bytes requested from the IO on every call to #sample.
    CHUNK_SIZE = 256

    # @param io [IO] stream to sample; it is sought and its +stat+ is read
    # @raise [ArgumentError] if +io+ is not an IO
    def initialize(io)
      unless io.is_a?(IO)
        raise ArgumentError, 'first parameter should be IO'
      end

      @io = io
      @chunk = 0
    end

    # Reads the next chunk.
    #
    # Chunk indices 0 through +samples+ (inclusive) are served; after that
    # the method keeps returning nil.
    #
    # @return [String, nil] up to CHUNK_SIZE bytes, or nil when the budget is
    #   exhausted or the underlying read returns nil
    def sample
      return if @chunk > samples

      position = @chunk * CHUNK_SIZE
      @io.seek(position, IO::SEEK_SET)
      @chunk += 1
      @io.read(CHUNK_SIZE)
    end

    private

    # Size of the underlying file in bytes, looked up once via IO#stat.
    def file_size
      @file_size ||= @io.stat.size
    end

    # Byte budget for large files: 1000 * ln(file_size / 1000), truncated and
    # with the low 8 bits cleared. The division is integer division.
    def reduce
      thousands = file_size / 1000
      scaled = (Math.log(thousands) * 1000).truncate
      scaled & 0xFFFFFF00
    end

    # Byte budget: the whole file below 3000 bytes, the reduced size otherwise.
    def samples_size
      @samples_size ||=
        if file_size < 3000
          file_size
        else
          reduce
        end
    end

    # Highest chunk index that #sample will still serve.
    def samples
      samples_size / CHUNK_SIZE + 1
    end
  end
end
@@ -0,0 +1,3 @@
1
module SamplingHash
  # Gem version string. Frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,18 @@
1
+ require 'sampling-hash/sampling-io'
2
+ require 'sampling-hash/version'
3
+ require 'xxhash'
4
+
5
module SamplingHash
  # Computes a hash over the chunks that SamplingIO yields for a file.
  #
  # Every chunk returned by SamplingIO#sample is fed, in order, into an
  # XXhash streaming hash created with +seed+.
  #
  # NOTE: as a singleton method named +hash+ this replaces Object#hash on
  # the SamplingHash module object itself.
  #
  # @param path path of the file to hash; must satisfy File.file?
  # @param seed seed passed to XXhash::Internal::StreamingHash.new
  # @return the value of the streaming hash's #digest
  # @raise [ArgumentError] if +path+ is not an existing regular file
  def self.hash(path, seed)
    raise ArgumentError, 'file not found' unless File.file?(path)

    hash = XXhash::Internal::StreamingHash.new(seed)

    # Block form so the file handle is closed even if sampling or hashing
    # raises; the bare File.open used previously was never closed.
    File.open(path, 'r') do |file|
      sio = SamplingIO.new(file)

      while (chunk = sio.sample)
        hash.update(chunk)
      end
    end

    hash.digest
  end
end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sampling-hash
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Malte Rohde
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: xxhash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Calculates deterministic hashes from file samples
56
+ email:
57
+ - malte.rohde@flavoursys.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/sampling-hash.rb
63
+ - lib/sampling-hash/version.rb
64
+ - lib/sampling-hash/sampling-io.rb
65
+ homepage: http://github.com/flavoursys/sampling-hash
66
+ licenses: []
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 2.0.3
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Sampling hash algorithm for large files
88
+ test_files: []