pwnedkeys-filter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,110 @@
1
+ This is a gem for creating and querying [pwnedkeys.com](https://pwnedkeys.com)
2
+ [bloom filters](https://pwnedkeys.com/filter.html).
3
+
4
+
5
+ # Installation
6
+
7
+ It's a gem:
8
+
9
+ gem install pwnedkeys-filter
10
+
11
+ There's also the wonders of [the Gemfile](http://bundler.io):
12
+
13
+ gem 'pwnedkeys-filter'
14
+
15
+ If you're the sturdy type that likes to run from git:
16
+
17
+ rake install
18
+
19
+ Or, if you've eschewed the convenience of Rubygems entirely, then you
20
+ presumably know what to do already.
21
+
22
+
23
+ # Usage
24
+
25
+ Before you do anything, load the code:
26
+
27
+ require "pwnedkeys/filter"
28
+
29
+ The following illustrative examples do not show the full capabilities of the
30
+ gem. The [API documentation](https://rubydoc.info/gems/pwnedkeys-filter) gives
31
+ all the possibilities.
32
+
33
+
34
+ ## Querying the filter
35
+
36
+ Open the filter file, then call `#probably_includes?` with a key:
37
+
38
+ filter = Pwnedkeys::Filter.open("/path/to/the/filter/data/file")
39
+ key = OpenSSL::PKey::RSA.new(2048)
40
+
41
+ # if this returns `true`, it's not your lucky day
42
+ filter.probably_includes?(key)
43
+
44
+ # From https://pwnedkeys.com/examples/rsa2048_key.pem
45
+ key = OpenSSL::PKey.read(File.read("pwnedkeys_demo_rsa2048.pem"))
46
+
47
+ # This should definitely return `true`
48
+ filter.probably_includes?(key)
49
+
50
+ Essentially, when passed anything that looks suspiciously like a key, the
51
+ `#probably_includes?` method will return `true` if the key is *probably* in the
52
+ dataset, and will return `false` if the key is *definitely not* in the dataset.
53
+ For more on why the answers are "definitely not" and "probably", it's best to
54
+ read [the Wikipedia page on bloom
55
+ filters](https://en.wikipedia.org/wiki/Bloom_filter).
56
+
57
+
58
+ ## Creating a new filter file
59
+
60
+ To create a new filter data file:
61
+
62
+ Pwnedkeys::Filter.create("/some/file/name", hash_count: 4, hash_length: 12)
63
+
64
+ This does not return an open filter file; it merely creates a new file on
65
+ disk, initialized appropriately.
66
+
67
+
68
+ ## Adding new entries to a filter file
69
+
70
+ To add entries to an open filter:
71
+
72
+ filter.add(key)
73
+
74
+ When you're done adding entries:
75
+
76
+ filter.close
77
+
78
+ The API takes care of updating the filter metadata automatically.
79
+
80
+ If you don't want to have to manually close the filter (and who does, really?),
81
+ then the block form of `#open` is for you:
82
+
83
+ Pwnedkeys::Filter.open("/filter/file") do |filter|
84
+ filter.add(key)
85
+ end
86
+
87
+
88
+ # Contributing
89
+
90
+ See [`CONTRIBUTING.md`](CONTRIBUTING.md).
91
+
92
+
93
+ # Licence
94
+
95
+ Unless otherwise stated, everything in this repo is covered by the following
96
+ copyright notice:
97
+
98
+ Copyright (C) 2019 Matt Palmer <matt@hezmatt.org>
99
+
100
+ This program is free software: you can redistribute it and/or modify it
101
+ under the terms of the GNU General Public License version 3, as
102
+ published by the Free Software Foundation.
103
+
104
+ This program is distributed in the hope that it will be useful,
105
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
106
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
107
+ GNU General Public License for more details.
108
+
109
+ You should have received a copy of the GNU General Public License
110
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,309 @@
1
+ require "xxhash"
2
+ require "openssl/x509/spki"
3
+
4
module Pwnedkeys
  # Create, query, and update pwnedkeys bloom filter data files.
  #
  # A filter file consists of a fixed-size header (see {V1Header}) followed
  # by the filter's bit array. Bit `n` of the filter lives in the byte at
  # offset `header_size + n / 8`, most-significant bit first.
  class Filter
    # Base class for all Pwnedkeys::Filter exceptions
    class Error < StandardError; end

    # Raised when a data file appears to not be a valid data file.
    class InvalidFileError < Error; end

    # Raised when the key to be searched for isn't a key
    class InvalidKeyError < Error; end

    # Attempt was made to query or modify a closed filter
    class FilterClosedError < Error; end

    # The metadata stored at the start of a filter data file, along with the
    # bookkeeping that has to happen when the filter is modified.
    class Header
      attr_reader :signature, :revision, :update_time, :entry_count, :hash_count, :hash_length

      # Read the header from an open file, picking the header class that
      # matches the six-byte signature at the start of the file.
      #
      # @raise [InvalidFileError] if the signature is not a known one.
      def self.from_fd(fd)
        fd.seek(0)
        case fd.read(6)
        when "pkbfv1"
          V1Header.from_fd(fd)
        else
          raise InvalidFileError,
                "No recognised file signature found"
        end
      end

      # Every keyword given is stored in the instance variable of the same
      # name; the counters default to "never modified, nothing stored".
      def initialize(**params)
        @revision    = 0
        @update_time = Time.at(0)
        @entry_count = 0

        params.each do |k, v|
          instance_variable_set(:"@#{k}", v)
        end
      end

      # Number of bytes the serialized header occupies in the file.
      def header_size
        to_s.bytesize
      end

      # Record that the filter has been changed since it was opened.
      def update!
        @revision += 1
      end

      # Record that one more entry has been stored in the filter.
      def entry_added!
        @update_time = Time.now
        @entry_count += 1
      end
    end
    private_constant :Header

    # Version 1 header: signature, revision (32 bit), update time (64 bit
    # seconds since the epoch), entry count (32 bit), hash count (8 bit) and
    # hash length (8 bit), all big-endian.
    class V1Header < Header
      SIGNATURE   = "pkbfv1".freeze
      PACK_FORMAT = "a6L>Q>L>CC".freeze
      SIZE        = 24

      # Parse a version 1 header from the start of an open file.
      #
      # @raise [InvalidFileError] if the file is too short to hold a header.
      def self.from_fd(fd)
        fd.seek(0)
        raw = fd.read(SIZE)

        # A short read would otherwise surface as a TypeError from Time.at(nil).
        if raw.nil? || raw.bytesize < SIZE
          raise InvalidFileError,
                "File is too short to contain a filter header"
        end

        signature, revision, update, entry_count, hash_count, hash_length = raw.unpack(PACK_FORMAT)

        new(
          signature:   signature,
          revision:    revision,
          update_time: Time.at(update),
          entry_count: entry_count,
          hash_count:  hash_count,
          hash_length: hash_length
        )
      end

      def initialize(**params)
        @signature = SIGNATURE

        super
      end

      # The header exactly as it is stored on disk.
      def to_s
        [@signature, @revision, @update_time.to_i, @entry_count, @hash_count, @hash_length].pack(PACK_FORMAT)
      end
    end
    private_constant :V1Header

    # Create a new filter data file.
    #
    # Initialize a file, which cannot already exist, to be a pwnedkeys bloom filter
    # v1 file.  The file is not opened for use; it is simply created on the filesystem
    # at the location specified.
    #
    # @param filename [String] the file to be created.  It can be an absolute or relative
    #   path, or anything else that `File.open` will accept.
    #
    # @param hash_count [Integer] how many filter bits each element in the bloom filter will
    #   set.
    #
    # @param hash_length [Integer] the number of bits that will be used for each hash value.
    #
    # @raise [ArgumentError] if `hash_count` or `hash_length` is not an integer
    #   between 0 and 255.  Both are stored in a single byte, so anything else
    #   would be silently truncated in the file header.
    #
    # @raise [SystemCallError] if any sort of filesystem-related problem occurs, an
    #   `Errno`-related exception will be raised.  Likely candidates include `EEXIST`
    #   (the file you specified already exists), `ENOENT` (the directory you specified
    #   doesn't exist), and `EPERM` (you don't have permissions to create a file where
    #   you want it).
    #
    # @return [void]
    #
    def self.create(filename, hash_count:, hash_length:)
      # Validate before touching the filesystem so a bad call leaves no file behind.
      { hash_count: hash_count, hash_length: hash_length }.each do |name, value|
        next if value.is_a?(Integer) && (0..255).cover?(value)

        raise ArgumentError,
              "#{name} must be an integer between 0 and 255 (got #{value.inspect})"
      end

      header = V1Header.new(hash_count: hash_count, hash_length: hash_length)

      # Round up so that filters of fewer than eight bits still get one data
      # byte, rather than seeking backwards into the header.
      data_bytes = ((2 ** hash_length) + 7) / 8

      File.open(filename, File::WRONLY | File::CREAT | File::EXCL, binmode: true) do |fd|
        fd.write(header.to_s)
        # Writing only the final byte leaves the rest of the bit array as
        # zeroes without having to write them all out.
        fd.seek(data_bytes - 1, :CUR)
        fd.write("\0")
      end

      nil
    end

    # Open an existing pwnedkeys bloom filter data file.
    #
    # When a block is given, the open filter is passed to the block and is
    # closed again when the block finishes, even if the block raises.
    #
    # @param filename [String] the file to open.
    #
    # @yieldparam filter [Pwnedkeys::Filter] the open filter.
    #
    # @raise [SystemCallError] if anything low-level goes wrong, you will get some
    #   sort of `Errno`-related exception raised, such as `ENOENT` (the file you
    #   specified does not exist) or `EPERM` (you don't have access to the file
    #   specified).
    #
    # @raise [Pwnedkeys::Filter::InvalidFileError] if the specified file exists,
    #   but is not recognised as a valid pwnedkeys filter file.
    #
    # @return [Pwnedkeys::Filter, Object] the open filter when called without
    #   a block, otherwise whatever the block returned.
    #
    def self.open(filename)
      filter = new(filename)
      return filter unless block_given?

      begin
        yield filter
      ensure
        filter.close
      end
    end

    # Create a new Pwnedkeys::Filter.
    #
    # Equivalent to {.open}, without the possibility of block-style access.
    #
    # @see .open
    #
    def initialize(filename)
      @already_modified = false
      @fd = File.open(filename, File::RDWR, binmode: true)

      begin
        @header = Header.from_fd(@fd)
      rescue StandardError
        # Don't leave the descriptor open when the file turns out to be unusable.
        @fd.close
        @fd = nil
        raise
      end
    end

    # Query the bloom filter.
    #
    # @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
    #   to query the filter for.
    #
    # @return [Boolean] whether the queried key *probably* exists in the
    #   filter (`true`), or whether it *definitely doesn't* (`false`).
    #
    # @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
    #   isn't recognised as a key.
    #
    # @raise [Pwnedkeys::Filter::InvalidFileError] if the file holds less
    #   filter data than its header calls for.
    #
    # @raise [Pwnedkeys::Filter::FilterClosedError] if you try to query
    #   a filter object which has had {#close} called on it.
    #
    def probably_includes?(key)
      raise FilterClosedError if @fd.nil?

      filter_bits(spkify(key).to_der).all?
    end

    # Add a new key (or SPKI) to the filter.
    #
    # @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
    #   to add to the filter.
    #
    # @return [Boolean] whether the key was added as a new entry.  Due to the
    #   probabilistic nature of the bloom filter structure, it is possible to
    #   add two completely different keys and yet it looks like the "same"
    #   key to the bloom filter.  Adding two colliding keys isn't a fatal
    #   error, but it is a hint that perhaps the existing filter is getting
    #   a little too full.
    #
    # @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
    #   isn't recognised as a key.
    #
    # @raise [Pwnedkeys::Filter::InvalidFileError] if the file holds less
    #   filter data than its header calls for.
    #
    # @raise [Pwnedkeys::Filter::FilterClosedError] if you try to add a key
    #   to a filter object which has had {#close} called on it.
    #
    def add(key)
      raise FilterClosedError if @fd.nil?

      # Parse and hash the key once, and use the result for both the
      # membership check and the update.
      positions = filter_positions(spkify(key).to_der)
      return false if positions.all? { |n| bit_set?(n) }

      positions.each { |n| set_bit(n) }

      @header.entry_added!

      # Only update the revision if this is the first add in this filter,
      # because otherwise the revision counter would just be the same as the
      # entry counter, and that would be pointless.
      @header.update! unless @already_modified

      @fd.seek(0)
      @fd.write(@header.to_s)

      @already_modified = true
    end

    # Signal that the filter should be closed for further querying and manipulation.
    #
    # Closing a filter that is already closed does nothing.
    #
    # @return [void]
    #
    # @raise [SystemCallError] if something filesystem-ish fails.
    #
    def close
      return if @fd.nil?

      @fd.close
      @fd = nil
    end

    # An estimate of the false-positive rate inherent in the filter.
    #
    # Given the parameters of the filter, we can estimate roughly what the
    # false-positive rate will be when querying this filter.
    #
    # @return [Float] the approximate probability of a query result being a
    #   false positive, expressed as a floating-point number between 0 and 1.
    #
    def false_positive_rate
      # Taken wholesale from https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
      (1 - (1 - 1.0 / filter_bit_count) ** (hash_count * entry_count)) ** hash_count
    end

    private

    def hash_count
      @header.hash_count
    end

    def hash_length
      @header.hash_length
    end

    def entry_count
      @header.entry_count
    end

    # Total number of bits in the filter's bit array.
    def filter_bit_count
      @filter_size ||= (2 ** hash_length)
    end

    # Turn whatever the caller handed us into an SPKI object, or raise
    # InvalidKeyError if that can't be done.
    def spkify(key)
      case key
      when OpenSSL::X509::SPKI
        key
      when OpenSSL::PKey::PKey
        key.to_spki
      when String
        spki_from_string(key)
      else
        raise InvalidKeyError,
              "Did not recognise the provided key"
      end
    end

    # Parse a string as a key first and, failing that, as an SPKI structure.
    def spki_from_string(str)
      OpenSSL::PKey.read(str).to_spki
    rescue OpenSSL::ASN1::ASN1Error, OpenSSL::PKey::PKeyError
      begin
        OpenSSL::X509::SPKI.new(str)
      rescue OpenSSL::ASN1::ASN1Error, OpenSSL::X509::SPKIError
        raise InvalidKeyError,
              "Could not parse provided key as a key or SPKI structure"
      end
    end

    # The current state of every filter bit that `s` maps to.
    def filter_bits(s)
      filter_positions(s).map { |n| bit_set?(n) }
    end

    # Offset within the file of the byte that holds filter bit `n`.
    def byte_offset(n)
      @header.header_size + n / 8
    end

    # Mask selecting bit `n` within its byte; bits are numbered from the
    # most significant end.
    def bit_mask(n)
      2 ** (7 - (n % 8))
    end

    # Read the single byte at `offset` as an integer.
    def read_byte(offset)
      @fd.seek(offset, :SET)
      chr = @fd.read(1)

      # A nil read means the position is past the end of the file.
      if chr.nil?
        raise InvalidFileError,
              "Filter data is shorter than the file header says it should be"
      end

      chr.ord
    end

    def bit_set?(n)
      (read_byte(byte_offset(n)) & bit_mask(n)) > 0
    end

    # Read-modify-write the byte that holds bit `n`, so the other bits in
    # that byte keep their values.
    def set_bit(n)
      offset  = byte_offset(n)
      updated = read_byte(offset) | bit_mask(n)

      @fd.seek(offset, :SET)
      @fd.write(updated.chr)
    end

    # The bit positions that `s` maps to: `hash_count` positions derived from
    # two differently-seeded 64-bit hashes of `s`.
    def filter_positions(s)
      h1 = XXhash.xxh64(s, 0)
      h2 = XXhash.xxh64(s, 1)
      # Make the second hash odd if it comes out even.
      h2 += 1 if h2 % 2 == 0

      Array.new(hash_count) do |i|
        (h1 + i * h2 + (i ** 3 - i) / 6) % filter_bit_count
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
# git-version-bump is optional: when it cannot be loaded, the version and
# date below fall back to placeholder values.
begin
  require 'git-version-bump'
rescue LoadError
  nil
end

Gem::Specification.new do |spec|
  spec.name = "pwnedkeys-filter"

  spec.version =
    begin
      GVB.version
    rescue StandardError
      "0.0.0.1.NOGVB"
    end

  spec.date =
    begin
      GVB.date
    rescue StandardError
      Time.now.strftime("%Y-%m-%d")
    end

  spec.platform = Gem::Platform::RUBY

  spec.summary = "Library to query pwnedkeys.com bloom filters"

  spec.authors  = ["Matt Palmer"]
  spec.email    = ["matt@hezmatt.org"]
  spec.homepage = "https://github.com/pwnedkeys/pwnedkeys-filter"

  # Ship everything git tracks except dotfiles, files starting with "G",
  # the specs, and the Rakefile.
  tracked_files = `git ls-files -z`.split("\0")
  spec.files = tracked_files.grep_v(/^(\.|G|spec|Rakefile)/)

  spec.required_ruby_version = ">= 2.5.0"

  ["openssl-additions", "xxhash"].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end

  # Each entry is a gem name followed by any version requirements.
  [
    ['bundler'],
    ['github-release'],
    ['git-version-bump'],
    ['guard-rspec'],
    ['rack-test'],
    ['rake', "~> 12.0"],
    ['redcarpet'],
    ['rspec'],
    ['simplecov'],
    ['yard']
  ].each do |gem_name, *requirements|
    spec.add_development_dependency gem_name, *requirements
  end
end