pwnedkeys-filter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,110 @@
1
+ This is a gem for creating and querying [pwnedkeys.com](https://pwnedkeys.com)
2
+ [bloom filters](https://pwnedkeys.com/filter.html).
3
+
4
+
5
+ # Installation
6
+
7
+ It's a gem:
8
+
9
+ gem install pwnedkeys-filter
10
+
11
+ There are also the wonders of [the Gemfile](http://bundler.io):
12
+
13
+ gem 'pwnedkeys-filter'
14
+
15
+ If you're the sturdy type that likes to run from git:
16
+
17
+ rake install
18
+
19
+ Or, if you've eschewed the convenience of Rubygems entirely, then you
20
+ presumably know what to do already.
21
+
22
+
23
+ # Usage
24
+
25
+ Before you do anything, load the code:
26
+
27
+ require "pwnedkeys/filter"
28
+
29
+ The following illustrative examples do not show the full capabilities of the
30
+ gem. The [API documentation](https://rubydoc.info/gems/pwnedkeys-filter) gives
31
+ all the possibilities.
32
+
33
+
34
+ ## Querying the filter
35
+
36
+ Open the filter file, then call `#probably_includes?` with a key:
37
+
38
+ filter = Pwnedkeys::Filter.open("/path/to/the/filter/data/file")
39
+ key = OpenSSL::PKey::RSA.new(2048)
40
+
41
+ # if this returns `true`, it's not your lucky day
42
+ filter.probably_includes?(key)
43
+
44
+ # From https://pwnedkeys.com/examples/rsa2048_key.pem
45
+ key = OpenSSL::PKey.read(File.read("pwnedkeys_demo_rsa2048.pem"))
46
+
47
+ # This should definitely return `true`
48
+ filter.probably_includes?(key)
49
+
50
+ Essentially, when passed anything that looks suspiciously like a key, the
51
+ `#probably_includes?` method will return `true` if the key is *probably* in the
52
+ dataset, and will return `false` if the key is *definitely not* in the dataset.
53
+ For more on why the answers are "definitely not" and "probably", it's best to
54
+ read [the wikipedia page on bloom
55
+ filters](https://en.wikipedia.org/wiki/Bloom_filter).
56
+
57
+
58
+ ## Creating a new filter file
59
+
60
+ To create a new filter data file:
61
+
62
+ Pwnedkeys::Filter.create("/some/file/name", hash_count: 4, hash_length: 12)
63
+
64
+ This does not return an open filter file; it merely creates a new file on
65
+ disk, initialized appropriately.
66
+
67
+
68
+ ## Adding new entries to a filter file
69
+
70
+ To add entries to an open filter:
71
+
72
+ filter.add(key)
73
+
74
+ When you're done adding entries:
75
+
76
+ filter.close
77
+
78
+ The API takes care of updating the filter metadata automatically.
79
+
80
+ If you don't want to have to manually close the filter (and who does, really?),
81
+ then the block form of `#open` is for you:
82
+
83
+ Pwnedkeys::Filter.open("/filter/file") do |filter|
84
+ filter.add(key)
85
+ end
86
+
87
+
88
+ # Contributing
89
+
90
+ See [`CONTRIBUTING.md`](CONTRIBUTING.md).
91
+
92
+
93
+ # Licence
94
+
95
+ Unless otherwise stated, everything in this repo is covered by the following
96
+ copyright notice:
97
+
98
+ Copyright (C) 2019 Matt Palmer <matt@hezmatt.org>
99
+
100
+ This program is free software: you can redistribute it and/or modify it
101
+ under the terms of the GNU General Public License version 3, as
102
+ published by the Free Software Foundation.
103
+
104
+ This program is distributed in the hope that it will be useful,
105
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
106
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
107
+ GNU General Public License for more details.
108
+
109
+ You should have received a copy of the GNU General Public License
110
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,309 @@
1
+ require "xxhash"
2
+ require "openssl/x509/spki"
3
+
4
module Pwnedkeys
  # Create, update, and query pwnedkeys bloom filter data files.
  #
  # A data file consists of a fixed-size header (see {V1Header}) followed by
  # the filter bitmap.  Bits within each bitmap byte are addressed
  # most-significant-bit first.
  class Filter

    # Base class for all Pwnedkeys::Filter exceptions
    class Error < StandardError; end

    # Raised when a data file appears to not be a valid data file.
    class InvalidFileError < Error; end

    # Raised when the key to be searched for isn't a key
    class InvalidKeyError < Error; end

    # Attempt was made to query or modify a closed filter
    class FilterClosedError < Error; end

    # Version-independent representation of the metadata stored at the start
    # of a filter data file.  Subclasses supply the on-disk encoding by
    # implementing `.from_fd` and `#to_s`.
    class Header
      attr_reader :signature, :revision, :update_time, :entry_count, :hash_count, :hash_length

      # Read the file signature from the start of `fd` and hand off to the
      # header class which understands that version of the format.
      #
      # @param fd [IO] a seekable, readable IO positioned anywhere.
      #
      # @return [Header] the parsed header.
      #
      # @raise [InvalidFileError] if the signature is missing or unknown.
      #
      def self.from_fd(fd)
        fd.seek(0)
        case fd.read(6)
        when "pkbfv1"
          V1Header.from_fd(fd)
        else
          raise InvalidFileError,
                "No recognised file signature found"
        end
      end

      # Start from "empty filter" defaults, then let any supplied keyword
      # override the instance variable of the same name.
      def initialize(**params)
        @revision    = 0
        @update_time = Time.at(0)
        @entry_count = 0

        params.each do |k, v|
          instance_variable_set(:"@#{k}", v)
        end
      end

      # @return [Integer] the number of bytes the serialized header occupies,
      #   which is also the file offset of the first bitmap byte.
      def header_size
        to_s.bytesize
      end

      # Record that the filter contents have changed since it was opened.
      def update!
        @revision += 1
      end

      # Record that one more entry has been stored in the filter.
      def entry_added!
        @update_time = Time.now
        @entry_count += 1
      end
    end
    private_constant :Header

    # The version 1 ("pkbfv1") header layout: a 6-byte signature, 32-bit
    # revision, 64-bit update time (seconds since the epoch), 32-bit entry
    # count, then one byte each for hash count and hash length.  All
    # multi-byte integers are big-endian.
    class V1Header < Header
      # `String#unpack` / `Array#pack` template for the header.
      FORMAT = "a6L>Q>L>CC".freeze

      # Serialized size of the header, in bytes (6 + 4 + 8 + 4 + 1 + 1).
      SIZE = 24

      # Parse a v1 header from the start of `fd`.
      #
      # @param fd [IO] a seekable, readable IO.
      #
      # @return [V1Header]
      #
      # @raise [InvalidFileError] if the file is too short to contain a
      #   complete header.
      #
      def self.from_fd(fd)
        fd.seek(0)
        raw = fd.read(SIZE)

        # A short read would otherwise unpack to nils and blow up with an
        # unhelpful TypeError further down.
        if raw.nil? || raw.bytesize < SIZE
          raise InvalidFileError,
                "File is too short to contain a complete header"
        end

        signature, revision, update, entry_count, hash_count, hash_length = raw.unpack(FORMAT)

        new(
          signature:   signature,
          revision:    revision,
          update_time: Time.at(update),
          entry_count: entry_count,
          hash_count:  hash_count,
          hash_length: hash_length
        )
      end

      def initialize(**params)
        # Set before calling super so an explicitly supplied signature
        # still takes precedence.
        @signature = "pkbfv1"

        super
      end

      # @return [String] the binary on-disk representation of the header.
      def to_s
        [@signature, @revision, @update_time.to_i, @entry_count, @hash_count, @hash_length].pack(FORMAT)
      end
    end
    private_constant :V1Header

    # Create a new filter data file.
    #
    # Initialize a file, which cannot already exist, to be a pwnedkeys bloom filter
    # v1 file.  The file is not opened for use, it is simply created on the filesystem
    # at the location specified.
    #
    # @param filename [String] the file to be created.  It can be an absolute or relative
    #   path, or anything else that `File.open` will accept.
    #
    # @param hash_count [Integer] how many filter bits each element in the bloom filter will
    #   set.  Must be between 1 and 255, as it is stored in a single byte.
    #
    # @param hash_length [Integer] the number of bits that will be used for each hash value.
    #   Must be between 1 and 255, as it is stored in a single byte.
    #
    # @raise [ArgumentError] if `hash_count` or `hash_length` cannot be
    #   represented in the file header.  No file is created in this case.
    #
    # @raise [SystemCallError] if any sort of filesystem-related problem occurs, an
    #   `Errno`-related exception will be raised.  Likely candidates include `EEXIST`
    #   (the file you specified already exists), `ENOENT` (the directory you specified
    #   doesn't exist), and `EPERM` (you don't have permissions to create a file where
    #   you want it).
    #
    # @return [void]
    #
    def self.create(filename, hash_count:, hash_length:)
      { hash_count: hash_count, hash_length: hash_length }.each do |name, value|
        unless value.is_a?(Integer) && value.between?(1, 255)
          raise ArgumentError,
                "#{name} must be an Integer between 1 and 255 (got #{value.inspect})"
        end
      end

      # Round up so that filters with fewer than eight bits still get a
      # bitmap byte; otherwise the seek below would step back into the
      # header and clobber its last field.
      bitmap_bytes = (2**hash_length + 7) / 8

      File.open(filename, File::WRONLY | File::CREAT | File::EXCL, binmode: true) do |fd|
        header = V1Header.new(hash_count: hash_count, hash_length: hash_length)

        fd.write(header.to_s)
        # Writing a single zero byte at the final offset extends the file to
        # its full size without having to write the whole bitmap.
        fd.seek(bitmap_bytes - 1, :CUR)
        fd.write("\0")
      end

      nil
    end

    # Open an existing pwnedkeys bloom filter data file.
    #
    # If a block is given, the opened filter is yielded to it and is closed
    # again when the block finishes, whether normally or by raising.
    #
    # @param filename [String] the file to open.
    #
    # @yieldparam filter [Pwnedkeys::Filter] the opened filter.
    #
    # @raise [SystemCallError] if anything low-level goes wrong, you will get some
    #   sort of `Errno`-related exception raised, such as `ENOENT` (the file you
    #   specified does not exist) or `EPERM` (you don't have access to the file
    #   specified).
    #
    # @raise [Pwnedkeys::Filter::InvalidFileError] if the specified file exists,
    #   but is not recognised as a valid pwnedkeys filter file.
    #
    # @return [Pwnedkeys::Filter, Object] the opened filter when called
    #   without a block, otherwise whatever the block returned.
    #
    def self.open(filename)
      filter = new(filename)
      return filter unless block_given?

      begin
        yield filter
      ensure
        filter.close
      end
    end

    # Create a new Pwnedkeys::Filter.
    #
    # Equivalent to {.open}, without the possibility of block-style access.
    #
    # @see .open
    #
    def initialize(filename)
      @fd = File.open(filename, File::RDWR, binmode: true)

      begin
        @header = Header.from_fd(@fd)
      rescue StandardError
        # Don't leak the descriptor when the file turns out to be invalid.
        @fd.close
        @fd = nil
        raise
      end
    end

    # Query the bloom filter.
    #
    # @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
    #   to query the filter for.
    #
    # @return [Boolean] whether the queried key *probably* exists in the
    #   filter (`true`), or whether it *definitely doesn't* (`false`).
    #
    # @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
    #   isn't recognised as a key.
    #
    # @raise [Pwnedkeys::Filter::InvalidFileError] if the data file is
    #   shorter than its header says it should be.
    #
    # @raise [Pwnedkeys::Filter::FilterClosedError] if you try to query
    #   a filter object which has had {#close} called on it.
    #
    def probably_includes?(key)
      raise FilterClosedError if @fd.nil?

      der = spkify(key).to_der
      # Stops reading at the first unset bit.
      filter_positions(der).all? { |n| bit_set?(n) }
    end

    # Add a new key (or SPKI) to the filter.
    #
    # @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
    #   to add to the filter.
    #
    # @return [Boolean] whether the key was added as a new entry.  Due to the
    #   probabilistic nature of the bloom filter structure, it is possible to
    #   add two completely different keys and yet it looks like the "same"
    #   key to the bloom filter.  Adding two colliding keys isn't a fatal
    #   error, but it is a hint that perhaps the existing filter is getting
    #   a little too full.
    #
    # @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
    #   isn't recognised as a key.
    #
    # @raise [Pwnedkeys::Filter::InvalidFileError] if the data file is
    #   shorter than its header says it should be.
    #
    # @raise [Pwnedkeys::Filter::FilterClosedError] if you try to add a key
    #   to a filter object which has had {#close} called on it.
    #
    def add(key)
      raise FilterClosedError if @fd.nil?

      # Normalise once; the SPKI is reused for both the membership check
      # and the bit-setting below.
      spki = spkify(key)
      return false if probably_includes?(spki)

      filter_positions(spki.to_der).each do |n|
        offset  = byte_offset(n)
        current = read_byte_at(offset)

        @fd.seek(offset, :SET)
        @fd.write((current | bit_mask(n)).chr)
      end

      @header.entry_added!

      # Only update the revision if this is the first add in this filter,
      # because otherwise the revision counter would just be the same as the
      # entry counter, and that would be pointless.
      @header.update! unless @already_modified

      @fd.seek(0)
      @fd.write(@header.to_s)

      @already_modified = true
    end

    # Signal that the filter should be closed for further querying and manipulation.
    #
    # Closing an already-closed filter does nothing.
    #
    # @return [void]
    #
    # @raise [SystemCallError] if something filesystem-ish fails.
    #
    def close
      fd, @fd = @fd, nil
      fd.close unless fd.nil?

      nil
    end

    # An estimate of the false-positive rate inherent in the filter.
    #
    # Given the parameters of the filter, we can estimate roughly what the
    # false-positive rate will be when querying this filter.
    #
    # @return [Float] the approximate probability of a query result being a
    #   false positive, expressed as a floating-point number between 0 and 1.
    #
    def false_positive_rate
      # Taken wholesale from https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
      (1 - (1 - 1.0 / filter_bit_count) ** (hash_count * entry_count)) ** hash_count
    end


    private

    def hash_count
      @header.hash_count
    end

    def hash_length
      @header.hash_length
    end

    def entry_count
      @header.entry_count
    end

    # Total number of bits in the filter bitmap.
    def filter_bit_count
      @filter_size ||= (2 ** hash_length)
    end

    # Coerce any of the accepted key representations into an SPKI object.
    #
    # @raise [InvalidKeyError] if `key` is of an unsupported type, or is a
    #   String that parses as neither a key nor an SPKI structure.
    def spkify(key)
      case key
      when OpenSSL::X509::SPKI
        key
      when OpenSSL::PKey::PKey
        key.to_spki
      when String
        spki_from_string(key)
      else
        raise InvalidKeyError,
              "Did not recognise the provided key"
      end
    end

    # Try to interpret a String first as a serialized key, and failing that
    # as a serialized SPKI structure.
    def spki_from_string(s)
      OpenSSL::PKey.read(s).to_spki
    rescue OpenSSL::ASN1::ASN1Error, OpenSSL::PKey::PKeyError
      begin
        OpenSSL::X509::SPKI.new(s)
      rescue OpenSSL::ASN1::ASN1Error, OpenSSL::X509::SPKIError
        raise InvalidKeyError,
              "Could not parse provided key as a key or SPKI structure"
      end
    end

    # File offset of the byte which holds filter bit `n`.
    def byte_offset(n)
      @header.header_size + n / 8
    end

    # Mask selecting filter bit `n` within its byte (bit 0 is the MSB).
    def bit_mask(n)
      1 << (7 - (n % 8))
    end

    # Read the single byte at `offset` as an Integer.
    #
    # @raise [InvalidFileError] if `offset` lies beyond the end of the file.
    def read_byte_at(offset)
      @fd.seek(offset, :SET)
      byte = @fd.read(1)

      if byte.nil?
        raise InvalidFileError,
              "Filter data file is truncated"
      end

      byte.ord
    end

    # Whether filter bit `n` is currently set in the data file.
    def bit_set?(n)
      (read_byte_at(byte_offset(n)) & bit_mask(n)) > 0
    end

    # The state of every filter bit that `s` maps to.
    def filter_bits(s)
      filter_positions(s).map { |n| bit_set?(n) }
    end

    # The bit positions that `s` maps to in the filter.
    #
    # Two 64-bit hashes of `s` (seeds 0 and 1) are combined to derive
    # `hash_count` positions; the second hash is forced to be odd.
    def filter_positions(s)
      h1 = XXhash.xxh64(s, 0)
      h2 = XXhash.xxh64(s, 1)
      h2 += 1 if h2.even?

      Array.new(hash_count) do |i|
        (h1 + i * h2 + (i ** 3 - i) / 6) % filter_bit_count
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
# Gem specification for pwnedkeys-filter.

# git-version-bump supplies the `GVB` constant used below.  It is only a
# development dependency, so carry on without it if it isn't installed.
begin
  require 'git-version-bump'
rescue LoadError
  nil
end

Gem::Specification.new do |s|
  s.name = "pwnedkeys-filter"

  # The `rescue` modifiers provide fallbacks for when `GVB` could not be
  # loaded above (referencing it then raises NameError).
  s.version = GVB.version rescue "0.0.0.1.NOGVB"
  s.date    = GVB.date    rescue Time.now.strftime("%Y-%m-%d")

  s.platform = Gem::Platform::RUBY

  s.summary = "Library to query pwnedkeys.com bloom filters"

  s.authors  = ["Matt Palmer"]
  s.email    = ["matt@hezmatt.org"]
  s.homepage = "https://github.com/pwnedkeys/pwnedkeys-filter"

  # Matches the licence notice in the README (GPL version 3).
  s.license = "GPL-3.0-only"

  # Ship everything git tracks, except paths starting with ".", "G",
  # "spec", or "Rakefile".
  s.files = `git ls-files -z`.split("\0").reject { |f| f =~ /^(\.|G|spec|Rakefile)/ }

  s.required_ruby_version = ">= 2.5.0"

  s.add_runtime_dependency "openssl-additions"
  s.add_runtime_dependency "xxhash"

  s.add_development_dependency 'bundler'
  s.add_development_dependency 'github-release'
  s.add_development_dependency 'git-version-bump'
  s.add_development_dependency 'guard-rspec'
  s.add_development_dependency 'rack-test'
  s.add_development_dependency 'rake', "~> 12.0"
  s.add_development_dependency 'redcarpet'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'yard'
end