pwnedkeys-filter 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/CONTRIBUTING.md +14 -0
- data/LICENCE +674 -0
- data/README.md +110 -0
- data/lib/pwnedkeys/filter.rb +309 -0
- data/pwnedkeys-filter.gemspec +38 -0
- metadata +216 -0
data/README.md
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
This is a gem for creating and querying [pwnedkeys.com](https://pwnedkeys.com)
|
2
|
+
[bloom filters](https://pwnedkeys.com/filter.html).
|
3
|
+
|
4
|
+
|
5
|
+
# Installation
|
6
|
+
|
7
|
+
It's a gem:
|
8
|
+
|
9
|
+
gem install pwnedkeys-filter
|
10
|
+
|
11
|
+
There's also the wonders of [the Gemfile](http://bundler.io):
|
12
|
+
|
13
|
+
gem 'pwnedkeys-filter'
|
14
|
+
|
15
|
+
If you're the sturdy type that likes to run from git:
|
16
|
+
|
17
|
+
rake install
|
18
|
+
|
19
|
+
Or, if you've eschewed the convenience of Rubygems entirely, then you
|
20
|
+
presumably know what to do already.
|
21
|
+
|
22
|
+
|
23
|
+
# Usage
|
24
|
+
|
25
|
+
Before you do anything, load the code:
|
26
|
+
|
27
|
+
require "pwnedkeys/filter"
|
28
|
+
|
29
|
+
The following illustrative examples do not show the full capabilities of the
|
30
|
+
gem. The [API documentation](https://rubydoc.info/gems/pwnedkeys-filter) gives
|
31
|
+
all the possibilities.
|
32
|
+
|
33
|
+
|
34
|
+
## Querying the filter
|
35
|
+
|
36
|
+
Open the filter file, then call `#probably_includes?` with a key:
|
37
|
+
|
38
|
+
filter = Pwnedkeys::Filter.open("/path/to/the/filter/data/file")
|
39
|
+
key = OpenSSL::PKey::RSA.new(2048)
|
40
|
+
|
41
|
+
# if this returns `true`, it's not your lucky day
|
42
|
+
filter.probably_includes?(key)
|
43
|
+
|
44
|
+
# From https://pwnedkeys.com/examples/rsa2048_key.pem
|
45
|
+
key = OpenSSL::PKey.read(File.read("pwnedkeys_demo_rsa2048.pem"))
|
46
|
+
|
47
|
+
# This should definitely return `true`
|
48
|
+
filter.probably_includes?(key)
|
49
|
+
|
50
|
+
Essentially, when passed anything that looks suspiciously like a key, the
|
51
|
+
`#probably_includes?` method will return `true` if the key is *probably* in the
|
52
|
+
dataset, and will return `false` if the key is *definitely not* in the dataset.
|
53
|
+
For more on why the answers are "definitely not" and "probably", it's best to
|
54
|
+
read [the wikipedia page on bloom
|
55
|
+
filters](https://en.wikipedia.org/wiki/Bloom_filter).
|
56
|
+
|
57
|
+
|
58
|
+
## Creating a new filter file
|
59
|
+
|
60
|
+
To create a new filter data file:
|
61
|
+
|
62
|
+
Pwnedkeys::Filter.create("/some/file/name", hash_count: 4, hash_length: 12)
|
63
|
+
|
64
|
+
This does not return an open filter file, it merely creates a new file on
|
65
|
+
disk, initialized appropriately.
|
66
|
+
|
67
|
+
|
68
|
+
## Adding new entries to a filter file
|
69
|
+
|
70
|
+
To add entries to an open filter:
|
71
|
+
|
72
|
+
filter.add(key)
|
73
|
+
|
74
|
+
When you're done adding entries:
|
75
|
+
|
76
|
+
filter.close
|
77
|
+
|
78
|
+
The API takes care of updating the filter metadata automatically.
|
79
|
+
|
80
|
+
If you don't want to have to manually close the filter (and who does, really?),
|
81
|
+
then the block form of `.open` is for you:
|
82
|
+
|
83
|
+
Pwnedkeys::Filter.open("/filter/file") do |filter|
|
84
|
+
filter.add(key)
|
85
|
+
end
|
86
|
+
|
87
|
+
|
88
|
+
# Contributing
|
89
|
+
|
90
|
+
See [`CONTRIBUTING.md`](CONTRIBUTING.md).
|
91
|
+
|
92
|
+
|
93
|
+
# Licence
|
94
|
+
|
95
|
+
Unless otherwise stated, everything in this repo is covered by the following
|
96
|
+
copyright notice:
|
97
|
+
|
98
|
+
Copyright (C) 2019 Matt Palmer <matt@hezmatt.org>
|
99
|
+
|
100
|
+
This program is free software: you can redistribute it and/or modify it
|
101
|
+
under the terms of the GNU General Public License version 3, as
|
102
|
+
published by the Free Software Foundation.
|
103
|
+
|
104
|
+
This program is distributed in the hope that it will be useful,
|
105
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
106
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
107
|
+
GNU General Public License for more details.
|
108
|
+
|
109
|
+
You should have received a copy of the GNU General Public License
|
110
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -0,0 +1,309 @@
|
|
1
|
+
require "xxhash"
|
2
|
+
require "openssl/x509/spki"
|
3
|
+
|
4
|
+
module Pwnedkeys
|
5
|
+
class Filter
|
6
|
+
|
7
|
+
# Common ancestor of every exception defined by Pwnedkeys::Filter, so that
# callers can rescue the whole family with a single clause.
class Error < StandardError
end

# Signals that a data file does not look like a valid filter data file.
class InvalidFileError < Error
end

# Signals that the object given as a key could not be treated as a key.
class InvalidKeyError < Error
end

# Signals an attempt to query or modify a filter which has been closed.
class FilterClosedError < Error
end
|
18
|
+
|
19
|
+
# In-memory form of the metadata stored at the start of a filter data file.
#
# This base class holds the fields and the bookkeeping common to all header
# versions; `#header_size` relies on `#to_s`, which version-specific
# subclasses override with their on-disk serialisation.
class Header
  attr_reader :signature, :revision, :update_time, :entry_count, :hash_count, :hash_length

  # Read a header from an open filter data file.
  #
  # The first six bytes of the file are compared against the known file
  # signatures, and parsing is handed to the matching header class.
  #
  # @param fd [IO] the open data file; it is rewound before reading.
  #
  # @return [Header] the parsed header.
  #
  # @raise [InvalidFileError] if the signature is not recognised.
  #
  def self.from_fd(fd)
    fd.seek(0)
    magic = fd.read(6)

    unless magic == "pkbfv1"
      raise InvalidFileError,
            "No recognised file signature found"
    end

    V1Header.from_fd(fd)
  end

  # Build a header, starting from "empty filter" defaults.
  #
  # @param params [Hash{Symbol => Object}] field values; each key is stored
  #   in the instance variable of the same name, overriding any default.
  #
  def initialize(**params)
    @revision    = 0
    @update_time = Time.at(0)
    @entry_count = 0

    params.each_pair do |field, value|
      instance_variable_set(:"@#{field}", value)
    end
  end

  # @return [Integer] the length of the serialised header, which is also the
  #   offset at which the filter data starts.
  def header_size
    to_s.length
  end

  # Bump the revision counter.
  #
  # @return [Integer] the new revision.
  def update!
    @revision += 1
  end

  # Record that one more entry has been added: stamp the update time with
  # the current time and increment the entry counter.
  #
  # @return [Integer] the new entry count.
  def entry_added!
    @update_time = Time.now
    @entry_count += 1
  end
end
|
56
|
+
private_constant :Header
|
57
|
+
|
58
|
+
# Header for version 1 ("pkbfv1") filter data files.
#
# On disk the header occupies 24 bytes, with all integers big-endian:
#
#   bytes  0-5   file signature
#   bytes  6-9   revision (32-bit unsigned)
#   bytes 10-17  last update time, in seconds since the epoch (64-bit unsigned)
#   bytes 18-21  entry count (32-bit unsigned)
#   byte  22     hash count (8-bit unsigned)
#   byte  23     hash length (8-bit unsigned)
class V1Header < Header
  # Parse a v1 header from the start of an open filter data file.
  #
  # @param fd [IO] the open data file; it is rewound before reading.
  #
  # @return [V1Header]
  #
  # @raise [InvalidFileError] if the file ends before the header does.
  #
  def self.from_fd(fd)
    fd.seek(0)
    raw = fd.read(24)

    # A short read means the file was cut off part-way through the header.
    # Unpacking it anyway would yield nil fields (and a TypeError from
    # Time.at), so report it as the invalid file that it is.
    if raw.nil? || raw.bytesize < 24
      raise InvalidFileError,
            "File is too short to contain a complete v1 header"
    end

    signature, revision, update, entry_count, hash_count, hash_length = raw.unpack("a6L>Q>L>CC")
    update_time = Time.at(update)

    self.new(
      signature: signature,
      revision: revision,
      update_time: update_time,
      entry_count: entry_count,
      hash_count: hash_count,
      hash_length: hash_length
    )
  end

  # Build a v1 header; the signature defaults to the v1 magic string, and
  # the remaining defaults and overrides are handled by the superclass.
  def initialize(**params)
    @signature = "pkbfv1"

    super
  end

  # @return [String] the binary on-disk representation of this header.
  def to_s
    [@signature, @revision, @update_time.to_i, @entry_count, @hash_count, @hash_length].pack("a*L>Q>L>CC")
  end
end
|
84
|
+
private_constant :V1Header
|
85
|
+
|
86
|
+
# Create a new filter data file.
#
# Initialize a file, which cannot already exist, to be a pwnedkeys bloom filter
# v1 file.  The file is not opened for use, it is simply created on the filesystem
# at the location specified.
#
# @param filename [String] the file to be created.  It can be an absolute or relative
#   path, or anything else that `File.open` will accept.
#
# @param hash_count [Integer] how many filter bits each element in the bloom filter will
#   set.  It is stored in a single byte of the header, so it must be between 0 and 255.
#
# @param hash_length [Integer] the number of bits that will be used for each hash value.
#   The filter holds `2 ** hash_length` bits, so at least 3 is needed to make up a
#   whole byte, and the value is stored in a single header byte, so at most 255.
#
# @raise [ArgumentError] if `hash_count` or `hash_length` is outside the range the
#   file format can represent.  No file is created in that case.
#
# @raise [SystemCallError] if any sort of filesystem-related problem occurs, an
#   `Errno`-related exception will be raised.  Likely candidates include `EEXIST`
#   (the file you specified already exists), `ENOENT` (the directory you specified
#   doesn't exist), and a permissions error such as `EACCES` (you aren't allowed
#   to create a file where you want it).
#
# @return [void]
#
def self.create(filename, hash_count:, hash_length:)
  unless hash_count.is_a?(Integer) && (0..255).cover?(hash_count)
    raise ArgumentError, "hash_count must be an integer between 0 and 255"
  end

  # Below 3 bits the filter is smaller than one byte, and the seek below
  # would step *backwards* into the header that was just written.
  unless hash_length.is_a?(Integer) && (3..255).cover?(hash_length)
    raise ArgumentError, "hash_length must be an integer between 3 and 255"
  end

  # Binary mode matches how the file is opened for use, and keeps the packed
  # header bytes from being newline-translated on platforms that do that.
  File.open(filename, File::WRONLY | File::CREAT | File::EXCL | File::BINARY) do |fd|
    header = V1Header.new(hash_count: hash_count, hash_length: hash_length)

    fd.write(header.to_s)

    # Size the filter data by seeking to its final byte and writing a single
    # NUL there, rather than writing out every zero byte.
    fd.seek((2 ** hash_length) / 8 - 1, :CUR)
    fd.write("\0")
  end

  nil
end
|
119
|
+
|
120
|
+
# Open an existing pwnedkeys bloom filter data file.
#
# When called with a block, the open filter is yielded to the block and is
# closed automatically when the block finishes, whether it returns normally
# or raises.  Without a block, the open filter is returned and the caller is
# responsible for calling {#close}.
#
# @param filename [String] the file to open.
#
# @yieldparam filter [Pwnedkeys::Filter] the open filter.
#
# @raise [SystemCallError] if anything low-level goes wrong, you will get some
#   sort of `Errno`-related exception raised, such as `ENOENT` (the file you
#   specified does not exist) or a permissions error (you don't have access
#   to the file specified).
#
# @raise [Pwnedkeys::Filter::InvalidFileError] if the specified file exists,
#   but is not recognised as a valid pwnedkeys filter file.
#
# @return [Pwnedkeys::Filter, Object] the open filter, or, in block form,
#   whatever the block returned.
#
def self.open(filename)
  filter = new(filename)

  return filter unless block_given?

  begin
    yield filter
  ensure
    # The block may have closed the filter itself; only close it here if
    # its file handle is still held, so it is never closed twice.
    filter.close unless filter.instance_variable_get(:@fd).nil?
  end
end
|
143
|
+
|
144
|
+
# Create a new Pwnedkeys::Filter.
#
# Equivalent to {.open}, without the possibility of block-style access.
# The file is opened read-write in binary mode and its header is parsed
# immediately.
#
# @param filename [String] the filter data file to open.
#
# @raise [SystemCallError] if the file cannot be opened.
#
# @raise [Pwnedkeys::Filter::InvalidFileError] if the file is not recognised
#   as a pwnedkeys filter file.
#
# @see .open
#
def initialize(filename)
  # Tracks whether this object has already bumped the header revision.
  @already_modified = false

  @fd = File.open(filename, File::RDWR, binmode: true)

  begin
    @header = Header.from_fd(@fd)
  rescue StandardError
    # Don't leave the file handle dangling when the file turns out not to
    # be a usable filter; release it before passing the error on.
    @fd.close
    @fd = nil
    raise
  end
end
|
154
|
+
|
155
|
+
# Ask the bloom filter whether it has seen a key.
#
# @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
#   to look up.
#
# @return [Boolean] `true` if every filter bit for the key is set, meaning
#   the key is *probably* in the filter; `false` if any bit is clear,
#   meaning it *definitely isn't*.
#
# @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
#   isn't recognised as a key.
#
# @raise [Pwnedkeys::Filter::FilterClosedError] if the filter has had
#   {#close} called on it.
#
def probably_includes?(key)
  raise FilterClosedError if @fd.nil?

  der = spkify(key).to_der

  filter_bits(der).all?
end
|
175
|
+
|
176
|
+
# Add a new key (or SPKI) to the filter.
#
# @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
#   to add to the filter.
#
# @return [Boolean] `true` if the key was added as a new entry, `false` if
#   all of its filter bits were already set and nothing was changed.  Due
#   to the probabilistic nature of the bloom filter structure, it is
#   possible to add two completely different keys and yet have them look
#   like the "same" key to the bloom filter.  Adding two colliding keys
#   isn't a fatal error, but it is a hint that perhaps the existing filter
#   is getting a little too full.
#
# @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
#   isn't recognised as a key.
#
# @raise [Pwnedkeys::Filter::FilterClosedError] if you try to add a key
#   to a filter object which has had {#close} called on it.
#
def add(key)
  raise FilterClosedError if @fd.nil?

  # Parse and serialise the key once, and use the result for both the
  # membership check and the update.
  der = spkify(key).to_der

  return false if filter_bits(der).all?

  # The header is not modified while the bits are being set, so the offset
  # of the filter data only needs to be worked out once.
  data_offset = @header.header_size

  filter_positions(der).each do |n|
    @fd.seek(data_offset + n / 8, :SET)
    byte = @fd.read(1).ord
    # Step back over the byte just read so the write replaces it in place.
    @fd.seek(-1, :CUR)

    # Bits are numbered from the most significant bit of each byte.
    mask = 0x80 >> (n % 8)

    @fd.write((byte | mask).chr)
  end

  @header.entry_added!

  # Only update the revision if this is the first add in this filter,
  # because otherwise the revision counter would just be the same as the
  # entry counter, and that would be pointless.
  @header.update! unless @already_modified

  @fd.seek(0)
  @fd.write(@header.to_s)

  @already_modified = true

  true
end
|
223
|
+
|
224
|
+
# Signal that the filter should be closed for further querying and manipulation.
#
# Closing a filter that is already closed does nothing.
#
# @return [void]
#
# @raise [SystemCallError] if something filesystem-ish fails.
#
def close
  return if @fd.nil?

  # Drop our reference before closing, so the filter counts as closed even
  # if the underlying close raises.
  fd, @fd = @fd, nil
  fd.close

  nil
end
|
234
|
+
|
235
|
+
# Estimate the false-positive rate of this filter.
#
# The estimate is computed from the filter's size, its hash count, and the
# number of entries recorded in its header, using the formula from
# https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
#
# @return [Float] the approximate probability of a query result being a
#   false positive, expressed as a floating-point number between 0 and 1.
#
def false_positive_rate
  # Chance that one particular bit is left untouched by a single bit-set.
  untouched_once = 1 - 1.0 / filter_bit_count

  # Chance that the bit is still clear after every entry has set its bits.
  still_clear = untouched_once ** (hash_count * entry_count)

  # A false positive needs every one of the queried bits to be set.
  (1 - still_clear) ** hash_count
end
|
247
|
+
|
248
|
+
|
249
|
+
private
|
250
|
+
|
251
|
+
# The hash count recorded in the file header.
def hash_count
  @header.hash_count
end

# The hash length, in bits, recorded in the file header.
def hash_length
  @header.hash_length
end

# The number of entries recorded in the file header.
def entry_count
  @header.entry_count
end

# Total number of bits in the filter: two to the power of the hash length.
# Computed on first use and cached thereafter.
def filter_bit_count
  @filter_size ||= 1 << hash_length
end
|
266
|
+
|
267
|
+
# Turn whatever the caller handed over into an SPKI object.
#
# SPKI objects pass straight through, key objects are converted with
# `#to_spki`, and strings are parsed by {#spki_from_string}.
#
# @raise [InvalidKeyError] if the argument is of an unsupported type, or
#   is a string which cannot be parsed.
def spkify(key)
  case key
  when OpenSSL::X509::SPKI
    key
  when OpenSSL::PKey::PKey
    key.to_spki
  when String
    spki_from_string(key)
  else
    raise InvalidKeyError,
          "Did not recognise the provided key"
  end
end

# Parse a string as a key first and, failing that, as an SPKI structure.
#
# @raise [InvalidKeyError] if neither interpretation parses.
def spki_from_string(str)
  OpenSSL::PKey.read(str).to_spki
rescue OpenSSL::ASN1::ASN1Error, OpenSSL::PKey::PKeyError
  begin
    OpenSSL::X509::SPKI.new(str)
  rescue OpenSSL::ASN1::ASN1Error, OpenSSL::X509::SPKIError
    raise InvalidKeyError,
          "Could not parse provided key as a key or SPKI structure"
  end
end
|
288
|
+
|
289
|
+
# Read the current state of every filter bit that the given string maps to.
#
# @param s [String] the data to look up (callers pass a DER-encoded SPKI).
#
# @return [Array<Boolean>] one element per filter position, `true` where
#   the bit is set.
def filter_bits(s)
  data_start = @header.header_size

  filter_positions(s).map do |position|
    byte_index, bit_index = position.divmod(8)

    @fd.seek(byte_index + data_start, :SET)
    octet = @fd.read(1).ord

    # Bits are numbered from the most significant bit of each byte.
    (octet & (0x80 >> bit_index)) > 0
  end
end
|
298
|
+
|
299
|
+
# Work out which filter bits the given string maps to.
#
# Two 64-bit xxHash values (seeds 0 and 1) are taken of the input; the
# second is forced to be odd.  Position `i` is then derived from the first
# hash, `i` times the second, and a cubic term in `i`, reduced modulo the
# number of bits in the filter.
#
# @param s [String] the data to hash.
#
# @return [Array<Integer>] `hash_count` bit positions, each in the range
#   `0...filter_bit_count`.
def filter_positions(s)
  base = XXhash.xxh64(s, 0)
  step = XXhash.xxh64(s, 1)
  step += 1 if step.even?

  hash_count.times.map do |i|
    cubic = (i ** 3 - i) / 6

    (base + i * step + cubic) % filter_bit_count
  end
end
|
308
|
+
end
|
309
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# git-version-bump is optional at build time: if it cannot be loaded, the
# GVB lookups below fail and the fallback version and date are used instead.
begin
  require 'git-version-bump'
rescue LoadError
  nil
end

Gem::Specification.new do |spec|
  spec.name = "pwnedkeys-filter"

  # Any StandardError from GVB (including GVB not being defined at all)
  # selects the fallback value.
  spec.version = begin
    GVB.version
  rescue StandardError
    "0.0.0.1.NOGVB"
  end
  spec.date = begin
    GVB.date
  rescue StandardError
    Time.now.strftime("%Y-%m-%d")
  end

  spec.platform = Gem::Platform::RUBY

  spec.summary = "Library to query pwnedkeys.com bloom filters"

  spec.authors  = ["Matt Palmer"]
  spec.email    = ["matt@hezmatt.org"]
  spec.homepage = "https://github.com/pwnedkeys/pwnedkeys-filter"

  # Package everything tracked by git, except paths starting with a dot, a
  # capital "G", "spec", or "Rakefile".
  tracked_files = `git ls-files -z`.split("\0")
  spec.files = tracked_files.reject { |f| f =~ /^(\.|G|spec|Rakefile)/ }

  spec.required_ruby_version = ">= 2.5.0"

  ["openssl-additions", "xxhash"].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end

  # Each entry is a gem name followed by any version requirements.
  [
    ['bundler'],
    ['github-release'],
    ['git-version-bump'],
    ['guard-rspec'],
    ['rack-test'],
    ['rake', "~> 12.0"],
    ['redcarpet'],
    ['rspec'],
    ['simplecov'],
    ['yard'],
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end
end
|