pwnedkeys-filter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/CONTRIBUTING.md +14 -0
- data/LICENCE +674 -0
- data/README.md +110 -0
- data/lib/pwnedkeys/filter.rb +309 -0
- data/pwnedkeys-filter.gemspec +38 -0
- metadata +216 -0
data/README.md
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
This is a gem for creating and querying [pwnedkeys.com](https://pwnedkeys.com)
|
2
|
+
[bloom filters](https://pwnedkeys.com/filter.html).
|
3
|
+
|
4
|
+
|
5
|
+
# Installation
|
6
|
+
|
7
|
+
It's a gem:
|
8
|
+
|
9
|
+
gem install pwnedkeys-filter
|
10
|
+
|
11
|
+
There are also the wonders of [the Gemfile](https://bundler.io):
|
12
|
+
|
13
|
+
gem 'pwnedkeys-filter'
|
14
|
+
|
15
|
+
If you're the sturdy type that likes to run from git:
|
16
|
+
|
17
|
+
rake install
|
18
|
+
|
19
|
+
Or, if you've eschewed the convenience of Rubygems entirely, then you
|
20
|
+
presumably know what to do already.
|
21
|
+
|
22
|
+
|
23
|
+
# Usage
|
24
|
+
|
25
|
+
Before you do anything, load the code:
|
26
|
+
|
27
|
+
require "pwnedkeys/filter"
|
28
|
+
|
29
|
+
The following illustrative examples do not show the full capabilities of the
|
30
|
+
gem. The [API documentation](https://rubydoc.info/gems/pwnedkeys-filter) gives
|
31
|
+
all the possibilities.
|
32
|
+
|
33
|
+
|
34
|
+
## Querying the filter
|
35
|
+
|
36
|
+
Open the filter file, then call `#probably_includes?` with a key:
|
37
|
+
|
38
|
+
filter = Pwnedkeys::Filter.open("/path/to/the/filter/data/file")
|
39
|
+
key = OpenSSL::PKey::RSA.new(2048)
|
40
|
+
|
41
|
+
# if this returns `true`, it's not your lucky day
|
42
|
+
filter.probably_includes?(key)
|
43
|
+
|
44
|
+
# From https://pwnedkeys.com/examples/rsa2048_key.pem
|
45
|
+
key = OpenSSL::PKey.read(File.read("pwnedkeys_demo_rsa2048.pem"))
|
46
|
+
|
47
|
+
# This should definitely return `true`
|
48
|
+
filter.probably_includes?(key)
|
49
|
+
|
50
|
+
Essentially, when passed anything that looks suspiciously like a key, the
|
51
|
+
`#probably_includes?` method will return `true` if the key is *probably* in the
|
52
|
+
dataset, and will return `false` if the key is *definitely not* in the dataset.
|
53
|
+
For more on why the answers are "definitely not" and "probably", it's best to
|
54
|
+
read [the wikipedia page on bloom
|
55
|
+
filters](https://en.wikipedia.org/wiki/Bloom_filter).
|
56
|
+
|
57
|
+
|
58
|
+
## Creating a new filter file
|
59
|
+
|
60
|
+
To create a new filter data file:
|
61
|
+
|
62
|
+
Pwnedkeys::Filter.create("/some/file/name", hash_count: 4, hash_length: 12)
|
63
|
+
|
64
|
+
This does not return an open filter file; it merely creates a new file on
|
65
|
+
disk, initialized appropriately.
|
66
|
+
|
67
|
+
|
68
|
+
## Adding new entries to a filter file
|
69
|
+
|
70
|
+
To add entries to an open filter:
|
71
|
+
|
72
|
+
filter.add(key)
|
73
|
+
|
74
|
+
When you're done adding entries:
|
75
|
+
|
76
|
+
filter.close
|
77
|
+
|
78
|
+
The API takes care of updating the filter metadata automatically.
|
79
|
+
|
80
|
+
If you don't want to have to manually close the filter (and who does, really?),
|
81
|
+
then the block form of `.open` is for you:
|
82
|
+
|
83
|
+
Pwnedkeys::Filter.open("/filter/file") do |filter|
|
84
|
+
filter.add(key)
|
85
|
+
end
|
86
|
+
|
87
|
+
|
88
|
+
# Contributing
|
89
|
+
|
90
|
+
See [`CONTRIBUTING.md`](CONTRIBUTING.md).
|
91
|
+
|
92
|
+
|
93
|
+
# Licence
|
94
|
+
|
95
|
+
Unless otherwise stated, everything in this repo is covered by the following
|
96
|
+
copyright notice:
|
97
|
+
|
98
|
+
Copyright (C) 2019 Matt Palmer <matt@hezmatt.org>
|
99
|
+
|
100
|
+
This program is free software: you can redistribute it and/or modify it
|
101
|
+
under the terms of the GNU General Public License version 3, as
|
102
|
+
published by the Free Software Foundation.
|
103
|
+
|
104
|
+
This program is distributed in the hope that it will be useful,
|
105
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
106
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
107
|
+
GNU General Public License for more details.
|
108
|
+
|
109
|
+
You should have received a copy of the GNU General Public License
|
110
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -0,0 +1,309 @@
|
|
1
|
+
require "xxhash"
|
2
|
+
require "openssl/x509/spki"
|
3
|
+
|
4
|
+
module Pwnedkeys
|
5
|
+
class Filter
  # Base class for all Pwnedkeys::Filter exceptions
  class Error < StandardError; end

  # Raised when a data file appears to not be a valid data file.
  class InvalidFileError < Error; end

  # Raised when the key to be searched for isn't a key
  class InvalidKeyError < Error; end

  # Attempt was made to query or modify a closed filter
  class FilterClosedError < Error; end

  # In-memory copy of the metadata stored at the start of a filter data file.
  #
  # This base class holds the fields and the counters; the on-disk encoding
  # is supplied by a version-specific subclass (see V1Header).
  class Header
    attr_reader :signature, :revision, :update_time, :entry_count, :hash_count, :hash_length

    # Read the header from an open file, picking the header class from the
    # six-byte signature at the start of the file.
    #
    # @param fd [IO] a readable, seekable IO positioned anywhere.
    #
    # @raise [InvalidFileError] if the signature is missing or unknown.
    #
    # @return [Header]
    def self.from_fd(fd)
      fd.seek(0)

      case fd.read(6)
      when "pkbfv1"
        V1Header.from_fd(fd)
      else
        raise InvalidFileError,
              "No recognised file signature found"
      end
    end

    # @param params [Hash{Symbol => Object}] field values; each key is
    #   stored in the instance variable of the same name, overriding the
    #   "brand new, empty filter" defaults set here.
    def initialize(**params)
      @revision    = 0
      @update_time = Time.at(0)
      @entry_count = 0

      params.each do |k, v|
        instance_variable_set(:"@#{k}", v)
      end
    end

    # @return [Integer] the number of bytes the encoded header occupies,
    #   which is also the file offset of the first byte of filter bits.
    def header_size
      to_s.bytesize
    end

    # Record that the filter has been modified by bumping the revision.
    def update!
      @revision += 1
    end

    # Record that one new entry has been added to the filter.
    def entry_added!
      @update_time = Time.now
      @entry_count += 1
    end
  end
  private_constant :Header

  # Version 1 header: a fixed 24-byte, big-endian record made up of the
  # signature (6 bytes), revision (32 bits), update time in epoch seconds
  # (64 bits), entry count (32 bits), hash count and hash length (one byte
  # each).
  class V1Header < Header
    # Encoded size of the header, in bytes.
    SIZE = 24

    # `String#unpack` template for the encoded header.
    UNPACK_FORMAT = "a6L>Q>L>CC"

    # @param fd [IO] a readable, seekable IO.
    #
    # @raise [InvalidFileError] if the file ends before a full header
    #   could be read.
    #
    # @return [V1Header]
    def self.from_fd(fd)
      fd.seek(0)
      raw = fd.read(SIZE)

      # A short read would otherwise unpack to nils and blow up in
      # Time.at with a TypeError that says nothing about the real problem.
      if raw.nil? || raw.bytesize < SIZE
        raise InvalidFileError,
              "File is too short to contain a complete header"
      end

      signature, revision, update, entry_count, hash_count, hash_length = raw.unpack(UNPACK_FORMAT)

      new(
        signature:   signature,
        revision:    revision,
        update_time: Time.at(update),
        entry_count: entry_count,
        hash_count:  hash_count,
        hash_length: hash_length
      )
    end

    def initialize(**params)
      @signature = "pkbfv1"

      super
    end

    # @return [String] the binary, on-disk form of the header.
    def to_s
      [@signature, @revision, @update_time.to_i, @entry_count, @hash_count, @hash_length].pack("a*L>Q>L>CC")
    end
  end
  private_constant :V1Header

  # Create a new filter data file.
  #
  # Initialize a file, which cannot already exist, to be a pwnedkeys bloom filter
  # v1 file.  The file is not opened for use, it is simply created on the filesystem
  # at the location specified.
  #
  # @param filename [String] the file to be created.  It can be an absolute or relative
  #   path, or anything else that `File.open` will accept.
  #
  # @param hash_count [Integer] how many filter bits each element in the bloom filter will
  #   set.  Stored in a single byte, so it must be between 1 and 255.
  #
  # @param hash_length [Integer] the number of bits that will be used for each hash value.
  #   The filter holds `2 ** hash_length` bits.  Stored in a single byte, so it must be
  #   between 1 and 255.
  #
  # @raise [ArgumentError] if `hash_count` or `hash_length` is not an integer that
  #   fits in the header.  No file is created in this case.
  #
  # @raise [SystemCallError] if any sort of filesystem-related problem occurs, an
  #   `Errno`-related exception will be raised.  Likely candidates include `EEXIST`
  #   (the file you specified already exists), `ENOENT` (the directory you specified
  #   doesn't exist), and `EACCES` or `EPERM` (you don't have permissions to create
  #   a file where you want it).
  #
  # @return [void]
  #
  def self.create(filename, hash_count:, hash_length:)
    validate_byte_param!(:hash_count, hash_count)
    validate_byte_param!(:hash_length, hash_length)

    header_bytes = V1Header.new(hash_count: hash_count, hash_length: hash_length).to_s

    # Round up to whole bytes: a filter of fewer than eight bits still needs
    # one byte of storage.  Rounding down gave zero, and the seek below then
    # stepped backwards and overwrote the last byte of the header.
    filter_bytes = (2**hash_length + 7) / 8

    # File::BINARY stops newline translation mangling the header on
    # platforms that distinguish text from binary mode.
    File.open(filename, File::WRONLY | File::CREAT | File::EXCL | File::BINARY) do |fd|
      fd.write(header_bytes)
      # Seek to the final byte and write it, rather than writing every byte,
      # so a large filter is sized without writing out all the zero bytes.
      fd.seek(filter_bytes - 1, :CUR)
      fd.write("\0")
    end

    nil
  end

  # Reject header values that cannot be stored in one unsigned byte.
  def self.validate_byte_param!(name, value)
    return if value.is_a?(Integer) && value.between?(1, 255)

    raise ArgumentError,
          "#{name} must be an Integer between 1 and 255 (got #{value.inspect})"
  end
  private_class_method :validate_byte_param!

  # Open an existing pwnedkeys bloom filter data file.
  #
  # When called with a block, the open filter is yielded to the block and is
  # closed when the block finishes, whether it returns normally or raises.
  #
  # @param filename [String] the file to open.
  #
  # @yieldparam filter [Pwnedkeys::Filter] the open filter.
  #
  # @raise [SystemCallError] if anything low-level goes wrong, you will get some
  #   sort of `Errno`-related exception raised, such as `ENOENT` (the file you
  #   specified does not exist) or `EACCES` (you don't have access to the file
  #   specified).
  #
  # @raise [Pwnedkeys::Filter::InvalidFileError] if the specified file exists,
  #   but is not recognised as a valid pwnedkeys filter file.
  #
  # @return [Pwnedkeys::Filter, Object] the open filter when no block is given,
  #   otherwise whatever the block returned.
  #
  def self.open(filename)
    filter = new(filename)
    return filter unless block_given?

    begin
      yield filter
    ensure
      filter.close
    end
  end

  # Create a new Pwnedkeys::Filter.
  #
  # Equivalent to {.open}, without the possibility of block-style access.
  #
  # @see .open
  #
  def initialize(filename)
    @fd = File.open(filename, File::RDWR, binmode: true)

    begin
      @header = Header.from_fd(@fd)
    rescue StandardError
      # Don't leave the file open when it turns out not to be a filter.
      @fd.close
      @fd = nil
      raise
    end
  end

  # Query the bloom filter.
  #
  # @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
  #   to query the filter for.
  #
  # @return [Boolean] whether the queried key *probably* exists in the
  #   filter (`true`), or whether it *definitely doesn't* (`false`).
  #
  # @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
  #   isn't recognised as a key.
  #
  # @raise [Pwnedkeys::Filter::InvalidFileError] if the data file is shorter
  #   than its header says it should be.
  #
  # @raise [Pwnedkeys::Filter::FilterClosedError] if you try to query
  #   a filter object which has had {#close} called on it.
  #
  def probably_includes?(key)
    raise FilterClosedError if @fd.nil?

    # Stops at the first unset bit; a single miss settles the answer.
    filter_positions(spkify(key).to_der).all? { |n| bit_set?(n) }
  end

  # Add a new key (or SPKI) to the filter.
  #
  # @param key [OpenSSL::PKey::PKey, OpenSSL::X509::SPKI, String] the key
  #   to add to the filter.
  #
  # @return [Boolean] whether the key was added as a new entry.  Due to the
  #   probabilistic nature of the bloom filter structure, it is possible to
  #   add two completely different keys and yet it looks like the "same"
  #   key to the bloom filter.  Adding two colliding keys isn't a fatal
  #   error, but it is a hint that perhaps the existing filter is getting
  #   a little too full.
  #
  # @raise [Pwnedkeys::Filter::InvalidKeyError] if the object passed in
  #   isn't recognised as a key.
  #
  # @raise [Pwnedkeys::Filter::InvalidFileError] if the data file is shorter
  #   than its header says it should be.
  #
  # @raise [Pwnedkeys::Filter::FilterClosedError] if you try to add a key
  #   to a filter object which has had {#close} called on it.
  #
  def add(key)
    raise FilterClosedError if @fd.nil?

    # Parse once; the SPKI is accepted as-is by the membership check.
    spki = spkify(key)
    return false if probably_includes?(spki)

    filter_positions(spki.to_der).each do |n|
      updated = read_filter_byte(n) | bit_mask(n)

      @fd.seek(byte_offset(n), :SET)
      @fd.write(updated.chr)
    end

    @header.entry_added!

    # Only update the revision if this is the first add in this filter,
    # because otherwise the revision counter would just be the same as the
    # entry counter, and that would be pointless.
    @header.update! unless @already_modified
    @already_modified = true

    @fd.seek(0)
    @fd.write(@header.to_s)

    true
  end

  # Signal that the filter should be closed for further querying and manipulation.
  #
  # Closing a filter that is already closed does nothing.
  #
  # @return [void]
  #
  # @raise [SystemCallError] if something filesystem-ish fails.
  #
  def close
    return if @fd.nil?

    # Forget the handle before closing it, so the filter counts as closed
    # even if the close itself raises.
    fd = @fd
    @fd = nil
    fd.close

    nil
  end

  # An estimate of the false-positive rate inherent in the filter.
  #
  # Given the parameters of the filter, we can estimate roughly what the
  # false-positive rate will be when querying this filter.
  #
  # @return [Float] the approximate probability of a query result being a
  #   false positive, expressed as a floating-point number between 0 and 1.
  #
  def false_positive_rate
    # Taken wholesale from https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
    (1 - (1 - 1.0 / filter_bit_count)**(hash_count * entry_count))**hash_count
  end

  private

  def hash_count
    @header.hash_count
  end

  def hash_length
    @header.hash_length
  end

  def entry_count
    @header.entry_count
  end

  # Total number of bits in the filter.
  def filter_bit_count
    @filter_size ||= (2**hash_length)
  end

  # Coerce any of the accepted key representations into an SPKI object.
  def spkify(key)
    case key
    when OpenSSL::X509::SPKI
      key
    when OpenSSL::PKey::PKey
      key.to_spki
    when String
      spki_from_string(key)
    else
      raise InvalidKeyError,
            "Did not recognise the provided key"
    end
  end

  # Parse a string first as a key, then as a bare SPKI structure.
  def spki_from_string(str)
    OpenSSL::PKey.read(str).to_spki
  rescue ArgumentError, OpenSSL::ASN1::ASN1Error, OpenSSL::PKey::PKeyError
    # NOTE(review): ArgumentError is included because older openssl
    # releases appear to raise it from PKey.read for unparseable input;
    # confirm against the openssl versions this gem supports.
    begin
      OpenSSL::X509::SPKI.new(str)
    rescue OpenSSL::ASN1::ASN1Error, OpenSSL::X509::SPKIError
      raise InvalidKeyError,
            "Could not parse provided key as a key or SPKI structure"
    end
  end

  # The state (set / not set) of every filter bit the given string maps to.
  def filter_bits(s)
    filter_positions(s).map { |n| bit_set?(n) }
  end

  # Whether filter bit +n+ is set in the data file.
  def bit_set?(n)
    (read_filter_byte(n) & bit_mask(n)) > 0
  end

  # File offset of the byte holding filter bit +n+.
  def byte_offset(n)
    n / 8 + @header.header_size
  end

  # Mask selecting bit +n+ within its byte; bit 0 is the most significant.
  def bit_mask(n)
    1 << (7 - (n % 8))
  end

  # Read the byte that holds filter bit +n+.
  def read_filter_byte(n)
    @fd.seek(byte_offset(n), :SET)
    raw = @fd.read(1)

    if raw.nil?
      raise InvalidFileError,
            "Filter data is truncated; no byte at offset #{byte_offset(n)}"
    end

    raw.ord
  end

  # The filter bit positions for the given string.  This arithmetic decides
  # which bits represent a key in the file, so changing it would make
  # existing filter files unreadable.
  def filter_positions(s)
    h1 = XXhash.xxh64(s, 0)
    h2 = XXhash.xxh64(s, 1)
    # The second hash is forced odd before it is used as the step.
    h2 += 1 if h2.even?

    (0...hash_count).map do |i|
      (h1 + i * h2 + (i**3 - i) / 6) % filter_bit_count
    end
  end
end
|
309
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Gem specification for pwnedkeys-filter.
#
# The version and release date come from git-version-bump (GVB) when that
# gem can be loaded; otherwise placeholder values are used instead.
begin
  require 'git-version-bump'
rescue LoadError
  nil
end

Gem::Specification.new do |s|
  s.name = "pwnedkeys-filter"

  # GVB is undefined when the require above failed, hence the fallbacks.
  s.version = begin
    GVB.version
  rescue StandardError
    "0.0.0.1.NOGVB"
  end
  s.date = begin
    GVB.date
  rescue StandardError
    Time.now.strftime("%Y-%m-%d")
  end

  s.platform = Gem::Platform::RUBY

  s.summary  = "Library to query pwnedkeys.com bloom filters"

  s.authors  = ["Matt Palmer"]
  s.email    = ["matt@hezmatt.org"]
  s.homepage = "https://github.com/pwnedkeys/pwnedkeys-filter"

  # The README places the project under the GNU GPL version 3.
  s.licenses = ["GPL-3.0-only"]

  # Package everything tracked by git except files whose path starts with
  # ".", "G", "spec" or "Rakefile".
  s.files = `git ls-files -z`.split("\0").reject { |f| f =~ /^(\.|G|spec|Rakefile)/ }

  s.required_ruby_version = ">= 2.5.0"

  s.add_runtime_dependency "openssl-additions"
  s.add_runtime_dependency "xxhash"

  s.add_development_dependency 'bundler'
  s.add_development_dependency 'github-release'
  s.add_development_dependency 'git-version-bump'
  s.add_development_dependency 'guard-rspec'
  s.add_development_dependency 'rack-test'
  s.add_development_dependency 'rake', "~> 12.0"
  s.add_development_dependency 'redcarpet'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'yard'
end
|