zxcvbn 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,443 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Zxcvbn
4
+ module Scoring
5
+ # on qwerty, 'g' has degree 6, being adjacent to 'ftyhbv'. '\' has degree 1.
6
+ # this calculates the average over all keys.
7
# Returns the mean number of neighbours per key in an adjacency graph.
#
# graph - Hash mapping each key to a list of neighbour slots. Only truthy
#         entries are counted, so nil placeholders for missing neighbours
#         do not contribute to a key's degree.
#
# Returns a Float. The division is done in floating point on purpose:
# integer division would truncate the average (e.g. 4.6 -> 4) and noticeably
# shrink every spatial guess estimate derived from it.
# An empty graph yields NaN (0.0 / 0).
def self.calc_average_degree(graph)
  total_degree = graph.sum { |_key, neighbors| neighbors.count { |n| n } }
  total_degree.to_f / graph.keys.size
end
15
+
16
# base of the exponent used when estimating bruteforce guesses
# (guesses = BRUTEFORCE_CARDINALITY ** token length).
BRUTEFORCE_CARDINALITY = 10
# the "D" of the additive D^(l - 1) sequence-length penalty.
MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10_000
# lower bounds applied to a match that covers only part of the password:
# one for single-character tokens, one for longer tokens.
MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10
MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50
23
+
24
# Binomial coefficient "n choose k".
# http://blog.plover.com/math/choose.html
#
# Returns 0 when k > n and 1 when k == 0. Otherwise the product is built up
# one factor at a time: multiply by the next numerator term first, then
# divide, exactly as in the linked write-up.
def self.nCk(n, k)
  return 0 if k > n
  return 1 if k == 0

  (1..k).inject(1) do |acc, d|
    # numerator term for step d is n, n - 1, n - 2, ...
    acc * (n - d + 1) / d
  end
end
40
+
41
# Returns n! for a non-negative integer n (anything below 2 yields 1).
# unoptimized, called only on small n.
#
# The product runs over 2..n; a fixed upper bound here would return the same
# value (10!) for every n >= 2 and distort the l! term of the scoring function.
def self.factorial(n)
  return 1 if n < 2

  (2..n).reduce(1, :*)
end
48
+
49
+ # ------------------------------------------------------------------------------
50
+ # search --- most guessable match sequence -------------------------------------
51
+ # ------------------------------------------------------------------------------
52
+
53
+ # takes a sequence of overlapping matches, returns the non-overlapping sequence with
54
+ # minimum guesses. the following is a O(l_max * (n + m)) dynamic programming algorithm
55
+ # for a length-n password with m candidate matches. l_max is the maximum optimal
56
+ # sequence length spanning each prefix of the password. In practice it rarely exceeds 5 and the
57
+ # search terminates rapidly.
58
+
59
+ # the optimal "minimum guesses" sequence is here defined to be the sequence that
60
+ # minimizes the following function:
61
+
62
+ # g = l! * Product(m.guesses for m in sequence) + D^(l - 1)
63
+
64
+ # where l is the length of the sequence.
65
+
66
+ # the factorial term is the number of ways to order l patterns.
67
+
68
+ # the D^(l-1) term is another length penalty, roughly capturing the idea that an
69
+ # attacker will try lower-length sequences first before trying length-l sequences.
70
+
71
+ # for example, consider a sequence that is date-repeat-dictionary.
72
+ # - an attacker would need to try other date-repeat-dictionary combinations,
73
+ # hence the product term.
74
+ # - an attacker would need to try repeat-date-dictionary, dictionary-repeat-date,
75
+ # ..., hence the factorial term.
76
+ # - an attacker would also likely try length-1 (dictionary) and length-2 (dictionary-date)
77
+ # sequences before length-3. assuming at minimum D guesses per pattern type,
78
+ # D^(l-1) approximates Sum(D^i for i in [1..l-1])
79
+
80
+ # ------------------------------------------------------------------------------
81
+ def self.most_guessable_match_sequence(password, matches, _exclude_additive = false)
82
+ n = password.length
83
+ # partition matches into sublists according to ending index j
84
+ matches_by_j = (0...n).map { [] }
85
+ matches.each do |m|
86
+ matches_by_j[m[:j]] << m
87
+ end
88
+
89
+ # small detail: for deterministic output, sort each sublist by i.
90
+ matches_by_j.each do |lst|
91
+ lst.sort_by!{|m| m[:i] }
92
+ end
93
+
94
+ optimal = {
95
+ # optimal.m[k][l] holds final match in the best length-l match sequence covering the
96
+ # password prefix up to k, inclusive.
97
+ # if there is no length-l sequence that scores better (fewer guesses) than
98
+ # a shorter match sequence spanning the same prefix, optimal.m[k][l] is undefined.
99
+ m: (0...n).map { {} },
100
+ # same structure as optimal.m -- holds the product term Prod(m.guesses for m in sequence).
101
+ # optimal.pi allows for fast (non-looping) updates to the minimization function.
102
+ pi: (0...n).map { {} },
103
+ # same structure as optimal.m -- holds the overall metric.
104
+ g: (0...n).map { {} },
105
+ }
106
+
107
+ # helper: considers whether a length-l sequence ending at match m is better (fewer guesses)
108
+ # than previously encountered sequences, updating state if so.
109
+ update = -> (m, l) do
110
+ k = m[:j]
111
+ pi = estimate_guesses(m, password)
112
+ if l > 1
113
+ # we're considering a length-l sequence ending with match m:
114
+ # obtain the product term in the minimization function by multiplying m's guesses
115
+ # by the product of the length-(l-1) sequence ending just before m, at m.i - 1.
116
+ pi *= optimal[:pi][m[:i] - 1][l - 1]
117
+ end
118
+ # calculate the minimization func
119
+ g = factorial(l) * pi
120
+ if !_exclude_additive
121
+ g += MIN_GUESSES_BEFORE_GROWING_SEQUENCE ** (l - 1)
122
+ end
123
+ # update state if new best.
124
+ # first see if any competing sequences covering this prefix, with l or fewer matches,
125
+ # fare better than this sequence. if so, skip it and return.
126
+ optimal[:g][k].each do |competing_l, competing_g|
127
+ if competing_l > l
128
+ next
129
+ end
130
+ if competing_g <= g
131
+ return
132
+ end
133
+ end
134
+ # this sequence might be part of the final optimal sequence.
135
+ optimal[:g][k][l] = g
136
+ optimal[:m][k][l] = m
137
+ optimal[:pi][k][l] = pi
138
+ end
139
+
140
+ # helper: make bruteforce match objects spanning i to j, inclusive.
141
+ make_bruteforce_match = -> (i, j) do
142
+ return {
143
+ pattern: 'bruteforce',
144
+ token: password[i..j],
145
+ i: i,
146
+ j: j
147
+ }
148
+ end
149
+
150
+ # helper: evaluate bruteforce matches ending at k.
151
+ bruteforce_update = -> (k) do
152
+ # see if a single bruteforce match spanning the k-prefix is optimal.
153
+ m = make_bruteforce_match.call(0, k)
154
+ update.call(m, 1)
155
+ (1..k).each do |i|
156
+ # generate k bruteforce matches, spanning from (i=1, j=k) up to (i=k, j=k).
157
+ # see if adding these new matches to any of the sequences in optimal[i-1]
158
+ # leads to new bests.
159
+ m = make_bruteforce_match.call(i, k);
160
+ optimal[:m][i-1].each do |l, last_m|
161
+ l = l.to_i
162
+ # corner: an optimal sequence will never have two adjacent bruteforce matches.
163
+ # it is strictly better to have a single bruteforce match spanning the same region:
164
+ # same contribution to the guess product with a lower length.
165
+ # --> safe to skip those cases.
166
+ if last_m[:pattern] == 'bruteforce'
167
+ next
168
+ end
169
+ # try adding m to this length-l sequence.
170
+ update.call(m, l + 1)
171
+ end
172
+ end
173
+ end
174
+
175
+ # helper: step backwards through optimal.m starting at the end,
176
+ # constructing the final optimal match sequence.
177
+ unwind = -> (n) do
178
+ optimal_match_sequence = []
179
+ k = n - 1
180
+ # find the final best sequence length and score
181
+ l, g = optimal[:g][k].min_by{|candidate_l, candidate_g| candidate_g }
182
+ while k >= 0
183
+ m = optimal[:m][k][l]
184
+ optimal_match_sequence.unshift(m)
185
+ k = m[:i] - 1
186
+ l -= 1
187
+ end
188
+ return optimal_match_sequence
189
+ end
190
+
191
+ (0...n).each do |k|
192
+ matches_by_j[k].each do |m|
193
+ if m[:i] > 0
194
+ optimal[:m][m[:i] - 1].keys.each do |l|
195
+ update.call(m, l + 1)
196
+ end
197
+ else
198
+ update.call(m, 1)
199
+ end
200
+ end
201
+ bruteforce_update.call(k)
202
+ end
203
+
204
+ optimal_match_sequence = unwind.call(n)
205
+ optimal_l = optimal_match_sequence.length
206
+
207
+ # corner: empty password
208
+ if password.length == 0
209
+ guesses = 1
210
+ else
211
+ guesses = optimal[:g][n - 1][optimal_l]
212
+ end
213
+
214
+ # final result object
215
+ return {
216
+ password: password,
217
+ guesses: guesses,
218
+ guesses_log10: Math.log10(guesses),
219
+ sequence: optimal_match_sequence
220
+ }
221
+ end
222
+
223
+ # ------------------------------------------------------------------------------
224
+ # guess estimation -- one function per match pattern ---------------------------
225
+ # ------------------------------------------------------------------------------
226
# Estimates the guesses needed for a single match and memoizes the result on
# the match hash (:guesses and :guesses_log10).
#
# match    - match Hash; :pattern selects the estimator, :token is the
#            matched text.
# password - the full password, used only to tell whether the match is a
#            submatch (token shorter than the password).
#
# Returns the cached or freshly computed guess count.
def self.estimate_guesses(match, password)
  # a match's guess estimate doesn't change. cache it.
  cached = match[:guesses]
  return cached if cached

  token_length = match[:token].length
  # submatches get a floor so tiny tokens can't look implausibly cheap.
  floor =
    if token_length >= password.length
      1
    elsif token_length == 1
      MIN_SUBMATCH_GUESSES_SINGLE_CHAR
    else
      MIN_SUBMATCH_GUESSES_MULTI_CHAR
    end

  # one estimator per pattern name: <pattern>_guesses
  estimators = %i[bruteforce dictionary spatial repeat sequence regex date].map do |pattern|
    [pattern, method(:"#{pattern}_guesses")]
  end.to_h

  estimate = estimators[match[:pattern].to_sym].call(match)
  match[:guesses] = [estimate, floor].max
  match[:guesses_log10] = Math.log10(match[:guesses])
  match[:guesses]
end
252
+
253
# upper bound applied to bruteforce estimates.
MAX_VALUE = 2**1024

# Guess estimate for a bruteforce match:
# BRUTEFORCE_CARDINALITY ** token length, capped at MAX_VALUE and never
# below (minimum submatch guesses + 1).
def self.bruteforce_guesses(match)
  token_length = match[:token].length

  # trying to match JS behaviour here, setting a MAX_VALUE to try to achieve
  # the same values as the JS library.
  raw = BRUTEFORCE_CARDINALITY**token_length
  capped = raw > MAX_VALUE ? MAX_VALUE : raw

  # small detail: make bruteforce matches at minimum one guess bigger than the
  # smallest allowed submatch guesses, such that non-bruteforce submatches over
  # the same [i..j] take precedence.
  submatch_minimum =
    token_length == 1 ? MIN_SUBMATCH_GUESSES_SINGLE_CHAR : MIN_SUBMATCH_GUESSES_MULTI_CHAR

  [capped, submatch_minimum + 1].max
end
272
+
273
# Guess estimate for a repeat match: the guesses of the repeated unit
# (:base_guesses) multiplied by how many times it repeats (:repeat_count).
def self.repeat_guesses(match)
  unit_guesses = match[:base_guesses]
  repetitions = match[:repeat_count]
  unit_guesses * repetitions
end
276
+
277
# Guess estimate for a sequence match (e.g. 'abcd', '9876').
#
# The base depends on the first character: 4 for obvious starting points,
# 10 for other digits, 26 otherwise. Descending sequences (:ascending falsy)
# double the base. The result scales linearly with token length.
def self.sequence_guesses(match)
  token = match[:token]
  lead = token[0]

  base =
    if %w[a A z Z 0 1 9].include?(lead)
      # lower guesses for obvious starting points
      4
    elsif lead.match?(/\d/)
      10 # digits
    else
      # could give a higher base for uppercase,
      # assigning 26 to both upper and lower sequences is more conservative.
      26
    end

  # need to try a descending sequence in addition to every ascending
  # sequence -> 2x guesses
  base *= 2 unless match[:ascending]

  base * token.length
end
298
+
299
# smallest year space assumed for any year-based estimate.
MIN_YEAR_SPACE = 20
# year that distances are measured from; fixed when this file is loaded.
REFERENCE_YEAR = Time.now.year

# Guess estimate for a regex match.
#
# match - match Hash with:
#         :regex_name  - String or Symbol naming the regex.
#         :token       - the matched text (used for character-class regexes).
#         :regex_match - indexable match result whose element 0 is the matched
#                        year text (used for 'recent_year').
#
# Returns base ** token length for a known character class, the year space
# for 'recent_year', or nil for an unrecognised regex name.
def self.regex_guesses(match)
  char_class_bases = {
    'alpha_lower' => 26,
    'alpha_upper' => 26,
    'alpha' => 52,
    'alphanumeric' => 62,
    'digits' => 10,
    'symbols' => 33
  }
  # normalise so that both 'digits' and :digits hit the same table entry;
  # the 'recent_year' comparison below is string-based as well.
  name = match[:regex_name].to_s

  if char_class_bases.key?(name)
    char_class_bases[name]**match[:token].length
  elsif name == 'recent_year'
    # conservative estimate of year space: num years from REFERENCE_YEAR.
    # if year is close to REFERENCE_YEAR, estimate a year space of MIN_YEAR_SPACE.
    year_space = (match[:regex_match][0].to_i - REFERENCE_YEAR).abs
    [year_space, MIN_YEAR_SPACE].max
  end
end
321
+
322
# Guess estimate for a date match.
#
# match - match Hash with :year (Integer) and optionally :separator.
#
# base guesses: (year distance from REFERENCE_YEAR, at least MIN_YEAR_SPACE)
# * 365. A factor of 4 is added only when the date was actually written with
# a separator; nil and the empty string both mean "no separator" (an empty
# string is truthy in Ruby, so it has to be checked explicitly).
def self.date_guesses(match)
  year_space = [(match[:year] - REFERENCE_YEAR).abs, MIN_YEAR_SPACE].max
  guesses = year_space * 365
  # add factor of 4 for separator selection (one of ~4 choices)
  guesses *= 4 unless match[:separator].to_s.empty?
  guesses
end
332
+
333
# per-layout statistics derived from the adjacency graphs: the average number
# of neighbours per key and the number of keys a pattern can start on.
# the qwerty figures stand in for every full keyboard layout.
KEYBOARD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS[:qwerty])
KEYBOARD_STARTING_POSITIONS = ADJACENCY_GRAPHS[:qwerty].keys.length

# slightly different for keypad/mac keypad, but close enough
KEYPAD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS[:keypad])
KEYPAD_STARTING_POSITIONS = ADJACENCY_GRAPHS[:keypad].keys.length
339
+
340
# Guess estimate for a spatial (keyboard-walk) match.
#
# match - match Hash with:
#         :graph         - layout name; 'qwerty' and 'dvorak' use the keyboard
#                          statistics, anything else the keypad statistics.
#         :token         - the matched text.
#         :turns         - number of direction changes in the walk.
#         :shifted_count - number of shifted characters (optional).
#
# Returns the estimated number of guesses.
def self.spatial_guesses(match)
  if %w[qwerty dvorak].include?(match[:graph])
    s = KEYBOARD_STARTING_POSITIONS
    d = KEYBOARD_AVERAGE_DEGREE
  else
    s = KEYPAD_STARTING_POSITIONS
    d = KEYPAD_AVERAGE_DEGREE
  end

  token_length = match[:token].length
  turns = match[:turns]
  guesses = 0
  # estimate the number of possible patterns w/ length token_length or less
  # with `turns` turns or less.
  (2..token_length).each do |i|
    possible_turns = [turns, i - 1].min
    (1..possible_turns).each do |j|
      guesses += nCk(i - 1, j - 1) * s * (d**j)
    end
  end

  # add extra guesses for shifted keys. (% instead of 5, A instead of a.)
  # math is similar to extra guesses of l33t substitutions in dictionary matches.
  # a count of 0 (or a missing count) means nothing was shifted, so nothing is
  # added -- 0 is truthy in Ruby and must not be used as the condition itself.
  shifted = match[:shifted_count].to_i
  if shifted > 0
    unshifted = token_length - shifted
    if unshifted == 0
      # fully shifted: attacker tries the unshifted and the shifted walk.
      guesses *= 2
    else
      guesses *= (1..[shifted, unshifted].min).sum { |i| nCk(shifted + unshifted, i) }
    end
  end
  guesses
end
375
+
376
# Guess estimate for a dictionary match: rank times the uppercase, l33t and
# reversal multipliers. The intermediate factors are written back onto the
# match (:base_guesses, :uppercase_variations, :l33t_variations).
def self.dictionary_guesses(match)
  # keep these as properties for display purposes
  match[:base_guesses] = match[:rank]
  match[:uppercase_variations] = uppercase_variations(match)
  match[:l33t_variations] = l33t_variations(match)

  # a reversed word doubles the space (forward + backward).
  reversal_factor = match[:reversed] ? 2 : 1

  factors = [
    match[:base_guesses],
    match[:uppercase_variations],
    match[:l33t_variations],
    reversal_factor
  ]
  factors.inject(:*)
end
383
+
384
# capitalization shapes, anchored to the whole token. \A / \z are used rather
# than ^ / $ because in Ruby the latter match at every line boundary, which
# would let a single line of a multi-line token satisfy the pattern.
START_UPPER = /\A[A-Z][^A-Z]+\z/
END_UPPER = /\A[^A-Z]+[A-Z]\z/
ALL_UPPER = /\A[^a-z]+\z/
ALL_LOWER = /\A[^A-Z]+\z/

# Number of capitalization variants an attacker has to try for a dictionary
# match.
#
# match - match Hash; only :token is read.
#
# Returns 1 for tokens without uppercase letters, 2 for the common schemes
# (first-letter, last-letter, all caps), otherwise a binomial sum over the
# letter counts.
def self.uppercase_variations(match)
  word = match[:token]
  return 1 if word.match?(ALL_LOWER) || word.downcase == word

  # a capitalized word is the most common capitalization scheme,
  # so it only doubles the search space (uncapitalized + capitalized).
  # allcaps and end-capitalized are common enough too, underestimate as 2x
  # factor to be safe.
  return 2 if [START_UPPER, END_UPPER, ALL_UPPER].any? { |regex| word.match?(regex) }

  # otherwise calculate the number of ways to capitalize U+L uppercase+lowercase
  # letters with U uppercase letters or less. or, if there's more uppercase than
  # lower (for eg. PASSwORD), the number of ways to lowercase U+L letters with
  # L lowercase letters or less.
  upper = word.count('A-Z')
  lower = word.count('a-z')
  (1..[upper, lower].min).sum { |i| nCk(upper + lower, i) }
end
413
+
414
# Number of l33t-substitution variants an attacker has to try.
#
# match - match Hash; reads :l33t (flag), :token and :sub, where :sub maps each
#         substituted character to the character it stands for.
#
# Returns 1 for non-l33t matches, otherwise the product of one factor per
# substitution.
def self.l33t_variations(match)
  return 1 unless match[:l33t]

  match[:sub].inject(1) do |variations, (subbed, unsubbed)|
    # lower-case match.token before calculating: capitalization shouldn't
    # affect l33t calc.
    letters = match[:token].downcase.chars
    subbed_count = letters.count(subbed)
    unsubbed_count = letters.count(unsubbed)

    factor =
      if subbed_count == 0 || unsubbed_count == 0
        # for this sub, password is either fully subbed (444) or fully
        # unsubbed (aaa). treat that as doubling the space (attacker needs to
        # try fully subbed chars in addition to unsubbed.)
        2
      else
        # this case is similar to capitalization:
        # with aa44a, unsubbed = 3, subbed = 2, attacker needs to try
        # unsubbed + one sub + two subs
        limit = [unsubbed_count, subbed_count].min
        (1..limit).sum { |i| nCk(unsubbed_count + subbed_count, i) }
      end

    variations * factor
  end
end
442
+ end
443
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module Zxcvbn
  # Turns a guess count into crack-time estimates for four attack scenarios,
  # human-readable durations, and a 0-4 score.
  module TimeEstimates
    # guesses - the estimated number of guesses (numeric).
    #
    # Returns a Hash with :crack_times_seconds and :crack_times_display
    # (both keyed by scenario) and :score.
    def self.estimate_attack_times(guesses)
      # attacker speed, in guesses per second, for each scenario
      guesses_per_second = {
        online_throttling_100_per_hour: 100.0 / 3600.0,
        online_no_throttling_10_per_second: 10.0,
        offline_slow_hashing_1e4_per_second: 1e4,
        offline_fast_hashing_1e10_per_second: 1e10
      }

      crack_times_seconds = {}
      crack_times_display = {}
      guesses_per_second.each do |scenario, rate|
        seconds = guesses / rate
        crack_times_seconds[scenario] = seconds
        crack_times_display[scenario] = display_time(seconds)
      end

      {
        crack_times_seconds: crack_times_seconds,
        crack_times_display: crack_times_display,
        score: guesses_to_score(guesses)
      }
    end

    # Maps a guess count onto the score scale:
    #   0 - risky password: "too guessable"
    #   1 - modest protection from throttled online attacks: "very guessable"
    #   2 - modest protection from unthrottled online attacks: "somewhat guessable"
    #   3 - modest protection from offline attacks: "safely unguessable"
    #       assuming a salted, slow hash function like bcrypt, scrypt, PBKDF2, argon, etc
    #   4 - strong protection from offline attacks under same scenario: "very unguessable"
    def self.guesses_to_score(guesses)
      delta = 5
      # the index of the first threshold the guess count stays under is the score
      [1e3, 1e6, 1e8, 1e10].each_with_index do |threshold, score|
        return score if guesses < threshold + delta
      end
      4
    end

    # Formats a duration in seconds as a short phrase such as '3 hours'.
    # Anything under one second or at/over a century gets a fixed phrase.
    def self.display_time(seconds)
      minute = 60
      hour = minute * 60
      day = hour * 24
      month = day * 31
      year = month * 12
      century = year * 100

      return 'less than a second' if seconds < 1

      # [exclusive upper bound, divisor (nil = use seconds as-is), unit name]
      scales = [
        [minute, nil, 'second'],
        [hour, minute, 'minute'],
        [day, hour, 'hour'],
        [month, day, 'day'],
        [year, month, 'month'],
        [century, year, 'year']
      ]
      _bound, divisor, unit = scales.find { |bound, _divisor, _unit| seconds < bound }
      return 'centuries' if unit.nil?

      amount = (divisor ? seconds / divisor : seconds).round
      # pluralize everything except exactly one unit
      amount == 1 ? "#{amount} #{unit}" : "#{amount} #{unit}s"
    end
  end
end