zxcvbn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,443 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Zxcvbn
4
+ module Scoring
5
# Computes the average degree (number of adjacent keys) over all keys of a graph.
# On qwerty, 'g' has degree 6, being adjacent to 'ftyhbv'. '\' has degree 1.
#
# graph - Hash mapping each key to a list of neighbors. Falsy entries (nil)
#         stand for "no key in that direction" and are not counted.
#
# Returns the average as a Float, or 0.0 for an empty graph.
def self.calc_average_degree(graph)
  return 0.0 if graph.empty?

  total_degree = graph.sum { |_key, neighbors| neighbors.count { |n| n } }
  # fdiv keeps the fractional part; Integer#/ would truncate the average
  # (e.g. 4 / 3 == 1) and skew every value derived from it.
  total_degree.fdiv(graph.size)
end
15
+
16
# base raised to the token length when estimating bruteforce guesses.
BRUTEFORCE_CARDINALITY = 10

# the "D" of the additive D^(l - 1) length penalty in the sequence search.
MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10_000

# lower bounds on the guesses of a match that covers only part of the password.
MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10
MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50
23
+
24
# Binomial coefficient "n choose k", built up incrementally so that every
# intermediate value stays an exact integer.
# See http://blog.plover.com/math/choose.html
#
# Returns 0 when k > n and 1 when k == 0.
def self.nCk(n, k)
  return 0 if k > n
  return 1 if k == 0

  numerator = n
  (1..k).reduce(1) do |result, divisor|
    result = result * numerator / divisor
    numerator -= 1
    result
  end
end
40
+
41
# Returns n! for a non-negative integer n (1 for any n < 2).
# unoptimized, called only on small n
def self.factorial(n)
  return 1 if n < 2

  # multiply 2 * 3 * ... * n; the range must end at n, not at a fixed bound.
  (2..n).reduce(:*)
end
48
+
49
+ # ------------------------------------------------------------------------------
50
+ # search --- most guessable match sequence -------------------------------------
51
+ # ------------------------------------------------------------------------------
52
+
53
+ # takes a sequence of overlapping matches, returns the non-overlapping sequence with
54
+ # minimum guesses. the following is a O(l_max * (n + m)) dynamic programming algorithm
55
+ # for a length-n password with m candidate matches. l_max is the maximum optimal
56
+ # sequence length spanning each prefix of the password. In practice it rarely exceeds 5 and the
57
+ # search terminates rapidly.
58
+
59
+ # the optimal "minimum guesses" sequence is here defined to be the sequence that
60
+ # minimizes the following function:
61
+
62
+ # g = l! * Product(m.guesses for m in sequence) + D^(l - 1)
63
+
64
+ # where l is the length of the sequence.
65
+
66
+ # the factorial term is the number of ways to order l patterns.
67
+
68
+ # the D^(l-1) term is another length penalty, roughly capturing the idea that an
69
+ # attacker will try lower-length sequences first before trying length-l sequences.
70
+
71
+ # for example, consider a sequence that is date-repeat-dictionary.
72
+ # - an attacker would need to try other date-repeat-dictionary combinations,
73
+ # hence the product term.
74
+ # - an attacker would need to try repeat-date-dictionary, dictionary-repeat-date,
75
+ # ..., hence the factorial term.
76
+ # - an attacker would also likely try length-1 (dictionary) and length-2 (dictionary-date)
77
+ # sequences before length-3. assuming at minimum D guesses per pattern type,
78
+ # D^(l-1) approximates Sum(D^i for i in [1..l-1])
79
+
80
+ # ------------------------------------------------------------------------------
81
# Searches for the non-overlapping sequence of matches that covers the whole
# password with the fewest guesses, filling gaps with bruteforce matches.
#
# password          - the String being scored.
# matches           - candidate match Hashes; each is read through :i, :j (inclusive
#                     start/end indices into password) and :pattern.
# _exclude_additive - when truthy, the D^(l - 1) length penalty is left out of
#                     the minimised function.
#
# Returns a Hash with :password, :guesses, :guesses_log10 and :sequence
# (the chosen matches, in password order).
def self.most_guessable_match_sequence(password, matches, _exclude_additive = false)
  n = password.length

  # corner: empty password. the per-position tables below would be empty and
  # cannot be indexed, so answer directly instead of running the search.
  if n == 0
    return {
      password: password,
      guesses: 1,
      guesses_log10: Math.log10(1),
      sequence: []
    }
  end

  # partition matches into sublists according to ending index j.
  matches_by_j = Array.new(n) { [] }
  matches.each { |m| matches_by_j[m[:j]] << m }
  # small detail: for deterministic output, sort each sublist by i.
  matches_by_j.each { |lst| lst.sort_by! { |m| m[:i] } }

  optimal = {
    # optimal[:m][k][l] holds the final match in the best length-l match sequence
    # covering the password prefix up to k, inclusive. if there is no length-l
    # sequence that scores better (fewer guesses) than a shorter match sequence
    # spanning the same prefix, optimal[:m][k][l] is absent.
    m: Array.new(n) { {} },
    # same structure -- holds the product term Prod(m.guesses for m in sequence),
    # allowing fast (non-looping) updates to the minimization function.
    pi: Array.new(n) { {} },
    # same structure -- holds the overall metric.
    g: Array.new(n) { {} }
  }

  # helper: considers whether a length-l sequence ending at match m is better
  # (fewer guesses) than previously encountered sequences, updating state if so.
  update = lambda do |m, l|
    k = m[:j]
    pi = estimate_guesses(m, password)
    # multiply m's guesses by the product of the length-(l-1) sequence ending
    # just before m, at m[:i] - 1.
    pi *= optimal[:pi][m[:i] - 1][l - 1] if l > 1
    # calculate the minimization func
    g = factorial(l) * pi
    g += MIN_GUESSES_BEFORE_GROWING_SEQUENCE**(l - 1) unless _exclude_additive
    # skip this sequence if a competing one covering the same prefix, with l or
    # fewer matches, already fares at least as well.
    beaten = optimal[:g][k].any? do |competing_l, competing_g|
      competing_l <= l && competing_g <= g
    end
    return if beaten

    # this sequence might be part of the final optimal sequence.
    optimal[:g][k][l] = g
    optimal[:m][k][l] = m
    optimal[:pi][k][l] = pi
  end

  # helper: make bruteforce match objects spanning i to j, inclusive.
  make_bruteforce_match = lambda do |i, j|
    {
      pattern: 'bruteforce',
      token: password[i..j],
      i: i,
      j: j
    }
  end

  # helper: evaluate bruteforce matches ending at k.
  bruteforce_update = lambda do |k|
    # see if a single bruteforce match spanning the k-prefix is optimal.
    update.call(make_bruteforce_match.call(0, k), 1)
    (1..k).each do |i|
      # generate k bruteforce matches, spanning from (i=1, j=k) up to (i=k, j=k).
      # see if adding these new matches to any of the sequences in optimal[i-1]
      # leads to new bests.
      m = make_bruteforce_match.call(i, k)
      optimal[:m][i - 1].each do |l, last_m|
        # corner: an optimal sequence will never have two adjacent bruteforce
        # matches; a single bruteforce match over the same region contributes the
        # same product with a lower length. --> safe to skip those cases.
        next if last_m[:pattern] == 'bruteforce'

        # try adding m to this length-l sequence.
        update.call(m, l + 1)
      end
    end
  end

  # helper: step backwards through optimal[:m] starting at the end,
  # constructing the final optimal match sequence.
  unwind = lambda do |length|
    sequence = []
    k = length - 1
    # find the final best sequence length (the one with the lowest metric).
    l, _best_g = optimal[:g][k].min_by { |_candidate_l, candidate_g| candidate_g }
    while k >= 0
      m = optimal[:m][k][l]
      sequence.unshift(m)
      k = m[:i] - 1
      l -= 1
    end
    sequence
  end

  (0...n).each do |k|
    matches_by_j[k].each do |m|
      if m[:i] > 0
        optimal[:m][m[:i] - 1].keys.each { |l| update.call(m, l + 1) }
      else
        update.call(m, 1)
      end
    end
    bruteforce_update.call(k)
  end

  optimal_match_sequence = unwind.call(n)
  guesses = optimal[:g][n - 1][optimal_match_sequence.length]

  # final result object
  {
    password: password,
    guesses: guesses,
    guesses_log10: Math.log10(guesses),
    sequence: optimal_match_sequence
  }
end
222
+
223
+ # ------------------------------------------------------------------------------
224
+ # guess estimation -- one function per match pattern ---------------------------
225
+ # ------------------------------------------------------------------------------
226
# Estimates the guesses needed for a single match and memoises the result on it.
#
# match    - match Hash; reads :guesses, :token and :pattern, and writes
#            :guesses and :guesses_log10.
# password - the full password, used to tell whether the match is a submatch.
#
# Returns the (possibly cached) guess count.
def self.estimate_guesses(match, password)
  # a match's guess estimate doesn't change. reuse it when present.
  cached = match[:guesses]
  return cached if cached

  token_length = match[:token].length
  # matches covering only part of the password get a minimum guess count.
  floor =
    if token_length >= password.length
      1
    elsif token_length == 1
      MIN_SUBMATCH_GUESSES_SINGLE_CHAR
    else
      MIN_SUBMATCH_GUESSES_MULTI_CHAR
    end

  # one estimator per pattern name: <pattern>_guesses.
  estimators = %i[bruteforce dictionary spatial repeat sequence regex date]
               .map { |pattern| [pattern, method(:"#{pattern}_guesses")] }
               .to_h
  raw_guesses = estimators[match[:pattern].to_sym].call(match)

  match[:guesses] = [raw_guesses, floor].max
  match[:guesses_log10] = Math.log10(match[:guesses])
  match[:guesses]
end
252
+
253
# upper bound applied to bruteforce guesses.
MAX_VALUE = 2**1024

# Guesses for a bruteforce match: BRUTEFORCE_CARDINALITY ** token length,
# capped at MAX_VALUE and floored just above the submatch minimum.
def self.bruteforce_guesses(match)
  token_length = match[:token].length

  # the cap tries to match the JS library's behaviour, to achieve the same values.
  capped = [BRUTEFORCE_CARDINALITY**token_length, MAX_VALUE].min

  # small detail: make bruteforce matches at minimum one guess bigger than the
  # smallest allowed submatch guesses, such that non-bruteforce submatches over
  # the same [i..j] take precedence.
  submatch_minimum = token_length == 1 ? MIN_SUBMATCH_GUESSES_SINGLE_CHAR : MIN_SUBMATCH_GUESSES_MULTI_CHAR

  [capped, submatch_minimum + 1].max
end
272
+
273
# Guesses for a repeat match: the base token's guesses times the repeat count.
# Reads :base_guesses and :repeat_count from the match.
def self.repeat_guesses(match)
  base_guesses, repeat_count = match.values_at(:base_guesses, :repeat_count)
  base_guesses * repeat_count
end
276
+
277
# Guesses for a sequence match ("abc", "9876", ...), derived from its first
# character, its direction (:ascending) and the token length.
def self.sequence_guesses(match)
  token = match[:token]
  first_chr = token[0]

  base_guesses =
    if %w[a A z Z 0 1 9].include?(first_chr)
      # lower guesses for obvious starting points
      4
    elsif first_chr.match?(/\d/)
      10 # digits
    else
      # could give a higher base for uppercase,
      # assigning 26 to both upper and lower sequences is more conservative.
      26
    end

  # need to try a descending sequence in addition to every ascending sequence ->
  # 2x guesses
  direction_factor = match[:ascending] ? 1 : 2

  base_guesses * direction_factor * token.length
end
298
+
299
# smallest year distance assumed when estimating year/date guesses.
MIN_YEAR_SPACE = 20
# years are measured relative to the current year, fixed when the file is loaded.
REFERENCE_YEAR = Time.now.year

# Guesses for a regex match.
#
# match - match Hash; reads :regex_name (String or Symbol), :token, and for
#         'recent_year' matches :regex_match, whose element 0 is the matched year.
#
# Returns the guess count, or nil when :regex_name is not a known name.
def self.regex_guesses(match)
  char_class_bases = {
    'alpha_lower' => 26,
    'alpha_upper' => 26,
    'alpha' => 52,
    'alphanumeric' => 62,
    'digits' => 10,
    'symbols' => 33
  }
  # normalise so String and Symbol names are both recognised by every branch.
  regex_name = match[:regex_name].to_s

  base = char_class_bases[regex_name]
  return base**match[:token].length if base
  return unless regex_name == 'recent_year'

  # conservative estimate of year space: num years from REFERENCE_YEAR.
  # if year is close to REFERENCE_YEAR, estimate a year space of MIN_YEAR_SPACE.
  year_space = (match[:regex_match][0].to_i - REFERENCE_YEAR).abs
  [year_space, MIN_YEAR_SPACE].max
end
321
+
322
# Guesses for a date match.
#
# match - match Hash; reads :year (Integer) and :separator.
#
# Returns year_space * 365, times 4 when the date was written with a separator.
def self.date_guesses(match)
  # base guesses: (year distance from REFERENCE_YEAR, at least MIN_YEAR_SPACE) * num_days
  year_space = [(match[:year] - REFERENCE_YEAR).abs, MIN_YEAR_SPACE].max
  guesses = year_space * 365

  # add factor of 4 for separator selection (one of ~4 choices).
  # an empty string is truthy in Ruby, so test for emptiness explicitly:
  # a date without separator characters must not receive the factor.
  guesses *= 4 unless match[:separator].to_s.empty?

  guesses
end
332
+
333
# average number of neighbors per key, taken from the qwerty graph.
KEYBOARD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS[:qwerty])
# slightly different for keypad/mac keypad, but close enough
KEYPAD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS[:keypad])

# number of keys a spatial pattern can start from.
KEYBOARD_STARTING_POSITIONS = ADJACENCY_GRAPHS[:qwerty].size
KEYPAD_STARTING_POSITIONS = ADJACENCY_GRAPHS[:keypad].size
339
+
340
# Guesses for a spatial (keyboard-walk) match.
#
# match - match Hash; reads :graph (String or Symbol name), :token, :turns and
#         the optional :shifted_count.
#
# Returns the estimated number of guesses.
def self.spatial_guesses(match)
  # accept the graph name as either String or Symbol.
  if %w[qwerty dvorak].include?(match[:graph].to_s)
    s = KEYBOARD_STARTING_POSITIONS
    d = KEYBOARD_AVERAGE_DEGREE
  else
    s = KEYPAD_STARTING_POSITIONS
    d = KEYPAD_AVERAGE_DEGREE
  end

  token_length = match[:token].length
  turns = match[:turns]
  guesses = 0
  # estimate the number of possible patterns w/ token_length or less with
  # `turns` turns or less.
  (2..token_length).each do |i|
    possible_turns = [turns, i - 1].min
    (1..possible_turns).each do |j|
      guesses += nCk(i - 1, j - 1) * s * (d**j)
    end
  end

  # add extra guesses for shifted keys. (% instead of 5, A instead of a.)
  # math is similar to extra guesses of l33t substitutions in dictionary matches.
  # 0 is truthy in Ruby, so a count of zero is excluded explicitly: a pattern
  # with no shifted keys gets no extra guesses.
  shifted = match[:shifted_count]
  if shifted && shifted != 0
    unshifted = token_length - shifted
    if unshifted == 0
      # fully shifted: only the shifted/unshifted alternative doubles the space.
      guesses *= 2
    else
      shifted_variations = (1..[shifted, unshifted].min).sum do |i|
        nCk(shifted + unshifted, i)
      end
      guesses *= shifted_variations
    end
  end

  guesses
end
375
+
376
# Guesses for a dictionary match: rank times the uppercase, l33t and reversed
# multipliers. The intermediate factors are stored on the match
# (:base_guesses, :uppercase_variations, :l33t_variations) for display purposes.
def self.dictionary_guesses(match)
  match[:base_guesses] = match[:rank]
  match[:uppercase_variations] = uppercase_variations(match)
  match[:l33t_variations] = l33t_variations(match)
  reversed_factor = match[:reversed] ? 2 : 1

  [
    match[:base_guesses],
    match[:uppercase_variations],
    match[:l33t_variations],
    reversed_factor
  ].reduce(:*)
end
383
+
384
# capitalisation shapes recognised when scoring a dictionary word.
START_UPPER = /^[A-Z][^A-Z]+$/
END_UPPER = /^[^A-Z]+[A-Z]$/
ALL_UPPER = /^[^a-z]+$/
ALL_LOWER = /^[^A-Z]+$/

# Multiplier for the capitalisation of a match's :token.
#
# Returns 1 for an all-lowercase token, 2 for the common schemes (first letter,
# last letter, all caps), otherwise the number of ways to pick the capitals.
def self.uppercase_variations(match)
  word = match[:token]
  return 1 if word.match?(ALL_LOWER) || word == word.downcase

  # a capitalized word is the most common capitalization scheme,
  # so it only doubles the search space (uncapitalized + capitalized).
  # allcaps and end-capitalized are common enough too, underestimate as 2x factor to be safe.
  return 2 if [START_UPPER, END_UPPER, ALL_UPPER].any? { |regex| word.match?(regex) }

  # otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters
  # with U uppercase letters or less. or, if there's more uppercase than lower (for eg. PASSwORD),
  # the number of ways to lowercase U+L letters with L lowercase letters or less.
  upper_count = word.count('A-Z')
  lower_count = word.count('a-z')
  (1..[upper_count, lower_count].min).sum { |i| nCk(upper_count + lower_count, i) }
end
413
+
414
# Multiplier for the l33t substitutions used in a dictionary match.
#
# match - match Hash; reads :l33t, :token and :sub (substituted char => original char).
#
# Returns 1 when the match is not l33t, otherwise the product of one factor
# per substitution.
def self.l33t_variations(match)
  return 1 unless match[:l33t]

  # lower-case the token before counting: capitalization shouldn't affect l33t calc.
  chars = match[:token].downcase.chars

  match[:sub].reduce(1) do |variations, (subbed, unsubbed)|
    subbed_count = chars.count(subbed)
    unsubbed_count = chars.count(unsubbed)

    factor =
      if subbed_count == 0 || unsubbed_count == 0
        # for this sub, password is either fully subbed (444) or fully unsubbed (aaa)
        # treat that as doubling the space (attacker needs to try fully subbed chars
        # in addition to unsubbed.)
        2
      else
        # this case is similar to capitalization:
        # with aa44a, unsubbed = 3, subbed = 2, attacker needs to try
        # unsubbed + one sub + two subs
        total = unsubbed_count + subbed_count
        (1..[unsubbed_count, subbed_count].min).sum { |i| nCk(total, i) }
      end

    variations * factor
  end
end
442
+ end
443
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module Zxcvbn
  # Turns a guess count into crack-time estimates and a 0..4 score.
  module TimeEstimates
    # Estimates how long cracking takes under four attack scenarios.
    #
    # guesses - numeric guess count.
    #
    # Returns a Hash with :crack_times_seconds (scenario => seconds),
    # :crack_times_display (scenario => human readable text) and :score.
    def self.estimate_attack_times(guesses)
      crack_times_seconds = {
        online_throttling_100_per_hour: guesses / (100.0 / 3600.0),
        online_no_throttling_10_per_second: guesses / 10.0,
        offline_slow_hashing_1e4_per_second: guesses / 1e4,
        offline_fast_hashing_1e10_per_second: guesses / 1e10
      }
      crack_times_display = crack_times_seconds.transform_values { |seconds| display_time(seconds) }

      {
        crack_times_seconds: crack_times_seconds,
        crack_times_display: crack_times_display,
        score: guesses_to_score(guesses)
      }
    end

    # Maps a guess count onto the 0..4 score.
    #
    # 0 - risky password: "too guessable"
    # 1 - modest protection from throttled online attacks: "very guessable"
    # 2 - modest protection from unthrottled online attacks: "somewhat guessable"
    # 3 - modest protection from offline attacks: "safely unguessable"
    #     (assuming a salted, slow hash function like bcrypt, scrypt, PBKDF2, argon, etc)
    # 4 - strong protection from offline attacks under same scenario: "very unguessable"
    def self.guesses_to_score(guesses)
      delta = 5
      # the score is the position of the first threshold the guesses stay below.
      thresholds = [1e3, 1e6, 1e8, 1e10]
      thresholds.index { |threshold| guesses < threshold + delta } || thresholds.length
    end

    # Renders a duration in seconds as text such as "3 hours",
    # "less than a second" or "centuries". A month counts 31 days and a year
    # 12 such months.
    def self.display_time(seconds)
      minute = 60
      hour = minute * 60
      day = hour * 24
      month = day * 31
      year = month * 12
      century = year * 100

      return 'less than a second' if seconds < 1

      # [exclusive upper bound, size of one unit in seconds, unit name]
      scale = [
        [minute, 1, 'second'],
        [hour, minute, 'minute'],
        [day, hour, 'hour'],
        [month, day, 'day'],
        [year, month, 'month'],
        [century, year, 'year']
      ]
      step = scale.find { |upper_bound, _unit_seconds, _unit_name| seconds < upper_bound }
      return 'centuries' unless step

      _upper_bound, unit_seconds, unit_name = step
      amount = (seconds / unit_seconds).round
      label = "#{amount} #{unit_name}"
      amount == 1 ? label : "#{label}s"
    end
  end
end