zxcvbn-ruby 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -1
  3. data/README.md +322 -75
  4. data/data/frequency_lists/english_wikipedia.txt +30000 -0
  5. data/data/frequency_lists/female_names.txt +11 -114
  6. data/data/frequency_lists/male_names.txt +3 -24
  7. data/data/frequency_lists/passwords.txt +29623 -6764
  8. data/data/frequency_lists/surnames.txt +28 -30611
  9. data/data/frequency_lists/{english.txt → us_tv_and_film.txt} +147 -13532
  10. data/lib/zxcvbn/clock.rb +6 -0
  11. data/lib/zxcvbn/crack_time.rb +52 -18
  12. data/lib/zxcvbn/data.rb +61 -21
  13. data/lib/zxcvbn/dictionary_ranker.rb +10 -0
  14. data/lib/zxcvbn/feedback.rb +11 -6
  15. data/lib/zxcvbn/feedback_giver.rb +75 -50
  16. data/lib/zxcvbn/guesses.rb +208 -0
  17. data/lib/zxcvbn/match.rb +95 -15
  18. data/lib/zxcvbn/match_builder.rb +15 -0
  19. data/lib/zxcvbn/matchers/date.rb +171 -106
  20. data/lib/zxcvbn/matchers/dictionary.rb +15 -8
  21. data/lib/zxcvbn/matchers/digits.rb +6 -1
  22. data/lib/zxcvbn/matchers/l33t.rb +30 -34
  23. data/lib/zxcvbn/matchers/regex_helpers.rb +14 -6
  24. data/lib/zxcvbn/matchers/repeat.rb +47 -16
  25. data/lib/zxcvbn/matchers/sequences.rb +58 -48
  26. data/lib/zxcvbn/matchers/spatial.rb +22 -6
  27. data/lib/zxcvbn/matchers/year.rb +6 -1
  28. data/lib/zxcvbn/math.rb +15 -28
  29. data/lib/zxcvbn/omnimatch.rb +70 -22
  30. data/lib/zxcvbn/ruby.rb +3 -0
  31. data/lib/zxcvbn/score.rb +34 -10
  32. data/lib/zxcvbn/scorer.rb +142 -75
  33. data/lib/zxcvbn/tester.rb +58 -23
  34. data/lib/zxcvbn/tester_builder.rb +83 -0
  35. data/lib/zxcvbn/trie.rb +21 -0
  36. data/lib/zxcvbn/version.rb +1 -1
  37. data/lib/zxcvbn.rb +47 -7
  38. data/sig/zxcvbn/clock.rbs +5 -0
  39. data/sig/zxcvbn/crack_time.rbs +3 -5
  40. data/sig/zxcvbn/data.rbs +17 -8
  41. data/sig/zxcvbn/feedback.rbs +6 -4
  42. data/sig/zxcvbn/guesses.rbs +36 -0
  43. data/sig/zxcvbn/match.rbs +35 -33
  44. data/sig/zxcvbn/match_builder.rbs +36 -0
  45. data/sig/zxcvbn/matchers/date.rbs +23 -0
  46. data/sig/zxcvbn/matchers/dictionary.rbs +21 -0
  47. data/sig/zxcvbn/matchers/digits.rbs +11 -0
  48. data/sig/zxcvbn/matchers/l33t.rbs +27 -0
  49. data/sig/zxcvbn/matchers/regex_helpers.rbs +7 -0
  50. data/sig/zxcvbn/matchers/repeat.rbs +11 -0
  51. data/sig/zxcvbn/matchers/sequences.rbs +16 -0
  52. data/sig/zxcvbn/matchers/spatial.rbs +15 -0
  53. data/sig/zxcvbn/matchers/year.rbs +11 -0
  54. data/sig/zxcvbn/math.rbs +0 -4
  55. data/sig/zxcvbn/omnimatch.rbs +5 -2
  56. data/sig/zxcvbn/score.rbs +22 -11
  57. data/sig/zxcvbn/scorer.rbs +7 -8
  58. data/sig/zxcvbn/tester.rbs +5 -7
  59. data/sig/zxcvbn/tester_builder.rbs +16 -0
  60. data/sig/zxcvbn/trie.rbs +4 -0
  61. data/sig/zxcvbn.rbs +6 -4
  62. metadata +30 -13
  63. data/lib/zxcvbn/entropy.rb +0 -158
  64. data/lib/zxcvbn/matchers/new_l33t.rb +0 -118
  65. data/lib/zxcvbn/password_strength.rb +0 -27
  66. data/sig/zxcvbn/entropy.rbs +0 -33
  67. data/sig/zxcvbn/password_strength.rbs +0 -10
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'zxcvbn/math'
4
+
5
+ module Zxcvbn
6
+ # Mixin that provides guesses estimation for each match pattern.
7
+ #
8
+ # Each pattern-specific method returns a raw guess count; {estimate_guesses}
9
+ # applies a per-token minimum and memoises the result on the match object.
10
+ # Mirrors the guesses estimation logic from zxcvbn.js v4.
11
+ # @api private
12
+ module Guesses
13
+ include Zxcvbn::Math
14
+
15
+ MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10_000
16
+ MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10
17
+ MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50
18
+ BRUTEFORCE_CARDINALITY = 10
19
+ MIN_YEAR_SPACE = 20
20
+
21
# Capitalisation patterns used to classify a dictionary token.
#
# Anchored with \A and \z (whole string) rather than ^ and $: in Ruby the
# latter match at every line boundary, so a token containing a newline
# could satisfy a pattern on just one of its lines.
START_UPPER = /\A[A-Z][^A-Z]+\z/ # first character uppercase, no other A-Z
END_UPPER = /\A[^A-Z]+[A-Z]\z/   # last character uppercase, no other A-Z
ALL_UPPER = /\A[^a-z]+\z/        # no a-z characters anywhere
ALL_LOWER = /\A[^A-Z]+\z/        # no A-Z characters anywhere
25
+
26
+ # Estimate the number of guesses required to crack a match.
27
+ #
28
+ # Mutates the builder in place: sets guesses, guesses_log10, and any
29
+ # pattern-specific fields (base_guesses, uppercase_variations, l33t_variations).
30
+ # Returns immediately if guesses are already set.
31
+ #
32
+ # @param match [MatchBuilder] the builder to estimate
33
+ # @param password [String] the full password being evaluated
34
+ # @return [Numeric] the estimated guess count
35
+ def estimate_guesses(match, password)
36
+ return match.guesses if match.guesses
37
+
38
+ token_length = match.token ? match.token.length : match.j - match.i + 1
39
+ min_guesses =
40
+ if token_length < password.length
41
+ token_length == 1 ? MIN_SUBMATCH_GUESSES_SINGLE_CHAR : MIN_SUBMATCH_GUESSES_MULTI_CHAR
42
+ else
43
+ 1
44
+ end
45
+
46
+ guesses =
47
+ case match.pattern
48
+ in 'bruteforce' then bruteforce_guesses(match)
49
+ in 'dictionary' then dictionary_guesses(match)
50
+ in 'spatial' then spatial_guesses(match)
51
+ in 'repeat' then repeat_guesses(match)
52
+ in 'sequence' then sequence_guesses(match)
53
+ in 'digits' then digits_guesses(match)
54
+ in 'year' then year_guesses(match)
55
+ in 'date' then date_guesses(match)
56
+ else 1
57
+ end
58
+
59
+ match.guesses = [guesses, min_guesses].max
60
+ match.guesses_log10 = ::Math.log10(match.guesses)
61
+ match.guesses
62
+ end
63
+
64
+ # @param match [MatchBuilder] a bruteforce match
65
+ # @return [Numeric] guesses based on token length and assumed cardinality
66
+ def bruteforce_guesses(match)
67
+ length = match.token ? match.token.length : match.j - match.i + 1
68
+ guesses = BRUTEFORCE_CARDINALITY**length.to_f
69
+ guesses = Float::MAX if guesses.infinite?
70
+ min = length == 1 ? MIN_SUBMATCH_GUESSES_SINGLE_CHAR + 1.0 : MIN_SUBMATCH_GUESSES_MULTI_CHAR + 1.0
71
+ [guesses, min].max
72
+ end
73
+
74
+ # @param match [MatchBuilder] a sequence match (e.g. "abc", "6543")
75
+ # @return [Integer] guesses based on sequence type and direction
76
+ def sequence_guesses(match)
77
+ first_char = match.token[0]
78
+ base_guesses =
79
+ if %w[a A z Z 0 1 9].include?(first_char)
80
+ 4
81
+ elsif first_char.match?(/\d/)
82
+ 10
83
+ else
84
+ 26
85
+ end
86
+ base_guesses *= 2 unless match.ascending
87
+ base_guesses * match.token.length
88
+ end
89
+
90
+ # @param match [MatchBuilder] a digits match
91
+ # @return [Integer] 10^length (all possible digit strings of that length)
92
+ def digits_guesses(match)
93
+ 10**match.token.length
94
+ end
95
+
96
+ # @param match [MatchBuilder] a year match
97
+ # @return [Integer] distance from {reference_year}, floored at {MIN_YEAR_SPACE}
98
+ def year_guesses(match)
99
+ [(match.token.to_i - reference_year).abs, MIN_YEAR_SPACE].max
100
+ end
101
+
102
+ # @param match [MatchBuilder] a date match with year and separator set
103
+ # @return [Integer] 365 * year_space, multiplied by 4 if a separator is present
104
+ def date_guesses(match)
105
+ year_space = [(match.year - reference_year).abs, MIN_YEAR_SPACE].max
106
+ guesses = 365 * year_space
107
+ guesses *= 4 if match.separator && !match.separator.empty?
108
+ guesses
109
+ end
110
+
111
+ # @param match [MatchBuilder] a spatial (keyboard pattern) match
112
+ # @return [Numeric] guesses based on graph topology, turns, and shifted keys
113
+ def spatial_guesses(match)
114
+ if %w[qwerty dvorak].include?(match.graph)
115
+ s = starting_positions_for_graph('qwerty')
116
+ d = average_degree_for_graph('qwerty')
117
+ else
118
+ s = starting_positions_for_graph('keypad')
119
+ d = average_degree_for_graph('keypad')
120
+ end
121
+
122
+ guesses = 0
123
+ token_length = match.token.length
124
+ turns = match.turns
125
+ (2..token_length).each do |i|
126
+ possible_turns = [turns, i - 1].min
127
+ (1..possible_turns).each do |j|
128
+ guesses += nCk(i - 1, j - 1) * s * (d**j)
129
+ end
130
+ end
131
+
132
+ if match.shifted_count&.positive?
133
+ shifted = match.shifted_count
134
+ unshifted = token_length - match.shifted_count
135
+ if unshifted.zero?
136
+ guesses *= 2
137
+ else
138
+ shift_variations = 0
139
+ (1..[shifted, unshifted].min).each { |i| shift_variations += nCk(shifted + unshifted, i) }
140
+ guesses *= shift_variations
141
+ end
142
+ end
143
+
144
+ guesses
145
+ end
146
+
147
+ # @param match [MatchBuilder] a dictionary match
148
+ # @return [Integer] rank multiplied by uppercase and l33t variation counts,
149
+ # and doubled if the word was matched in reverse
150
+ def dictionary_guesses(match)
151
+ match.base_guesses = match.rank
152
+ match.uppercase_variations = uppercase_variations(match)
153
+ match.l33t_variations = l33t_variations(match)
154
+ reversed_multiplier = match.reversed ? 2 : 1
155
+ match.base_guesses * match.uppercase_variations * match.l33t_variations * reversed_multiplier
156
+ end
157
+
158
+ # Count the number of ways the token's capitalisation could have been chosen.
159
+ #
160
+ # Returns 1 when the token contains no uppercase letters. Returns 2 for
161
+ # simple patterns (StartUpper, endUPPER, ALLCAPS). Otherwise returns the
162
+ # sum of combinations for mixed-case tokens.
163
+ #
164
+ # @param match [MatchBuilder] a dictionary match
165
+ # @return [Integer] uppercase variation multiplier
166
+ def uppercase_variations(match)
167
+ word = match.token
168
+ return 1 if word.match?(ALL_LOWER) || word.downcase == word
169
+
170
+ [START_UPPER, END_UPPER, ALL_UPPER].each { |r| return 2 if word.match?(r) }
171
+
172
+ num_upper = word.chars.count { |c| c.match?(/[A-Z]/) }
173
+ num_lower = word.chars.count { |c| c.match?(/[a-z]/) }
174
+ variations = 0
175
+ (1..[num_upper, num_lower].min).each { |i| variations += nCk(num_upper + num_lower, i) }
176
+ variations
177
+ end
178
+
179
# Count the number of ways the token's l33t substitutions could have been chosen.
#
# Returns 1 unless the match is flagged as l33t and carries a substitution
# table. Otherwise, for every (subbed, unsubbed) character pair in
# +match.sub+, the occurrences of both characters in the lowercased token
# are counted:
#
# * if either count is zero, the pair contributes a factor of 2;
# * otherwise it contributes the sum of nCk(subbed + unsubbed, i) for
#   i in 1..min(subbed, unsubbed).
#
# The per-pair factors are multiplied together.
#
# @param match [MatchBuilder] a dictionary match, possibly with l33t substitutions
# @return [Integer] l33t variation multiplier
def l33t_variations(match)
  return 1 unless match.l33t && match.sub
  # Nothing to multiply; also avoids touching the token when there are no pairs.
  return 1 if match.sub.empty?

  # The token is the same for every pair, so tally its characters once
  # instead of re-splitting and re-scanning it on each iteration.
  char_counts = match.token.downcase.each_char.tally

  match.sub.reduce(1) do |variations, (subbed, unsubbed)|
    num_subbed = char_counts.fetch(subbed, 0)
    num_unsubbed = char_counts.fetch(unsubbed, 0)

    pair_variations =
      if num_subbed.zero? || num_unsubbed.zero?
        2
      else
        total = num_subbed + num_unsubbed
        (1..[num_subbed, num_unsubbed].min).sum { |i| nCk(total, i) }
      end

    variations * pair_variations
  end
end
205
+
206
+ attr_reader :reference_year
207
+ end
208
+ end
data/lib/zxcvbn/match.rb CHANGED
@@ -1,24 +1,104 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zxcvbn
4
- class Match
5
- attr_accessor :pattern, :i, :j, :token, :matched_word, :rank,
6
- :dictionary_name, :reversed, :l33t, :sub, :sub_display,
7
- :l, :entropy, :base_entropy, :uppercase_entropy, :l33t_entropy,
8
- :repeated_char, :sequence_name, :sequence_space, :ascending,
9
- :graph, :turns, :shifted_count, :shiffted_count,
10
- :year, :month, :day, :separator, :cardinality, :offset
4
+ # A substring match found by one of the pattern matchers.
5
+ #
6
+ # All attributes are optional and default to +nil+. Use {#with} to derive a
7
+ # new match with changed attributes.
8
+ #
9
+ # @!attribute [r] pattern
10
+ # @return [String, nil] the matcher that produced this match
11
+ # @!attribute [r] i
12
+ # @return [Integer, nil] start index in the password (inclusive)
13
+ # @!attribute [r] j
14
+ # @return [Integer, nil] end index in the password (inclusive)
15
+ # @!attribute [r] token
16
+ # @return [String, nil] the matched substring
17
+ # @!attribute [r] matched_word
18
+ # @return [String, nil] lowercased dictionary word (dictionary matches)
19
+ # @!attribute [r] rank
20
+ # @return [Integer, nil] frequency rank of the matched word (dictionary matches)
21
+ # @!attribute [r] dictionary_name
22
+ # @return [String, nil] source dictionary name (dictionary matches)
23
+ # @!attribute [r] reversed
24
+ # @return [Boolean, nil] true when matched in the reversed password
25
+ # @!attribute [r] l33t
26
+ # @return [Boolean, nil] true when the match required l33t substitution
27
+ # @!attribute [r] sub
28
+ # @return [Hash, nil] map of l33t characters to their substituted letters
29
+ # @!attribute [r] sub_display
30
+ # @return [String, nil] human-readable substitution summary
31
+ # @!attribute [r] guesses
32
+ # @return [Numeric, nil] estimated guess count
33
+ # @!attribute [r] guesses_log10
34
+ # @return [Float, nil] log10 of {#guesses}
35
+ # @!attribute [r] base_guesses
36
+ # @return [Numeric, nil] guesses for the base token (repeat matches) or
37
+ # rank before variation multipliers (dictionary matches)
38
+ # @!attribute [r] uppercase_variations
39
+ # @return [Numeric, nil] capitalisation variant count (dictionary matches)
40
+ # @!attribute [r] l33t_variations
41
+ # @return [Numeric, nil] l33t substitution variant count (dictionary matches)
42
+ # @!attribute [r] base_token
43
+ # @return [String, nil] the minimal repeating unit (repeat matches)
44
+ # @!attribute [r] repeat_count
45
+ # @return [Integer, nil] number of repetitions (repeat matches)
46
+ # @!attribute [r] sequence_name
47
+ # @return [String, nil] sequence type: "lower", "upper", "digits", or "unicode"
48
+ # @!attribute [r] sequence_space
49
+ # @return [Integer, nil] size of the character set for the sequence
50
+ # @!attribute [r] ascending
51
+ # @return [Boolean, nil] true if the sequence is ascending
52
+ # @!attribute [r] graph
53
+ # @return [String, nil] keyboard graph name (spatial matches)
54
+ # @!attribute [r] turns
55
+ # @return [Integer, nil] number of direction changes (spatial matches)
56
+ # @!attribute [r] shifted_count
57
+ # @return [Integer, nil] number of shifted characters (spatial matches)
58
+ # @!attribute [r] year
59
+ # @return [Integer, nil] matched year (date/year matches)
60
+ # @!attribute [r] month
61
+ # @return [Integer, nil] matched month (date matches)
62
+ # @!attribute [r] day
63
+ # @return [Integer, nil] matched day (date matches)
64
+ # @!attribute [r] separator
65
+ # @return [String, nil] date separator character (date matches)
66
+ Match = ::Data.define(
67
+ :pattern, :i, :j, :token, :matched_word, :rank, :dictionary_name, :reversed,
68
+ :l33t, :sub, :sub_display, :guesses, :guesses_log10, :base_guesses,
69
+ :uppercase_variations, :l33t_variations, :base_token, :repeat_count,
70
+ :sequence_name, :sequence_space, :ascending, :graph, :turns, :shifted_count,
71
+ :year, :month, :day, :separator
72
+ ) do
73
+ def initialize(
74
+ pattern: nil, i: nil, j: nil, token: nil, matched_word: nil, rank: nil,
75
+ dictionary_name: nil, reversed: nil, l33t: nil, sub: nil,
76
+ sub_display: nil, guesses: nil, guesses_log10: nil, base_guesses: nil,
77
+ uppercase_variations: nil, l33t_variations: nil, base_token: nil,
78
+ repeat_count: nil, sequence_name: nil, sequence_space: nil,
79
+ ascending: nil, graph: nil, turns: nil, shifted_count: nil,
80
+ year: nil, month: nil, day: nil, separator: nil
81
+ )
82
+ super
83
+ end
11
84
 
12
- def initialize(**attributes)
13
- attributes.each do |key, value|
14
- instance_variable_set("@#{key}", value)
15
- end
85
+ # @return [String] a human-readable representation omitting nil fields and token
86
+ def inspect
87
+ fields = to_h.reject { |k, v| v.nil? || k == :token }.map { |k, v| "#{k}=#{v.inspect}" }.join(', ')
88
+ "#<data #{self.class} #{fields}>"
16
89
  end
17
90
 
18
- def to_hash
19
- instance_variables.sort.each_with_object({}) do |var, hash|
20
- key = var.to_s.delete_prefix('@')
21
- hash[key] = instance_variable_get(var)
91
+ # @param pp [PP] the pretty-printer instance
92
+ # @return [void]
93
+ def pretty_print(pp)
94
+ fields = to_h.reject { |_, v| v.nil? }
95
+ pp.group(1, "#<data #{self.class}", '>') do
96
+ fields.each_with_index do |(k, v), i|
97
+ pp.text(',') if i.positive?
98
+ pp.breakable ' '
99
+ pp.text("#{k}=")
100
+ v.pretty_print(pp)
101
+ end
22
102
  end
23
103
  end
24
104
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'zxcvbn/match'
4
+
5
+ module Zxcvbn
6
+ # Mutable accumulator for match attributes. Matchers populate a builder
7
+ # incrementally; {#build} seals it into an immutable {Match}.
8
+ # @api private
9
+ MatchBuilder = Struct.new(*Match.members, keyword_init: true) do
10
+ # @return [Match] immutable match with the current attribute values (the +sub+ hash, if set, is frozen in place)
11
+ def build
12
+ Match.new(**to_h.tap { |h| h[:sub]&.freeze })
13
+ end
14
+ end
15
+ end
@@ -1,139 +1,204 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'zxcvbn/matchers/regex_helpers'
4
-
5
3
  module Zxcvbn
6
4
  module Matchers
5
+ # Matches date patterns in passwords, both with and without separators.
6
+ # Ported from the zxcvbn v4 JavaScript implementation's +date_match+ function.
7
+ # @api private
7
8
  class Date
8
- include RegexHelpers
9
-
10
- YEAR_SUFFIX = %r{
11
- ( \d{1,2} ) # day or month
12
- ( \s | - | / | \\ | _ | \. ) # separator
13
- ( \d{1,2} ) # month or day
14
- \2 # same separator
15
- ( 19\d{2} | 200\d | 201\d | \d{2} ) # year
16
- }x.freeze
17
-
18
- YEAR_PREFIX = %r{
19
- ( 19\d{2} | 200\d | 201\d | \d{2} ) # year
20
- ( \s | - | / | \\ | _ | \. ) # separator
21
- ( \d{1,2} ) # day or month
22
- \2 # same separator
23
- ( \d{1,2} ) # month or day
24
- }x.freeze
25
-
26
- WITHOUT_SEPARATOR = /\d{4,8}/.freeze
27
-
28
- def matches(password)
29
- match_with_separator(password) + match_without_separator(password)
9
+ # Matches a separator-based date substring (e.g. "02/12/1997", "97-12-02").
10
+ # The first and last groups each allow 1–4 digits so the year may appear in
11
+ # either position; {map_ints_to_dmy} resolves which group is the year.
12
+ MAYBE_DATE_WITH_SEP = %r{\A(\d{1,4})([\s/\\_.-])(\d{1,2})\2(\d{1,4})\z}
13
+
14
+ # Matches a run of digits that could be a date without separators.
15
+ MAYBE_DATE_WITHOUT_SEP = /\A\d+\z/
16
+
17
+ # Maps token length to split-point pairs for separator-free date parsing.
18
+ # Each pair +[a, b]+ divides the token into three parts: +[0...a]+, +[a...b]+, +[b..]+.
19
+ # Mirrors +DATE_SPLITS+ in the JS v4 source.
20
+ DATE_SPLITS = {
21
+ 4 => [[1, 2], [2, 3]],
22
+ 5 => [[1, 3], [2, 3]],
23
+ 6 => [[1, 2], [2, 4], [4, 5]],
24
+ 7 => [[1, 3], [2, 3], [4, 5], [4, 6]],
25
+ 8 => [[2, 4], [4, 6]]
26
+ }.freeze
27
+
28
+ # Earliest year accepted as a valid date year.
29
+ DATE_MIN_YEAR = 1000
30
+
31
+ # Latest year accepted as a valid date year.
32
+ DATE_MAX_YEAR = 2050
33
+
34
+ # Returns all date matches found in +password+, deduplicating any match
35
+ # whose character span is fully contained within another match's span.
36
+ #
37
+ # @param password [String] the password to search
38
+ # @param reference_year [Integer] year used to pick the closest candidate for
39
+ # separator-free dates; defaults to the current year
40
+ # @return [Array<MatchBuilder>] matches with pattern 'date', each containing
41
+ # +year+, +month+, +day+, and +separator+
42
+ def matches(password, reference_year: Time.now.year)
43
+ all = match_with_separator(password) + match_without_separator(password, reference_year:)
44
+ all.reject do |match|
45
+ all.any? { |other| !other.equal?(match) && other.i <= match.i && other.j >= match.j }
46
+ end
30
47
  end
31
48
 
49
+ # Finds date matches that use a separator character (space, slash, hyphen, etc.).
50
+ # Iterates over all substrings of length 6–10 and tests each against
51
+ # {MAYBE_DATE_WITH_SEP}, then resolves day/month/year via {map_ints_to_dmy}.
52
+ #
53
+ # @param password [String] the password to search
54
+ # @return [Array<MatchBuilder>] separator-based date matches
32
55
  def match_with_separator(password)
33
56
  result = []
34
- re_match_all(YEAR_SUFFIX, password) do |match, re_match|
35
- match.pattern = 'date'
36
- match.separator = re_match[2]
37
- match.year = re_match[4].to_i
38
-
39
- day = re_match[1].to_i
40
- month = re_match[3].to_i
41
-
42
- if month <= 12
43
- match.day = day
44
- match.month = month
45
- else
46
- match.day = month
47
- match.month = day
57
+ return result if password.length < 6
58
+
59
+ (0..(password.length - 6)).each do |i|
60
+ ((i + 5)..[i + 9, password.length - 1].min).each do |j|
61
+ token = password[i..j]
62
+ m = MAYBE_DATE_WITH_SEP.match(token)
63
+ next unless m
64
+
65
+ date = map_ints_to_dmy(m[1].to_i, m[3].to_i, m[4].to_i)
66
+ next unless date
67
+
68
+ result << MatchBuilder.new(
69
+ i:, j:, token:,
70
+ pattern: 'date',
71
+ separator: m[2],
72
+ year: date[:year],
73
+ month: date[:month],
74
+ day: date[:day]
75
+ )
48
76
  end
49
-
50
- result << match if valid_date?(match.day, match.month, match.year)
51
77
  end
52
78
  result
53
79
  end
54
80
 
55
- def match_without_separator(password)
81
+ # Finds date matches in runs of digits that contain no separator character.
82
+ # Iterates over all digit-only substrings of length 4–8, applies {DATE_SPLITS}
83
+ # to generate day/month/year candidates via {map_ints_to_dmy}, and picks the
84
+ # candidate whose year is closest to +reference_year+.
85
+ #
86
+ # 4-digit tokens that look like standalone years (matched by {Year::YEAR_REGEX})
87
+ # are skipped to avoid treating a year token as a date.
88
+ #
89
+ # @param password [String] the password to search
90
+ # @param reference_year [Integer] year used to pick the closest candidate;
91
+ # defaults to the current year
92
+ # @return [Array<MatchBuilder>] separator-free date matches
93
+ def match_without_separator(password, reference_year: Time.now.year)
56
94
  result = []
57
- re_match_all(WITHOUT_SEPARATOR, password) do |match, _re_match|
58
- extract_dates(match.token).each do |candidate|
59
- day = candidate[:day]
60
- month = candidate[:month]
61
- year = candidate[:year]
62
-
63
- match.pattern = 'date'
64
- match.day = day
65
- match.month = month
66
- match.year = year
67
- match.separator = ''
68
- result << match
95
+ return result if password.length < 4
96
+
97
+ (0..(password.length - 4)).each do |i|
98
+ ((i + 3)..[i + 7, password.length - 1].min).each do |j|
99
+ token = password[i..j]
100
+ next unless MAYBE_DATE_WITHOUT_SEP.match?(token)
101
+
102
+ splits = DATE_SPLITS[token.length]
103
+ next unless splits
104
+ next if token.length == 4 && Year::YEAR_REGEX.match?(token)
105
+
106
+ candidates = splits.filter_map do |a, b|
107
+ map_ints_to_dmy(token[0...a].to_i, token[a...b].to_i, token[b..].to_i)
108
+ end
109
+ next if candidates.empty?
110
+
111
+ best = candidates.min_by { |c| (c[:year] - reference_year).abs }
112
+
113
+ result << MatchBuilder.new(
114
+ i:, j:, token:,
115
+ pattern: 'date',
116
+ separator: '',
117
+ year: best[:year],
118
+ month: best[:month],
119
+ day: best[:day]
120
+ )
69
121
  end
70
122
  end
71
123
  result
72
124
  end
73
125
 
74
- def extract_dates(token)
75
- dates = []
76
- date_patterns_for_length(token.length).map do |pattern|
77
- candidate = {
78
- year: +'',
79
- month: +'',
80
- day: +''
81
- }
82
- (0...token.length).each do |i|
83
- candidate[PATTERN_CHAR_TO_SYM[pattern[i]]] << token[i]
84
- end
85
- candidate.each do |component, value|
86
- candidate[component] = value.to_i
87
- end
88
-
89
- candidate[:year] = expand_year(candidate[:year])
90
-
91
- if valid_date?(candidate[:day], candidate[:month], candidate[:year]) && !matches_year?(token)
92
- dates << candidate
93
- end
126
+ # Resolves three integers into a +{year:, month:, day:}+ hash, or +nil+ if
127
+ # no valid assignment exists. Mirrors +map_ints_to_dmy+ in the JS v4 source.
128
+ #
129
+ # The middle value (+int2+) is always treated as the non-year component (it
130
+ # comes from the +\d{1,2}+ capture group in the separator regex, or the
131
+ # middle split in the no-separator path). The outer two values are tried as
132
+ # the year: first +int3+, then +int1+. A value in +[DATE_MIN_YEAR, DATE_MAX_YEAR]+
133
+ # is treated as a 4-digit year (takes priority); otherwise both are tried as
134
+ # 2-digit years via {expand_year}.
135
+ #
136
+ # @param int1 [Integer] first integer (leading digits)
137
+ # @param int2 [Integer] middle integer (always the non-year component)
138
+ # @param int3 [Integer] last integer (trailing digits)
139
+ # @return [Hash, nil] +{year:, month:, day:}+ or +nil+ if no valid date
140
+ def map_ints_to_dmy(int1, int2, int3)
141
+ return nil if int2 > 31 || int2 <= 0
142
+
143
+ [int1, int2, int3].each do |n|
144
+ return nil if n > 99 && n < DATE_MIN_YEAR
145
+ return nil if n > DATE_MAX_YEAR
94
146
  end
95
- dates
96
- end
97
147
 
98
- DATE_PATTERN_FOR_LENGTH = {
99
- 8 => %w[yyyymmdd ddmmyyyy mmddyyyy].freeze,
100
- 7 => %w[yyyymdd yyyymmd ddmyyyy dmmyyyy].freeze,
101
- 6 => %w[yymmdd ddmmyy mmddyy].freeze,
102
- 5 => %w[yymdd yymmd ddmyy dmmyy mmdyy mddyy].freeze,
103
- 4 => %w[yymd dmyy mdyy].freeze
104
- }.freeze
148
+ num_over_thirty_one = [int1, int2, int3].count { |n| n > 31 }
149
+ num_over_twelve = [int1, int2, int3].count { |n| n > 12 }
150
+ num_under_one = [int1, int2, int3].count { |n| n <= 0 }
151
+ return nil if num_over_thirty_one >= 2 || num_over_twelve == 3 || num_under_one >= 2
152
+
153
+ # Try int3 then int1 as the year; 4-digit range takes priority over 2-digit.
154
+ # If a 4-digit candidate is found but day/month are invalid, return nil immediately
155
+ # rather than falling through to the 2-digit pass.
156
+ pairs = [[int3, int1, int2], [int1, int2, int3]]
157
+ four_digit = pairs.find { |yc, _dm1, _dm2| yc.between?(DATE_MIN_YEAR, DATE_MAX_YEAR) }
158
+ if four_digit
159
+ year_candidate, dm1, dm2 = four_digit
160
+ dm = map_ints_to_dm(dm1, dm2)
161
+ return dm ? { year: year_candidate, month: dm[:month], day: dm[:day] } : nil
162
+ end
105
163
 
106
- PATTERN_CHAR_TO_SYM = {
107
- 'y' => :year,
108
- 'm' => :month,
109
- 'd' => :day
110
- }.freeze
164
+ # Fall back to 2-digit year
165
+ pairs.each do |year_candidate, dm1, dm2|
166
+ dm = map_ints_to_dm(dm1, dm2)
167
+ next unless dm
111
168
 
112
- def date_patterns_for_length(length)
113
- DATE_PATTERN_FOR_LENGTH[length] || []
114
- end
115
-
116
- def valid_date?(day, month, year)
117
- return false if day > 31 || month > 12
118
- return false unless year >= 1900 && year <= 2019
169
+ return { year: expand_year(year_candidate), month: dm[:month], day: dm[:day] }
170
+ end
119
171
 
120
- true
172
+ nil
121
173
  end
122
174
 
123
- def matches_year?(token)
124
- token.size == 4 && Year::YEAR_REGEX.match(token)
175
+ # Tries to assign two integers to day and month. Attempts both orderings
176
+ # and returns the first that satisfies +1 ≤ day ≤ 31+ and +1 ≤ month ≤ 12+.
177
+ # Mirrors +map_ints_to_dm+ in the JS v4 source.
178
+ #
179
+ # @param day_val [Integer] candidate day value
180
+ # @param month_val [Integer] candidate month value
181
+ # @return [Hash, nil] +{day:, month:}+ or +nil+ if neither ordering is valid
182
+ def map_ints_to_dm(day_val, month_val)
183
+ [[day_val, month_val], [month_val, day_val]].each do |day, month|
184
+ return { day:, month: } if day.between?(1, 31) && month >= 1 && month <= 12
185
+ end
186
+ nil
125
187
  end
126
188
 
189
+ # Expands a 2-digit year to 4 digits. Values above 99 are returned unchanged.
190
+ # Mirrors +two_to_four_digit_year+ in the JS v4 source.
191
+ #
192
+ # Threshold is strictly +> 50+, matching JS: 50 → 2050, 51 → 1951.
193
+ # Negative values are not > 50, so 2000 is added (e.g. -5 → 1995) — this is an
194
+ # edge case inherited from the JS implementation.
195
+ #
196
+ # @param year [Integer] the year value to expand
197
+ # @return [Integer] 4-digit year
127
198
  def expand_year(year)
128
- year
129
- # Block dates with 2 digit years for now to be compatible with the JS version
130
- # return year unless year < 100
131
- # now = Time.now.year
132
- # if year <= 19
133
- # year + 2000
134
- # else
135
- # year + 1900
136
- # end
199
+ return year if year > 99
200
+
201
+ year > 50 ? year + 1900 : year + 2000
137
202
  end
138
203
  end
139
204
  end