spellr 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +186 -0
  5. data/.ruby-version +1 -0
  6. data/.spellr.yml +23 -0
  7. data/.spellr_wordlists/dictionary.txt +120 -0
  8. data/.spellr_wordlists/english.txt +3 -0
  9. data/.spellr_wordlists/lorem.txt +4 -0
  10. data/.spellr_wordlists/ruby.txt +2 -0
  11. data/.travis.yml +7 -0
  12. data/Gemfile +8 -0
  13. data/Gemfile.lock +67 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +64 -0
  16. data/Rakefile +8 -0
  17. data/bin/console +8 -0
  18. data/bin/fetch_wordlist/english +65 -0
  19. data/bin/fetch_wordlist/ruby +150 -0
  20. data/bin/setup +3 -0
  21. data/exe/spellr +5 -0
  22. data/lib/.spellr.yml +93 -0
  23. data/lib/spellr.rb +26 -0
  24. data/lib/spellr/check.rb +56 -0
  25. data/lib/spellr/cli.rb +205 -0
  26. data/lib/spellr/column_location.rb +49 -0
  27. data/lib/spellr/config.rb +105 -0
  28. data/lib/spellr/file.rb +27 -0
  29. data/lib/spellr/file_list.rb +45 -0
  30. data/lib/spellr/interactive.rb +191 -0
  31. data/lib/spellr/language.rb +104 -0
  32. data/lib/spellr/line_location.rb +29 -0
  33. data/lib/spellr/line_tokenizer.rb +181 -0
  34. data/lib/spellr/reporter.rb +27 -0
  35. data/lib/spellr/string_format.rb +43 -0
  36. data/lib/spellr/token.rb +83 -0
  37. data/lib/spellr/tokenizer.rb +72 -0
  38. data/lib/spellr/version.rb +5 -0
  39. data/lib/spellr/wordlist.rb +100 -0
  40. data/lib/spellr/wordlist_reporter.rb +21 -0
  41. data/spellr.gemspec +35 -0
  42. data/wordlist +2 -0
  43. data/wordlists/dockerfile.txt +21 -0
  44. data/wordlists/html.txt +340 -0
  45. data/wordlists/javascript.txt +64 -0
  46. data/wordlists/ruby.txt +2344 -0
  47. data/wordlists/shell.txt +2 -0
  48. metadata +217 -0
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'io/console'
4
+ require 'readline'
5
+ require_relative '../spellr'
6
+ require_relative 'reporter'
7
+ require_relative 'string_format'
8
+
9
module Spellr
  # Interactive reporter: prints each reported token, then reads single
  # keypresses from STDIN to decide whether to skip it, replace it, edit the
  # whole line, or add it to a wordlist.
  #
  # Choices made "for all future instances" are remembered on this instance
  # and applied automatically by #call before prompting again.
  #
  # The formatting helpers used below (bold, red, aqua, pluralize) are not
  # defined in this class; presumably they come from Spellr::StringFormat.
  class Interactive # rubocop:disable Metrics/ClassLength
    include Spellr::StringFormat

    # Exact-match replacements / skips, keyed by (or containing) token.to_s.
    attr_reader :global_replacements, :global_skips
    # Replacements keyed by token.normalize.
    attr_reader :global_insensitive_replacements
    # Skips stored as token.normalize.
    attr_reader :global_insensitive_skips
    # Running counters reported by #finish.
    attr_accessor :total_skipped
    attr_accessor :total_fixed
    attr_accessor :total_added

    def initialize
      @global_replacements = {}
      @global_insensitive_replacements = {}
      @global_skips = []
      @global_insensitive_skips = []
      @total_skipped = 0
      @total_fixed = 0
      @total_added = 0
    end

    # Prints the summary. `checked` is passed to pluralize as the file count.
    # The skipped/fixed/added lines are only printed when non-zero.
    def finish(checked) # rubocop:disable Metrics/AbcSize
      puts "\n"
      puts "#{pluralize 'file', checked} checked"
      puts "#{pluralize 'error', total} found"
      puts "#{pluralize 'error', total_skipped} skipped" if total_skipped.positive?
      puts "#{pluralize 'error', total_fixed} fixed" if total_fixed.positive?
      puts "#{pluralize 'word', total_added} added" if total_added.positive?
    end

    # Every handled token ends up in exactly one of the three counters.
    def total
      total_skipped + total_fixed + total_added
    end

    # Handles one token: applies a remembered replacement or skip if there is
    # one, otherwise reports the token and prompts the user.
    #
    # Raises Spellr::DidReplacement when a remembered replacement was applied
    # (see #attempt_global_replacement).
    def call(token)
      return if attempt_global_replacement(token)
      return if attempt_global_skip(token)

      Spellr::Reporter.new.call(token)

      prompt(token)
    end

    # Prints the key hint and waits for a keypress.
    # NOTE(review): 'i' and 'I' are also accepted by #handle_response but are
    # not listed here or in #handle_help.
    def prompt(token)
      print bold('[a,s,S,r,R,e,?]')

      handle_response(token)
    rescue Interrupt
      puts '^C again to exit'
    end

    # Counts the token as skipped when it matches a remembered skip.
    # Returns a truthy value (the new counter) when skipped, nil otherwise.
    def attempt_global_skip(token)
      return unless global_skips.include?(token.to_s) ||
                    global_insensitive_skips.include?(token.normalize)

      self.total_skipped += 1
    end

    # Applies a remembered replacement, if any, and raises
    # Spellr::DidReplacement. Returns nil when there is nothing to apply.
    def attempt_global_replacement(token)
      global_replacement = global_replacements[token.to_s]
      global_replacement ||= global_insensitive_replacements[token.normalize]
      return unless global_replacement

      token.replace(global_replacement)
      self.total_fixed += 1
      raise Spellr::DidReplacement, token
    end

    # Carriage return, then the ANSI "erase to end of line" sequence.
    def clear_current_line
      print "\r\e[K"
    end

    # Reads one keypress and dispatches it. Unknown keys re-run #call.
    def handle_response(token) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
      task = STDIN.getch
      clear_current_line

      case task
      when "\u0003" # ctrl c
        exit 1
      when 'a'
        handle_add(token)
      when 's', "\u0004" # ctrl d
        handle_skip(token)
      when 'S'
        handle_skip(token) { |skip_token| global_skips << skip_token.to_s }
      when 'i'
        # Stored in the same form #attempt_global_skip looks up (normalize),
        # matching how the 'I' replacement key below is stored.
        handle_skip(token) { |skip_token| global_insensitive_skips << skip_token.normalize }
      when 'R'
        handle_replacement(token) { |replacement| global_replacements[token.to_s] = replacement }
      when 'I'
        handle_replacement(token) { |replacement| global_insensitive_replacements[token.normalize] = replacement }
      when 'r'
        handle_replacement(token)
      when 'e'
        handle_replace_line(token)
      when '?'
        handle_help(token)
      else
        clear_current_line
        call(token)
      end
    end

    # Counts a skip; the optional block receives the token so the caller can
    # remember it for later.
    def handle_skip(token)
      self.total_skipped += 1
      yield token if block_given?
    end

    # Lists the addable wordlists for the token's file, reads a single hex
    # digit choosing one, adds the token to it and raises Spellr::DidAdd.
    # Any other key (or an out-of-range digit) shows the list again.
    #
    # TODO: handle more than 16 options
    def handle_add(token) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      puts "Add #{red(token)} to wordlist:"
      wordlists = Spellr.config.languages_for(token.location.file).flat_map(&:addable_wordlists)

      wordlists.each_with_index do |wordlist, i|
        puts "[#{i.to_s(16)}] #{wordlist.name}"
      end
      choice = STDIN.getch
      clear_current_line
      case choice
      when "\u0003" # ctrl c
        puts '^C again to exit'
        call(token)
      when /\h/
        wl = wordlists[choice.to_i(16)]
        return handle_add(token) unless wl

        wl.add(token)
        self.total_added += 1
        raise Spellr::DidAdd, token
      else
        handle_add(token)
      end
    end

    # Prompts (via Readline, pre-filled with the current text) for a
    # replacement of `token`.
    #
    # token          - the text being edited: the misspelled token itself, or
    #                  its whole line when called from #handle_replace_line.
    # original_token - the misspelled token that triggered the prompt.
    #
    # Yields the replacement text when a block is given, then raises
    # Spellr::DidReplacement. An empty answer or EOF goes back to the normal
    # prompt for the original token.
    def handle_replacement(token, original_token: token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      readline_editable_print(token.chomp)
      highlighted_token = token == original_token ? red(token) : token.highlight(original_token.char_range)
      prompt_text = "#{aqua '>>'} #{highlighted_token.chomp}\n#{aqua '=>'} "
      replacement = Readline.readline(prompt_text)

      # Readline.readline returns nil on EOF (ctrl-d); treat it like an empty
      # answer. Re-prompt for the original token (as the Interrupt branch
      # does) rather than for the whole line being edited.
      return call(original_token) if replacement.nil? || replacement.empty?

      # When editing a whole line the trailing newline was chomped for
      # display, so put it back.
      full_replacement = token == original_token ? replacement : replacement + "\n"
      token.replace(full_replacement)
      yield replacement if block_given?
      self.total_fixed += 1
      raise Spellr::DidReplacement, token
    rescue Interrupt
      puts '^C again to exit'
      call(original_token)
    end

    # Edits the whole line that contains `token`.
    def handle_replace_line(token)
      handle_replacement(
        token.line,
        original_token: token
      )
    end

    # Prints the key reference, then waits for the next keypress.
    def handle_help(token) # rubocop:disable Metrics/AbcSize
      puts "#{bold '[r]'} Replace #{red token}"
      puts "#{bold '[R]'} Replace all future instances of #{red token}"
      puts "#{bold '[s]'} Skip #{red token}"
      puts "#{bold '[S]'} Skip all future instances of #{red token}"
      puts "#{bold '[a]'} Add #{red token} to a word list"
      puts "#{bold '[e]'} Edit the whole line"
      puts "#{bold '[?]'} Show this help"
      handle_response(token)
    end

    # Arranges for the next Readline prompt to start with `string` already
    # typed in, so the user edits it rather than retyping it.
    def readline_editable_print(string)
      Readline.pre_input_hook = lambda {
        Readline.refresh_line
        Readline.insert_text string.to_s
        Readline.redisplay

        # Remove the hook right away.
        Readline.pre_input_hook = nil
      }
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'wordlist'
4
+
5
module Spellr
  # A named language: which files it applies to (glob patterns and hashbang
  # fragments) and which wordlists belong to it.
  #
  # Three default wordlists are derived from the language name: one under
  # `wordlists/` two directories above this file, and two under
  # `.spellr_wordlists/` in the current working directory (one of them in a
  # `generated/` subdirectory).
  class Language
    attr_reader :name

    # name        - used to build the default wordlist file names
    # wordlists   - extra wordlist paths from configuration
    # generate    - optional command line (a String) used to build a wordlist
    #               when none is available; see #generate_wordlist
    # only        - glob patterns; empty means "matches every file"
    # description - stored, not used within this class
    # hashbangs   - substrings looked for in a file's hashbang line
    def initialize(name, # rubocop:disable Metrics/ParameterLists
                   wordlists: [],
                   generate: nil,
                   only: [],
                   description: '',
                   hashbangs: [])
      @name = name
      @description = description
      @generate = generate
      @wordlist_paths = wordlists
      @only = only
      @hashbangs = hashbangs
    end

    # True when `only` is empty, when the file matches one of the `only`
    # patterns, or when its hashbang contains one of the `hashbangs`.
    # Always returns true or false.
    def matches?(file)
      return true if @only.empty?

      file = Spellr::File.wrap(file)
      return true if @only.any? { |pattern| file.fnmatch?(pattern) }

      hashbang = file.hashbang
      return false unless hashbang

      @hashbangs.any? { |fragment| hashbang.include?(fragment) }
    end

    # Wordlist objects for the configured paths (memoized).
    def config_wordlists
      @config_wordlists ||= @wordlist_paths.map(&Spellr::Wordlist.method(:new))
    end

    # Configured paths followed by the paths of the default wordlists.
    def all_wordlist_paths
      @wordlist_paths + default_wordlist_paths
    end

    # Configured wordlists plus whichever default wordlists exist. When that
    # is empty, falls back to #generate_wordlist.
    def wordlists
      available = config_wordlists + default_wordlists.select(&:exist?)
      return generate_wordlist if available.empty?

      available
    end

    # Runs the configured `generate` command through Spellr::CLI and returns
    # the configured plus default wordlists. Returns [] when no command is
    # configured.
    def generate_wordlist
      return [] unless generate

      # Loaded lazily, only when a wordlist actually has to be generated.
      require_relative 'cli'
      require 'shellwords'
      warn "Generating wordlist for #{name}"

      Spellr::CLI.new(generate.shellsplit)

      config_wordlists + default_wordlists
    end

    # Wordlists a user may add words to: configured wordlists that are not
    # one of the defaults, followed by the project wordlist.
    #
    # Defaults are excluded by path rather than by object equality, because
    # the configured Wordlist objects are built separately from the default
    # ones.
    def addable_wordlists
      default_paths = default_wordlist_paths
      custom = config_wordlists.reject { |wordlist| default_paths.include?(wordlist.path) }

      (custom + [project_wordlist]).uniq(&:path)
    end

    # `wordlists/<name>.txt`, two directories above this file.
    def gem_wordlist
      @gem_wordlist ||= Spellr::Wordlist.new(
        Pathname.new(__dir__).parent.parent.join('wordlists', "#{name}.txt")
      )
    end

    # `.spellr_wordlists/<name>.txt` in the working directory.
    def project_wordlist
      @project_wordlist ||= Spellr::Wordlist.new(
        Pathname.pwd.join('.spellr_wordlists', "#{name}.txt"),
        name: name
      )
    end

    # `.spellr_wordlists/generated/<name>.txt` in the working directory.
    def generated_project_wordlist
      @generated_project_wordlist ||= Spellr::Wordlist.new(
        Pathname.pwd.join('.spellr_wordlists', 'generated', "#{name}.txt")
      )
    end

    private

    attr_reader :generate

    # NOTE(review): not called anywhere in this class.
    def load_wordlists(name, paths, _generate)
      wordlists = paths + default_wordlist_paths(name)

      wordlists.map(&Spellr::Wordlist.method(:new))
    end

    # NOTE(review): not called anywhere in this class.
    def custom_addable_wordlists(wordlists)
      default_paths = default_wordlist_paths
      wordlists.map { |w| Spellr::Wordlist.new(w) }.reject { |w| default_paths.include?(w.path) }
    end

    # Paths of the default wordlists. The optional argument is accepted and
    # ignored so both existing call shapes (with and without a name) work.
    def default_wordlist_paths(_name = nil)
      default_wordlists.map(&:path)
    end

    def default_wordlists
      [
        gem_wordlist,
        generated_project_wordlist,
        project_wordlist
      ]
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Spellr
  # Position of a line: the file it is in, its line number, and the character
  # and byte offsets recorded for it.
  class LineLocation
    attr_reader :file
    attr_reader :line_number
    attr_reader :char_offset
    attr_reader :byte_offset

    # file        - a path String or any object responding to #to_path;
    #               defaults to the placeholder '[String]'
    # line_number - defaults to 1
    # char_offset, byte_offset - stored as given, default 0
    def initialize(file = '[String]', line_number = 1, char_offset: 0, byte_offset: 0)
      @file = file
      @line_number = line_number
      @char_offset = char_offset
      @byte_offset = byte_offset
    end

    # "<path relative to the working directory>:<line number>"
    def to_s
      "#{relative_file_name}:#{line_number}"
    end

    # The file as a plain path: #to_path when available, else the file itself.
    def file_name
      file.respond_to?(:to_path) ? file.to_path : file
    end

    # The file's path relative to the current working directory, as a
    # Pathname.
    #
    # The path is resolved against the working directory first, because
    # Pathname#relative_path_from raises ArgumentError when asked to relate a
    # relative path (such as the default '[String]') to an absolute one.
    # Joining an already absolute path leaves it unchanged.
    def relative_file_name
      working_directory = Pathname.pwd
      working_directory.join(file_name).relative_path_from(working_directory)
    end
  end
end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+ require_relative '../spellr'
5
+ require_relative 'column_location'
6
+ require_relative 'token'
7
+
8
module Spellr
  # Splits one line of text into the terms worth spell checking.
  #
  # Built on StringScanner: at each position it first tries to skip things
  # that are not words (punctuation, URLs, keys, hex colours, escapes, ...)
  # and the `spellr:disable` / `spellr:enable` flags, then scans one term.
  # Terms shorter than Spellr.config.word_minimum_length are dropped, and
  # terms found while disabled are not yielded.
  class LineTokenizer < StringScanner # rubocop:disable Metrics/ClassLength
    attr_reader :line
    attr_accessor :disabled
    alias_method :disabled?, :disabled
    attr_accessor :skip_uri
    alias_method :skip_uri?, :skip_uri
    attr_accessor :skip_key
    alias_method :skip_key?, :skip_key

    # line     - the first positional argument, passed through
    #            Spellr::Token.wrap; its #to_s is the text that gets scanned
    # skip_uri - whether to skip things that look like URIs
    # skip_key - whether to skip things that look like base64-ish keys
    def initialize(*line, skip_uri: true, skip_key: true)
      @line = Spellr::Token.wrap(line.first)
      @skip_uri = skip_uri
      @skip_key = skip_key

      super(@line.to_s)
    end

    # Replaces the line being scanned.
    def string=(line)
      @line = Token.wrap(line)
      super(@line.to_s)
    end

    # Yields each term (a String) found from the current position to the end
    # of the line. Returns an Enumerator when called without a block.
    def each_term
      return enum_for(:each_term) unless block_given?

      until eos?
        term = next_term
        next unless term
        next if disabled?

        yield term
      end
    end

    # Like #each_term, but yields a Token carrying the line and the term's
    # column location. Returns an Enumerator when called without a block.
    def each_token
      return enum_for(:each_token) unless block_given?

      # The scanner is positioned just past the term while the block runs,
      # which is what #column_location relies on.
      each_term do |term|
        yield Token.new(term, line: line, location: column_location(term))
      end
    end

    private

    # Location of `term`, assuming the scanner is positioned just past it.
    def column_location(term)
      ColumnLocation.new(
        byte_offset: pos - term.bytesize,
        char_offset: charpos - term.length,
        line_location: line.location.line_location
      )
    end

    # Truthy when something was skipped at the current position.
    def skip_nonwords_and_flags
      skip_nonwords || skip_and_track_enable || skip_and_track_disable
    end

    # Returns the next term, or nil once the end of the line is reached.
    #
    # Written as a loop rather than recursion so that a long run of skipped
    # characters cannot exhaust the call stack.
    def next_term
      until eos?
        next if skip_nonwords_and_flags

        start = pos
        term = scan_term
        return term if term

        # scan_term normally consumed a too-short word here. Defensive: if
        # nothing matched at all, step over one character so the loop always
        # makes progress.
        getch if pos == start
      end
    end

    # Scans one word in any of the supported casings; returns it only when it
    # is at least Spellr.config.word_minimum_length long. A shorter word is
    # still consumed.
    def scan_term
      term = title_case || lower_case || upper_case || other_case

      return term if term && term.length >= Spellr.config.word_minimum_length
    end

    NOT_EVEN_NON_WORDS_RE = %r{[^[:alpha:]/%#0-9\\]+}.freeze # everything not covered by more specific skips/scans
    LEFTOVER_NON_WORD_BITS_RE = %r{[/%#0-9\\]}.freeze # e.g. a / not starting //a-url.com
    HEX_RE = /(?:#(?:\h{6}|\h{3})|0x\h+)(?![[:alpha:]])/.freeze
    SHELL_COLOR_ESCAPE_RE = /\\(e|033)\[\d+(;\d+)*m/.freeze
    BACKSLASH_ESCAPE_RE = /\\[a-zA-Z]/.freeze # TODO: hex escapes e.g. \xAA. TODO: language aware escapes
    REPEATED_SINGLE_LETTERS_RE = /(?:([[:alpha:]])\1+)(?![[:alpha:]])/.freeze # e.g. xxxxxxxx (it's not a word)
    # https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding
    # Two upper-case hex digits after a percent sign. Skipped as a unit so a
    # trailing hex letter is not glued onto the following word.
    # URL_ENCODED_ENTITIES_RE = /%(3A|2F|3F|5B|5D|%2A|%2B|%2C|%3B|%3D)/i.freeze
    URL_ENCODED_ENTITIES_RE = /%[0-9A-F]{2}/.freeze
    # A run of the alphabet starting at "a" (a, ab, abc, ... up to z) that is
    # not followed by another letter. Built as a(b(c(...(z)?...)?)?)?
    sequential_tail = ('b'..'z').reverse_each.inject('') { |rest, letter| "(#{letter}#{rest})?" }
    SEQUENTIAL_LETTERS_RE = /a#{sequential_tail}(?![[:alpha:]])/i.freeze

    # Tries each non-word pattern in turn; truthy when one of them consumed
    # something. Order matters: the broad patterns come first, and the
    # single-character leftover skip comes after the more specific ones.
    def skip_nonwords # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      skip(NOT_EVEN_NON_WORDS_RE) ||
        skip_uri_heuristically ||
        skip_key_heuristically ||
        skip(HEX_RE) ||
        skip(URL_ENCODED_ENTITIES_RE) ||
        skip(SHELL_COLOR_ESCAPE_RE) ||
        skip(BACKSLASH_ESCAPE_RE) ||
        skip(LEFTOVER_NON_WORD_BITS_RE) ||
        skip(REPEATED_SINGLE_LETTERS_RE) ||
        skip(SEQUENTIAL_LETTERS_RE)
    end

    # Hand-rolled on purpose: something is needed to match on heuristically,
    # and URLs are difficult.
    URL_RE = %r{
      (?<scheme>//|https?://|s?ftp://|mailto:)?
      (?<userinfo>[[:alnum:]]+(?::[[:alnum:]]+)?@)?
      (?<hostname>(?:[[:alnum:]-]+(?:\\?\.[[:alnum:]-]+)+|localhost|\d{1,3}(?:\.\d{1,3}){3}))
      (?<port>:\d+)?
      (?<path>/(?:[[:alnum:]=!$&\-/._\\]|%\h{2})+)?
      (?<query>\?(?:[[:alnum:]=!$\-/.\\]|%\h{2})+(?:&(?:[[:alnum:]=!$\-/.\\]|%\h{2})+)*)?
      (?<fragment>\#(?:[[:alnum:]=!$&\-/.\\]|%\h{2})+)?
    }x.freeze

    # Skips a URI-looking run, but only when it has a scheme, userinfo or a
    # path; a bare hostname such as `foo.bar` is left for normal scanning.
    # The regex is matched more than once: first to find the candidate, then
    # against the candidate to read the named captures, then to consume it.
    def skip_uri_heuristically
      return unless skip_uri?
      return unless match?(URL_RE)

      captures = URL_RE.match(matched).named_captures
      skip(URL_RE) if captures['scheme'] || captures['userinfo'] || captures['path']
    end

    # url unsafe base64 or url safe base64
    # TODO: character distribution heuristic
    KEY_FULL_RE = %r{([A-Za-z\d+/]|[A-Za-z\d\-_])+[=.]*}.freeze
    KEY_RE = %r{
      (?:
        [A-Za-z\-_+/=]+|
        [\d\-_+/=]+
      )
    }x.freeze

    # Skips a key-looking run when it alternates between letter chunks and
    # digit chunks at least three times.
    def skip_key_heuristically
      return unless skip_key?
      return unless match?(KEY_FULL_RE)

      # Counted with String#scan rather than capture groups.
      chunks = matched.scan(KEY_RE)
      return unless chunks.length >= 3 # number chosen arbitrarily

      skip(KEY_FULL_RE)
    end

    # jump to character-aware position
    def charpos=(new_charpos)
      skip(/.{#{new_charpos - charpos}}/m)
    end

    # [Word], [Word]Word [Word]'s [Wordn't]
    TITLE_CASE_RE = /[[:upper:]][[:lower:]]+(?:['’][[:lower:]]+(?<!['’]s))*/.freeze
    def title_case
      scan(TITLE_CASE_RE)
    end

    # [word] [word]'s [wordn't]
    LOWER_CASE_RE = /[[:lower:]]+(?:['’][[:lower:]]+(?<!['’]s))*/.freeze
    def lower_case
      scan(LOWER_CASE_RE)
    end

    # [WORD] [WORD]Word [WORDN'T] [WORD]'S [WORD]'s [WORD]s
    UPPER_CASE_RE = /[[:upper:]]+(?:['’][[:upper:]]+(?<!['’][Ss]))*((?![[:lower:]])|(?=s(?![[:lower:]])))/.freeze
    def upper_case
      scan(UPPER_CASE_RE)
    end

    # for characters in [:alpha:] that aren't in [:lower:] or [:upper:] e.g. Arabic
    OTHER_CASE_RE = /[[:alpha:]]+/.freeze
    def other_case
      scan(OTHER_CASE_RE)
    end

    SPELLR_DISABLE_RE = /spellr:disable/.freeze
    # Consumes a disable flag and turns checking off. Truthy when consumed.
    def skip_and_track_disable
      return unless skip(SPELLR_DISABLE_RE)

      self.disabled = true
    end

    SPELLR_ENABLE_RE = /spellr:enable/.freeze
    # Consumes an enable flag and turns checking back on. Returns true when
    # consumed (not the assigned `false`), so callers see that text was
    # skipped and re-run the non-word skips at the new position.
    def skip_and_track_enable
      return unless skip(SPELLR_ENABLE_RE)

      self.disabled = false
      true
    end
  end
end