spellr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +186 -0
  5. data/.ruby-version +1 -0
  6. data/.spellr.yml +23 -0
  7. data/.spellr_wordlists/dictionary.txt +120 -0
  8. data/.spellr_wordlists/english.txt +3 -0
  9. data/.spellr_wordlists/lorem.txt +4 -0
  10. data/.spellr_wordlists/ruby.txt +2 -0
  11. data/.travis.yml +7 -0
  12. data/Gemfile +8 -0
  13. data/Gemfile.lock +67 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +64 -0
  16. data/Rakefile +8 -0
  17. data/bin/console +8 -0
  18. data/bin/fetch_wordlist/english +65 -0
  19. data/bin/fetch_wordlist/ruby +150 -0
  20. data/bin/setup +3 -0
  21. data/exe/spellr +5 -0
  22. data/lib/.spellr.yml +93 -0
  23. data/lib/spellr.rb +26 -0
  24. data/lib/spellr/check.rb +56 -0
  25. data/lib/spellr/cli.rb +205 -0
  26. data/lib/spellr/column_location.rb +49 -0
  27. data/lib/spellr/config.rb +105 -0
  28. data/lib/spellr/file.rb +27 -0
  29. data/lib/spellr/file_list.rb +45 -0
  30. data/lib/spellr/interactive.rb +191 -0
  31. data/lib/spellr/language.rb +104 -0
  32. data/lib/spellr/line_location.rb +29 -0
  33. data/lib/spellr/line_tokenizer.rb +181 -0
  34. data/lib/spellr/reporter.rb +27 -0
  35. data/lib/spellr/string_format.rb +43 -0
  36. data/lib/spellr/token.rb +83 -0
  37. data/lib/spellr/tokenizer.rb +72 -0
  38. data/lib/spellr/version.rb +5 -0
  39. data/lib/spellr/wordlist.rb +100 -0
  40. data/lib/spellr/wordlist_reporter.rb +21 -0
  41. data/spellr.gemspec +35 -0
  42. data/wordlist +2 -0
  43. data/wordlists/dockerfile.txt +21 -0
  44. data/wordlists/html.txt +340 -0
  45. data/wordlists/javascript.txt +64 -0
  46. data/wordlists/ruby.txt +2344 -0
  47. data/wordlists/shell.txt +2 -0
  48. metadata +217 -0
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'io/console'
4
+ require 'readline'
5
+ require_relative '../spellr'
6
+ require_relative 'reporter'
7
+ require_relative 'string_format'
8
+
9
module Spellr
  # Interactive handler for spelling errors.
  #
  # For each offending token it prints the error via Spellr::Reporter and then
  # reads a single keypress to decide what to do: add the word to a wordlist,
  # skip it, replace it, or edit the whole line. "Global" choices are remembered
  # for the lifetime of this object and applied to later tokens without
  # prompting. Counters of skipped / fixed / added tokens are kept for the
  # summary printed by #finish.
  #
  # Replacements and additions are signalled by raising Spellr::DidReplacement /
  # Spellr::DidAdd (defined elsewhere; presumably rescued by the caller that
  # drives the check -- confirm against the call site).
  class Interactive # rubocop:disable Metrics/ClassLength
    include Spellr::StringFormat

    attr_reader :global_replacements, :global_skips
    attr_reader :global_insensitive_replacements
    attr_reader :global_insensitive_skips
    attr_accessor :total_skipped
    attr_accessor :total_fixed
    attr_accessor :total_added

    def initialize
      @global_replacements = {}
      @global_insensitive_replacements = {}
      @global_skips = []
      @global_insensitive_skips = []
      @total_skipped = 0
      @total_fixed = 0
      @total_added = 0
    end

    # Print the end-of-run summary.
    #
    # checked - the number of files that were checked.
    def finish(checked) # rubocop:disable Metrics/AbcSize
      puts "\n"
      puts "#{pluralize 'file', checked} checked"
      puts "#{pluralize 'error', total} found"
      puts "#{pluralize 'error', total_skipped} skipped" if total_skipped.positive?
      puts "#{pluralize 'error', total_fixed} fixed" if total_fixed.positive?
      puts "#{pluralize 'word', total_added} added" if total_added.positive?
    end

    # Total number of errors handled so far (skipped + fixed + added).
    def total
      total_skipped + total_fixed + total_added
    end

    # Entry point for one misspelled token: apply a remembered global
    # replacement or skip if there is one, otherwise report it and prompt.
    def call(token)
      return if attempt_global_replacement(token)
      return if attempt_global_skip(token)

      Spellr::Reporter.new.call(token)

      prompt(token)
    end

    # Show the key menu and dispatch on the key pressed.
    def prompt(token)
      print bold('[a,s,S,r,R,e,?]')

      handle_response(token)
    rescue Interrupt
      puts '^C again to exit'
    end

    # Count the token as skipped when it was globally skipped earlier, either
    # exactly or by its normalized form. Returns a truthy value when skipped,
    # nil otherwise.
    def attempt_global_skip(token)
      return unless global_skips.include?(token.to_s) ||
                    global_insensitive_skips.include?(token.normalize)

      self.total_skipped += 1
    end

    # Apply a remembered replacement (exact match first, then normalized).
    # Returns nil when there is none; otherwise replaces the token, counts the
    # fix and raises Spellr::DidReplacement.
    def attempt_global_replacement(token)
      global_replacement = global_replacements[token.to_s]
      global_replacement ||= global_insensitive_replacements[token.normalize]
      return unless global_replacement

      token.replace(global_replacement)
      self.total_fixed += 1
      raise Spellr::DidReplacement, token
    end

    # Return the cursor to column 0 and erase the current terminal line.
    def clear_current_line
      print "\r\e[K"
    end

    # Read one keypress and run the matching action. Unknown keys re-run #call
    # for the same token.
    #
    # NOTE(review): 'i' and 'I' (case-insensitive skip / replace) are handled
    # here but are not listed in the menu or in #handle_help.
    def handle_response(token) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
      task = STDIN.getch
      clear_current_line

      case task
      when "\u0003" # ctrl c
        exit 1
      when 'a'
        handle_add(token)
      when 's', "\u0004" # ctrl d
        handle_skip(token)
      when 'S'
        handle_skip(token) { |skip_token| global_skips << skip_token.to_s }
      when 'i'
        # Store the same normalized form that #attempt_global_skip looks up,
        # so the remembered skip actually matches later tokens.
        handle_skip(token) { |skip_token| global_insensitive_skips << skip_token.normalize }
      when 'R'
        handle_replacement(token) { |replacement| global_replacements[token.to_s] = replacement }
      when 'I'
        handle_replacement(token) { |replacement| global_insensitive_replacements[token.normalize] = replacement }
      when 'r'
        handle_replacement(token)
      when 'e'
        handle_replace_line(token)
      when '?'
        handle_help(token)
      else
        clear_current_line
        call(token)
      end
    end

    # Count the token as skipped; the optional block receives the token so the
    # caller can remember the skip globally.
    def handle_skip(token)
      self.total_skipped += 1
      yield token if block_given?
    end

    # Ask which wordlist to add the token to (choices are numbered in hex, one
    # keypress each), add it, count it and raise Spellr::DidAdd. An invalid
    # choice asks again.
    #
    # TODO: handle more than 16 options
    def handle_add(token) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      puts "Add #{red(token)} to wordlist:"
      wordlists = Spellr.config.languages_for(token.location.file).flat_map(&:addable_wordlists)

      wordlists.each_with_index do |wordlist, i|
        puts "[#{i.to_s(16)}] #{wordlist.name}"
      end
      choice = STDIN.getch
      clear_current_line
      case choice
      when "\u0003" # ctrl c
        puts '^C again to exit'
        call(token)
      when /\h/
        wl = wordlists[choice.to_i(16)]
        return handle_add(token) unless wl

        wl.add(token)
        self.total_added += 1
        raise Spellr::DidAdd, token
      else
        handle_add(token)
      end
    end

    # Prompt (via Readline, pre-filled with the current text) for a replacement
    # of +token+.
    #
    # token          - the text being edited: the misspelled token itself, or
    #                  its whole line when called from #handle_replace_line.
    # original_token - the misspelled token; used for highlighting and for
    #                  re-prompting.
    #
    # Yields the replacement text when a block is given (used to remember
    # global replacements). Raises Spellr::DidReplacement after replacing.
    # An empty answer, EOF or ^C goes back to the prompt for original_token.
    def handle_replacement(token, original_token: token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      readline_editable_print(token.chomp)
      highlighted_token = token == original_token ? red(token) : token.highlight(original_token.char_range)
      readline_prompt = "#{aqua '>>'} #{highlighted_token.chomp}\n#{aqua '=>'} "
      replacement = Readline.readline(readline_prompt)

      # Readline.readline returns nil on EOF (ctrl-d); treat it like an empty
      # answer. Re-prompt for the misspelled token itself, not the line.
      return call(original_token) if replacement.nil? || replacement.empty?

      # A whole-line edit was chomped for display, so restore the newline.
      full_replacement = token == original_token ? replacement : replacement + "\n"
      token.replace(full_replacement)
      yield replacement if block_given?
      self.total_fixed += 1
      raise Spellr::DidReplacement, token
    rescue Interrupt
      puts '^C again to exit'
      call(original_token)
    end

    # Edit the whole line containing +token+ instead of just the token.
    def handle_replace_line(token)
      handle_replacement(
        token.line,
        original_token: token
      )
    end

    # Print the key reference, then wait for the next keypress.
    def handle_help(token) # rubocop:disable Metrics/AbcSize
      puts "#{bold '[r]'} Replace #{red token}"
      puts "#{bold '[R]'} Replace all future instances of #{red token}"
      puts "#{bold '[s]'} Skip #{red token}"
      puts "#{bold '[S]'} Skip all future instances of #{red token}"
      puts "#{bold '[a]'} Add #{red token} to a word list"
      puts "#{bold '[e]'} Edit the whole line"
      puts "#{bold '[?]'} Show this help"
      handle_response(token)
    end

    # Arrange for the next Readline prompt to start with +string+ already typed
    # in, so the user edits the existing text instead of retyping it.
    def readline_editable_print(string)
      Readline.pre_input_hook = lambda {
        Readline.refresh_line
        Readline.insert_text string.to_s
        Readline.redisplay

        # Remove the hook right away.
        Readline.pre_input_hook = nil
      }
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'wordlist'
4
+
5
module Spellr
  # A configured language: a name, the file patterns / hashbangs it applies to,
  # and the wordlists (configured, bundled with the gem, generated, and
  # project-local) that hold its known words.
  class Language
    attr_reader :name

    # name        - language name; also the basename of its default wordlists.
    # wordlists   - paths of extra wordlists configured for this language.
    # generate    - optional command line (a String) handed to Spellr::CLI to
    #               generate a wordlist when none exists yet.
    # only        - file patterns this language is limited to; empty means it
    #               applies to every file.
    # description - stored but not otherwise used in this class.
    # hashbangs   - substrings looked for in a file's hashbang line.
    def initialize(name, # rubocop:disable Metrics/ParameterLists
                   wordlists: [],
                   generate: nil,
                   only: [],
                   description: '',
                   hashbangs: [])
      @name = name
      @description = description
      @generate = generate
      @wordlist_paths = wordlists
      @only = only
      @hashbangs = hashbangs
    end

    # Whether this language applies to +file+.
    #
    # Returns true when no `only` patterns are configured, when any pattern
    # matches the file, or when the file's hashbang contains one of the
    # configured hashbang strings; false otherwise.
    def matches?(file)
      return true if @only.empty?

      file = Spellr::File.wrap(file)
      return true if @only.any? { |pattern| file.fnmatch?(pattern) }

      # Read the hashbang once rather than once per configured hashbang.
      hashbang = file.hashbang
      return false unless hashbang

      @hashbangs.any? { |expected| hashbang.include?(expected) }
    end

    # Wordlist objects for the explicitly configured wordlist paths (memoized).
    def config_wordlists
      @config_wordlists ||= @wordlist_paths.map(&Spellr::Wordlist.method(:new))
    end

    # Configured wordlist paths followed by the paths of the default wordlists.
    def all_wordlist_paths
      @wordlist_paths + default_wordlists.map(&:path)
    end

    # Wordlists to check against: the configured ones plus whichever default
    # wordlists exist. When that is empty, tries to generate one.
    def wordlists
      existing = config_wordlists + default_wordlists.select(&:exist?)
      return generate_wordlist if existing.empty?

      existing
    end

    # Run the configured generate command through Spellr::CLI and return the
    # configured plus default wordlists. Returns [] when no generate command is
    # configured.
    def generate_wordlist
      return [] unless generate

      # Loaded lazily, only when a wordlist actually has to be generated.
      require_relative 'cli'
      require 'shellwords'
      warn "Generating wordlist for #{name}"

      Spellr::CLI.new(generate.shellsplit)

      config_wordlists + default_wordlists
    end

    # Wordlists a user may add words to: configured wordlists that are not
    # defaults, plus the project wordlist, de-duplicated by path.
    def addable_wordlists
      ((config_wordlists - default_wordlists) + [project_wordlist]).uniq(&:path)
    end

    # The wordlist shipped with the gem: <gem root>/wordlists/<name>.txt
    def gem_wordlist
      @gem_wordlist ||= Spellr::Wordlist.new(
        Pathname.new(__dir__).parent.parent.join('wordlists', "#{name}.txt")
      )
    end

    # The project's own wordlist: ./.spellr_wordlists/<name>.txt
    def project_wordlist
      @project_wordlist ||= Spellr::Wordlist.new(
        Pathname.pwd.join('.spellr_wordlists', "#{name}.txt"),
        name: name
      )
    end

    # The project's generated wordlist: ./.spellr_wordlists/generated/<name>.txt
    def generated_project_wordlist
      @generated_project_wordlist ||= Spellr::Wordlist.new(
        Pathname.pwd.join('.spellr_wordlists', 'generated', "#{name}.txt")
      )
    end

    private

    attr_reader :generate

    # The three wordlists every language has by convention, whether or not the
    # files exist: gem, generated, project.
    def default_wordlists
      [
        gem_wordlist,
        generated_project_wordlist,
        project_wordlist
      ]
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
require 'pathname'

module Spellr
  # Where a line starts: the file it belongs to, its line number, and the
  # character / byte offsets stored alongside them.
  class LineLocation
    attr_reader :file
    attr_reader :line_number
    attr_reader :char_offset
    attr_reader :byte_offset

    # file        - a path String, or any object responding to #to_path;
    #               defaults to the placeholder '[String]'.
    # line_number - defaults to 1.
    # char_offset - defaults to 0.
    # byte_offset - defaults to 0.
    def initialize(file = '[String]', line_number = 1, char_offset: 0, byte_offset: 0)
      @file = file
      @line_number = line_number
      @char_offset = char_offset
      @byte_offset = byte_offset
    end

    # "<path relative to the working directory>:<line number>"
    def to_s
      "#{relative_file_name}:#{line_number}"
    end

    # The file as a path String when it responds to #to_path, otherwise the
    # file value itself.
    def file_name
      file.respond_to?(:to_path) ? file.to_path : file
    end

    # The file's path relative to the current working directory, as a Pathname.
    #
    # Pathname#relative_path_from raises ArgumentError when one path is
    # absolute and the other relative, so the file name is first resolved
    # against the working directory. Absolute names are left as they are by
    # Pathname#join; relative names (including the '[String]' placeholder)
    # come back unchanged.
    def relative_file_name
      working_directory = Pathname.pwd
      working_directory.join(file_name).relative_path_from(working_directory)
    end
  end
end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+ require_relative '../spellr'
5
+ require_relative 'column_location'
6
+ require_relative 'token'
7
+
8
module Spellr
  # Splits one line of text into the terms worth spell-checking.
  #
  # It is a StringScanner over the line: at each position it first tries to
  # skip things that are not words (punctuation, URLs, key-like strings, hex
  # values, escapes, ...) and the spellr:disable / spellr:enable markers, then
  # scans one word. Words shorter than Spellr.config.word_minimum_length are
  # dropped, and nothing is yielded while checking is disabled.
  class LineTokenizer < StringScanner # rubocop:disable Metrics/ClassLength
    attr_reader :line
    attr_accessor :disabled
    alias_method :disabled?, :disabled
    attr_accessor :skip_uri
    alias_method :skip_uri?, :skip_uri
    attr_accessor :skip_key
    alias_method :skip_key?, :skip_key

    # line     - the line to tokenize (only the first positional argument is
    #            used); wrapped with Spellr::Token.wrap.
    # skip_uri - when true, things that look like URLs are not tokenized.
    # skip_key - when true, things that look like keys are not tokenized.
    def initialize(*line, skip_uri: true, skip_key: true)
      @line = Spellr::Token.wrap(line.first)
      @skip_uri = skip_uri
      @skip_key = skip_key

      super(@line.to_s)
    end

    # Replace the line being scanned, keeping #line in sync.
    def string=(line)
      @line = Token.wrap(line)
      super(@line.to_s)
    end

    # Yield each remaining term of the line as a String.
    def each_term
      until eos?
        term = next_term
        next unless term
        next if disabled?

        yield term
      end
    end

    # Yield each remaining term of the line as a Token that knows its line and
    # column location.
    def each_token
      # column_location reads the scanner position, which at this point is
      # still just past the term that is being yielded.
      each_term do |term|
        yield Token.new(term, line: line, location: column_location(term))
      end
    end

    private

    # Location of +term+, assuming the scanner is positioned right after it.
    def column_location(term)
      ColumnLocation.new(
        byte_offset: pos - term.bytesize,
        char_offset: charpos - term.length,
        line_location: line.location.line_location
      )
    end

    # NOTE(review): skip_and_track_enable evaluates to false even when it
    # consumed "spellr:enable" (the assignment's value is false), so this
    # returns a falsey value in that case. #next_term copes by going on to
    # scan_term; kept as is to preserve the existing scanning order.
    def skip_nonwords_and_flags
      skip_nonwords || skip_and_track_enable || skip_and_track_disable
    end

    # The next term at or after the current position, or nil once the end of
    # the line is reached.
    #
    # Written as a loop rather than recursing once per skipped chunk, so very
    # long lines cannot exhaust the stack. Evaluation order is unchanged:
    # skip, and only when nothing was skipped try to scan a word.
    def next_term
      until eos?
        next if skip_nonwords_and_flags

        term = scan_term
        return term if term
      end
    end

    # Scan one word at the current position; returns it only when it is at
    # least Spellr.config.word_minimum_length characters long (the scanner
    # advances past shorter words regardless).
    def scan_term
      term = title_case || lower_case || upper_case || other_case

      return term if term && term.length >= Spellr.config.word_minimum_length
    end

    NOT_EVEN_NON_WORDS_RE = %r{[^[:alpha:]/%#0-9\\]+}.freeze # everything not covered by more specific skips/scans
    LEFTOVER_NON_WORD_BITS_RE = %r{[/%#0-9\\]}.freeze # e.g. a / not starting //a-url.com
    HEX_RE = /(?:#(?:\h{6}|\h{3})|0x\h+)(?![[:alpha:]])/.freeze
    SHELL_COLOR_ESCAPE_RE = /\\(e|033)\[\d+(;\d+)*m/.freeze
    BACKSLASH_ESCAPE_RE = /\\[a-zA-Z]/.freeze # TODO: hex escapes e.g. \xAA. TODO: language aware escapes
    REPEATED_SINGLE_LETTERS_RE = /(?:([[:alpha:]])\1+)(?![[:alpha:]])/.freeze # e.g. xxxxxxxx (it's not a word)
    # https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding
    # Upper-case percent-encoded bytes such as %3A or %9F. Digits are matched
    # as 0-9 so that bytes containing a 9 are covered as well.
    URL_ENCODED_ENTITIES_RE = /%[0-9A-F]{2}/.freeze
    # Runs of the alphabet starting at "a" (a, ab, abc, ... a-z), any case, not
    # followed by another letter. Generated instead of hand-typing 25 nested
    # optional groups: a(b(c(...(z)?...)?)?)?
    SEQUENTIAL_LETTERS_RE = begin
      optional_tail = ('b'..'z').reverse_each.inject('') { |tail, letter| "(#{letter}#{tail})?" }
      Regexp.new("a#{optional_tail}(?![[:alpha:]])", Regexp::IGNORECASE).freeze
    end

    # Try each non-word pattern in turn at the current position; the first one
    # that matches is consumed. Returns a truthy value when something was
    # skipped.
    def skip_nonwords # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      skip(NOT_EVEN_NON_WORDS_RE) ||
        skip_uri_heuristically ||
        skip_key_heuristically ||
        skip(HEX_RE) ||
        skip(URL_ENCODED_ENTITIES_RE) ||
        skip(SHELL_COLOR_ESCAPE_RE) ||
        skip(BACKSLASH_ESCAPE_RE) ||
        skip(LEFTOVER_NON_WORD_BITS_RE) ||
        skip(REPEATED_SINGLE_LETTERS_RE) ||
        skip(SEQUENTIAL_LETTERS_RE)
    end

    # I didn't want to do this myself. BUT i need something to heuristically match on, and it's difficult
    URL_RE = %r{
      (?<scheme>//|https?://|s?ftp://|mailto:)?
      (?<userinfo>[[:alnum:]]+(?::[[:alnum:]]+)?@)?
      (?<hostname>(?:[[:alnum:]-]+(?:\\?\.[[:alnum:]-]+)+|localhost|\d{1,3}(?:\.\d{1,3}){3}))
      (?<port>:\d+)?
      (?<path>/(?:[[:alnum:]=!$&\-/._\\]|%\h{2})+)?
      (?<query>\?(?:[[:alnum:]=!$\-/.\\]|%\h{2})+(?:&(?:[[:alnum:]=!$\-/.\\]|%\h{2})+)*)?
      (?<fragment>\#(?:[[:alnum:]=!$&\-/.\\]|%\h{2})+)?
    }x.freeze

    # Skip something URL-shaped, but only when it has a scheme, userinfo or a
    # path; a bare "word.word" is left for the word scanners.
    #
    # unfortunately i have to match this regex a couple times because stringscanner doesn't give me matchdata
    def skip_uri_heuristically
      return unless skip_uri?
      return unless match?(URL_RE)

      captures = URL_RE.match(matched).named_captures
      skip(URL_RE) if captures['scheme'] || captures['userinfo'] || captures['path']
    end

    # url unsafe base64 or url safe base64
    # TODO: character distribution heuristic
    KEY_FULL_RE = %r{([A-Za-z\d+/]|[A-Za-z\d\-_])+[=.]*}.freeze
    KEY_RE = %r{
      (?:
        [A-Za-z\-_+/=]+|
        [\d\-_+/=]+
      )
    }x.freeze

    # Skip something key-shaped: a base64-ish run that splits into at least
    # three alternating letter / digit chunks.
    def skip_key_heuristically
      return unless skip_key?
      return unless match?(KEY_FULL_RE)

      # can't use regular captures because repeated capture groups don't
      matches = matched.scan(KEY_RE)
      return unless matches.length >= 3 # number chosen arbitrarily

      skip(KEY_FULL_RE)
    end

    # jump to character-aware position
    def charpos=(new_charpos)
      skip(/.{#{new_charpos - charpos}}/m)
    end

    # [Word], [Word]Word [Word]'s [Wordn't]
    TITLE_CASE_RE = /[[:upper:]][[:lower:]]+(?:['’][[:lower:]]+(?<!['’]s))*/.freeze
    def title_case
      scan(TITLE_CASE_RE)
    end

    # [word] [word]'s [wordn't]
    LOWER_CASE_RE = /[[:lower:]]+(?:['’][[:lower:]]+(?<!['’]s))*/.freeze
    def lower_case
      scan(LOWER_CASE_RE)
    end

    # [WORD] [WORD]Word [WORDN'T] [WORD]'S [WORD]'s [WORD]s
    UPPER_CASE_RE = /[[:upper:]]+(?:['’][[:upper:]]+(?<!['’][Ss]))*((?![[:lower:]])|(?=s(?![[:lower:]])))/.freeze
    def upper_case
      scan(UPPER_CASE_RE)
    end

    # for characters in [:alpha:] that aren't in [:lower:] or [:upper:] e.g. Arabic
    OTHER_CASE_RE = /[[:alpha:]]+/.freeze
    def other_case
      scan(OTHER_CASE_RE)
    end

    # Consume a "spellr:disable" marker and stop yielding terms.
    SPELLR_DISABLE_RE = /spellr:disable/.freeze
    def skip_and_track_disable
      skip(SPELLR_DISABLE_RE) && self.disabled = true
    end

    # Consume a "spellr:enable" marker and resume yielding terms.
    SPELLR_ENABLE_RE = /spellr:enable/.freeze
    def skip_and_track_enable
      skip(SPELLR_ENABLE_RE) && self.disabled = false
    end
  end
end