spellr 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +14 -14
- data/lib/.spellr.yml +2 -0
- data/lib/spellr/backports.rb +16 -6
- data/lib/spellr/base_reporter.rb +54 -0
- data/lib/spellr/check.rb +54 -20
- data/lib/spellr/cli.rb +13 -6
- data/lib/spellr/column_location.rb +1 -1
- data/lib/spellr/config.rb +6 -45
- data/lib/spellr/config_loader.rb +10 -6
- data/lib/spellr/file.rb +15 -2
- data/lib/spellr/file_list.rb +21 -17
- data/lib/spellr/interactive.rb +51 -116
- data/lib/spellr/interactive_add.rb +64 -0
- data/lib/spellr/interactive_replacement.rb +69 -0
- data/lib/spellr/key_tuner/naive_bayes.rb +49 -91
- data/lib/spellr/key_tuner/possible_key.rb +36 -32
- data/lib/spellr/key_tuner/stats.rb +26 -7
- data/lib/spellr/language.rb +28 -44
- data/lib/spellr/line_location.rb +13 -7
- data/lib/spellr/line_tokenizer.rb +35 -134
- data/lib/spellr/output.rb +62 -0
- data/lib/spellr/output_stubbed.rb +58 -0
- data/lib/spellr/quiet_reporter.rb +13 -0
- data/lib/spellr/reporter.rb +9 -13
- data/lib/spellr/token.rb +14 -16
- data/lib/spellr/token_regexps.rb +103 -0
- data/lib/spellr/tokenizer.rb +35 -14
- data/lib/spellr/version.rb +1 -1
- data/lib/spellr/wordlist.rb +29 -25
- data/lib/spellr/wordlist_reporter.rb +16 -8
- data/lib/spellr.rb +1 -0
- data/wordlists/ruby.txt +1046 -13
- metadata +9 -2
data/lib/spellr/interactive.rb
CHANGED
@@ -3,52 +3,62 @@
|
|
3
3
|
require 'io/console'
|
4
4
|
require 'readline'
|
5
5
|
require_relative '../spellr'
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
6
|
+
require_relative 'interactive_add'
|
7
|
+
require_relative 'interactive_replacement'
|
8
|
+
require_relative 'base_reporter'
|
8
9
|
|
9
10
|
module Spellr
|
10
|
-
class Interactive
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
attr_reader :global_insensitive_replacements
|
15
|
-
attr_reader :global_insensitive_skips
|
16
|
-
attr_accessor :total_skipped
|
17
|
-
attr_accessor :total_fixed
|
18
|
-
attr_accessor :total_added
|
11
|
+
class Interactive < BaseReporter
|
12
|
+
def parallel?
|
13
|
+
false
|
14
|
+
end
|
19
15
|
|
20
|
-
def finish
|
16
|
+
def finish # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
21
17
|
puts "\n"
|
22
|
-
puts "#{pluralize 'file', checked} checked"
|
18
|
+
puts "#{pluralize 'file', counts[:checked]} checked"
|
23
19
|
puts "#{pluralize 'error', total} found"
|
24
|
-
|
25
|
-
|
26
|
-
|
20
|
+
if counts[:total_skipped].positive?
|
21
|
+
puts "#{pluralize 'error', counts[:total_skipped]} skipped"
|
22
|
+
end
|
23
|
+
puts "#{pluralize 'error', counts[:total_fixed]} fixed" if counts[:total_fixed].positive?
|
24
|
+
puts "#{pluralize 'word', counts[:total_added]} added" if counts[:total_added].positive?
|
27
25
|
end
|
28
26
|
|
29
|
-
def
|
30
|
-
|
27
|
+
def global_replacements
|
28
|
+
@global_replacements ||= begin
|
29
|
+
counts[:global_replacements] = {} unless counts.key?(:global_replacements)
|
30
|
+
counts[:global_replacements]
|
31
|
+
end
|
31
32
|
end
|
32
33
|
|
33
|
-
def
|
34
|
-
@
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
@total_skipped = 0
|
39
|
-
@total_fixed = 0
|
40
|
-
@total_added = 0
|
34
|
+
def global_skips
|
35
|
+
@global_skips ||= begin
|
36
|
+
counts[:global_skips] = [] unless counts.key?(:global_skips)
|
37
|
+
counts[:global_skips]
|
38
|
+
end
|
41
39
|
end
|
42
40
|
|
43
41
|
def call(token)
|
44
42
|
return if attempt_global_replacement(token)
|
45
43
|
return if attempt_global_skip(token)
|
46
44
|
|
47
|
-
|
45
|
+
super
|
48
46
|
|
49
47
|
prompt(token)
|
50
48
|
end
|
51
49
|
|
50
|
+
def stdin_getch
|
51
|
+
choice = output.stdin.getch
|
52
|
+
clear_current_line
|
53
|
+
choice
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def total
|
59
|
+
counts[:total_skipped] + counts[:total_fixed] + counts[:total_added]
|
60
|
+
end
|
61
|
+
|
52
62
|
def prompt(token)
|
53
63
|
print bold('[r,R,s,S,a,e,?]')
|
54
64
|
|
@@ -58,21 +68,18 @@ module Spellr
|
|
58
68
|
end
|
59
69
|
|
60
70
|
def attempt_global_skip(token)
|
61
|
-
return unless global_skips.include?(token.to_s)
|
62
|
-
global_insensitive_skips.include?(token.normalize)
|
71
|
+
return unless global_skips.include?(token.to_s)
|
63
72
|
|
64
73
|
puts "Automatically skipped #{red(token)}"
|
65
|
-
|
74
|
+
increment(:total_skipped)
|
66
75
|
end
|
67
76
|
|
68
|
-
def attempt_global_replacement(token)
|
69
|
-
|
70
|
-
global_replacement ||= global_insensitive_replacements[token.normalize]
|
71
|
-
return unless global_replacement
|
77
|
+
def attempt_global_replacement(token, replacement = global_replacements[token.to_s])
|
78
|
+
return unless replacement
|
72
79
|
|
73
|
-
token.replace(
|
74
|
-
|
75
|
-
puts "Automatically replaced #{red(token)} with #{green(
|
80
|
+
token.replace(replacement)
|
81
|
+
increment(:total_fixed)
|
82
|
+
puts "Automatically replaced #{red(token)} with #{green(replacement)}"
|
76
83
|
throw :check_file_from, token
|
77
84
|
end
|
78
85
|
|
@@ -81,28 +88,21 @@ module Spellr
|
|
81
88
|
end
|
82
89
|
|
83
90
|
def handle_response(token) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
84
|
-
|
85
|
-
clear_current_line
|
86
|
-
|
87
|
-
case task
|
91
|
+
case stdin_getch
|
88
92
|
when "\u0003" # ctrl c
|
89
93
|
exit 1
|
90
94
|
when 'a'
|
91
|
-
|
95
|
+
Spellr::InteractiveAdd.new(token, self)
|
92
96
|
when 's', "\u0004" # ctrl d
|
93
97
|
handle_skip(token)
|
94
98
|
when 'S'
|
95
99
|
handle_skip(token) { |skip_token| global_skips << skip_token.to_s }
|
96
|
-
when 'i'
|
97
|
-
handle_skip(token) { |skip_token| global_insensitive_skips << skip_token.downcase }
|
98
100
|
when 'R'
|
99
|
-
|
100
|
-
when 'I'
|
101
|
-
handle_replacement(token) { |replacement| global_insensitive_replacements[token.normalize] = replacement }
|
101
|
+
Spellr::InteractiveReplacement.new(token, self).global_replace
|
102
102
|
when 'r'
|
103
|
-
|
103
|
+
Spellr::InteractiveReplacement.new(token, self).replace
|
104
104
|
when 'e'
|
105
|
-
|
105
|
+
Spellr::InteractiveReplacement.new(token, self).replace_line
|
106
106
|
when '?'
|
107
107
|
handle_help(token)
|
108
108
|
else
|
@@ -112,66 +112,12 @@ module Spellr
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def handle_skip(token)
|
115
|
-
|
115
|
+
increment(:total_skipped)
|
116
116
|
yield token if block_given?
|
117
117
|
puts "Skipped #{red(token)}"
|
118
118
|
end
|
119
119
|
|
120
|
-
#
|
121
|
-
def handle_add(token) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
122
|
-
puts "Add #{red(token)} to wordlist:"
|
123
|
-
languages = Spellr.config.languages_for(token.location.file.path)
|
124
|
-
|
125
|
-
languages.each do |language|
|
126
|
-
puts "[#{language.key}] #{language.name}"
|
127
|
-
end
|
128
|
-
choice = STDIN.getch
|
129
|
-
clear_current_line
|
130
|
-
case choice
|
131
|
-
when "\u0003" # ctrl c
|
132
|
-
puts '^C again to exit'
|
133
|
-
call(token)
|
134
|
-
when *languages.map(&:key)
|
135
|
-
wl = languages.find { |w| w.key == choice }.project_wordlist
|
136
|
-
|
137
|
-
wl.add(token)
|
138
|
-
self.total_added += 1
|
139
|
-
puts "Added #{red(token)} to #{wl.name} wordlist"
|
140
|
-
throw :check_file_from, token
|
141
|
-
else
|
142
|
-
handle_add(token)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
def handle_replacement(token, original_token: token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
147
|
-
readline_editable_print(token.chomp)
|
148
|
-
highlighted_token = token == original_token ? red(token) : token.highlight(original_token.char_range)
|
149
|
-
puts "#{aqua '>>'} #{highlighted_token.chomp}"
|
150
|
-
prompt = "#{aqua '=>'} "
|
151
|
-
replacement = Readline.readline(prompt)
|
152
|
-
if replacement.empty?
|
153
|
-
call(token)
|
154
|
-
else
|
155
|
-
full_replacement = token == original_token ? replacement : replacement + "\n"
|
156
|
-
token.replace(full_replacement)
|
157
|
-
yield replacement if block_given?
|
158
|
-
self.total_fixed += 1
|
159
|
-
puts "Replaced #{red(token.chomp)} with #{green(replacement.chomp)}"
|
160
|
-
throw :check_file_from, token
|
161
|
-
end
|
162
|
-
rescue Interrupt
|
163
|
-
puts '^C again to exit'
|
164
|
-
call(original_token)
|
165
|
-
end
|
166
|
-
|
167
|
-
def handle_replace_line(token)
|
168
|
-
handle_replacement(
|
169
|
-
token.line,
|
170
|
-
original_token: token
|
171
|
-
)
|
172
|
-
end
|
173
|
-
|
174
|
-
def handle_help(token) # rubocop:disable Metrics/AbcSize
|
120
|
+
def handle_help(token) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
175
121
|
puts "#{bold '[r]'} Replace #{red token}"
|
176
122
|
puts "#{bold '[R]'} Replace all future instances of #{red token}"
|
177
123
|
puts "#{bold '[s]'} Skip #{red token}"
|
@@ -181,16 +127,5 @@ module Spellr
|
|
181
127
|
puts "#{bold '[?]'} Show this help"
|
182
128
|
handle_response(token)
|
183
129
|
end
|
184
|
-
|
185
|
-
def readline_editable_print(string)
|
186
|
-
Readline.pre_input_hook = lambda {
|
187
|
-
Readline.refresh_line
|
188
|
-
Readline.insert_text string.to_s
|
189
|
-
Readline.redisplay
|
190
|
-
|
191
|
-
# Remove the hook right away.
|
192
|
-
Readline.pre_input_hook = nil
|
193
|
-
}
|
194
|
-
end
|
195
130
|
end
|
196
131
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'string_format'
|
4
|
+
|
5
|
+
module Spellr
|
6
|
+
class InteractiveAdd
|
7
|
+
include Spellr::StringFormat
|
8
|
+
|
9
|
+
attr_reader :token, :reporter
|
10
|
+
|
11
|
+
def initialize(token, reporter)
|
12
|
+
@token = token
|
13
|
+
@reporter = reporter
|
14
|
+
|
15
|
+
ask_wordlist
|
16
|
+
end
|
17
|
+
|
18
|
+
def languages
|
19
|
+
@languages ||= Spellr.config.languages_for(token.location.file.to_path)
|
20
|
+
end
|
21
|
+
|
22
|
+
def language_keys
|
23
|
+
@language_keys ||= @languages.map(&:key)
|
24
|
+
end
|
25
|
+
|
26
|
+
def ask_wordlist
|
27
|
+
puts "Add #{red(token)} to wordlist:"
|
28
|
+
|
29
|
+
languages.each do |language|
|
30
|
+
puts "[#{language.key}] #{language.name}"
|
31
|
+
end
|
32
|
+
|
33
|
+
handle_wordlist_choice(reporter.stdin_getch)
|
34
|
+
end
|
35
|
+
|
36
|
+
def handle_ctrl_c
|
37
|
+
puts '^C again to exit'
|
38
|
+
reporter.call(token)
|
39
|
+
end
|
40
|
+
|
41
|
+
def handle_wordlist_choice(choice) # rubocop:disable Metrics/MethodLength
|
42
|
+
case choice
|
43
|
+
when "\u0003"
|
44
|
+
handle_ctrl_c
|
45
|
+
when *language_keys
|
46
|
+
add_to_wordlist(choice)
|
47
|
+
else
|
48
|
+
ask_wordlist
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def add_to_wordlist(choice)
|
53
|
+
wordlist = languages.find { |w| w.key == choice }.project_wordlist
|
54
|
+
wordlist << token
|
55
|
+
reporter.increment(:total_added)
|
56
|
+
puts "Added #{red(token)} to #{wordlist.name} wordlist"
|
57
|
+
throw :check_file_from, token
|
58
|
+
end
|
59
|
+
|
60
|
+
def puts(str)
|
61
|
+
reporter.puts(str)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spellr
|
4
|
+
class InteractiveReplacement
|
5
|
+
include Spellr::StringFormat
|
6
|
+
|
7
|
+
attr_reader :token, :reporter, :original_token, :token_highlight, :suffix
|
8
|
+
|
9
|
+
def initialize(token, reporter)
|
10
|
+
@original_token = @token = token
|
11
|
+
@token_highlight = red(token)
|
12
|
+
@reporter = reporter
|
13
|
+
Readline.input = reporter.output.stdin
|
14
|
+
Readline.output = reporter.output.stdout
|
15
|
+
end
|
16
|
+
|
17
|
+
def global_replace
|
18
|
+
replace { |replacement| reporter.global_replacements[token.to_s] = replacement }
|
19
|
+
end
|
20
|
+
|
21
|
+
def replace_line
|
22
|
+
@token = token.line
|
23
|
+
@token_highlight = token.highlight(original_token.char_range).chomp
|
24
|
+
@suffix = "\n"
|
25
|
+
|
26
|
+
replace
|
27
|
+
end
|
28
|
+
|
29
|
+
def complete_replacement(replacement)
|
30
|
+
token.replace("#{replacement}#{suffix}")
|
31
|
+
|
32
|
+
reporter.increment(:total_fixed)
|
33
|
+
puts "Replaced #{red(token.chomp)} with #{green(replacement)}"
|
34
|
+
throw :check_file_from, token
|
35
|
+
end
|
36
|
+
|
37
|
+
def replace # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
38
|
+
readline_editable_print(token.chomp)
|
39
|
+
|
40
|
+
puts "#{aqua '>>'} #{token_highlight}"
|
41
|
+
replacement = Readline.readline("#{aqua '=>'} ")
|
42
|
+
|
43
|
+
return reporter.call(token) if replacement.empty?
|
44
|
+
|
45
|
+
yield replacement if block_given?
|
46
|
+
complete_replacement(replacement)
|
47
|
+
rescue Interrupt
|
48
|
+
puts '^C again to exit'
|
49
|
+
reporter.call(original_token)
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def readline_editable_print(string) # rubocop:disable Metrics/MethodLength
|
55
|
+
Readline.pre_input_hook = lambda {
|
56
|
+
Readline.refresh_line
|
57
|
+
Readline.insert_text string.to_s
|
58
|
+
Readline.redisplay
|
59
|
+
|
60
|
+
# Remove the hook right away.
|
61
|
+
Readline.pre_input_hook = nil
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
def puts(str)
|
66
|
+
reporter.puts(str)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -6,26 +6,22 @@ require 'yaml'
|
|
6
6
|
# this is lifted in whole from this article. i don't understand the maths and i don't want to
|
7
7
|
# https://www.sitepoint.com/machine-learning-ruby-naive-bayes-theorem/
|
8
8
|
|
9
|
-
class NaiveBayes
|
10
|
-
include Stats
|
11
|
-
|
9
|
+
class NaiveBayes
|
12
10
|
YAML_PATH = File.join(__dir__, 'data.yml')
|
13
11
|
|
14
|
-
def
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
data[key_key] << key.features
|
23
|
-
end
|
12
|
+
def initialize(path = YAML_PATH)
|
13
|
+
load_from_yaml(path) if File.exist?(path)
|
14
|
+
end
|
15
|
+
|
16
|
+
def key?(string)
|
17
|
+
@key ||= {}
|
18
|
+
@key.fetch(string) do
|
19
|
+
@key[string] = classify(PossibleKey.new(string).features).start_with?('key')
|
24
20
|
end
|
25
21
|
end
|
26
22
|
|
27
|
-
def load_from_yaml
|
28
|
-
data = YAML.safe_load(::File.read(
|
23
|
+
def load_from_yaml(path = YAML_PATH)
|
24
|
+
data = YAML.safe_load(::File.read(path), [Symbol])
|
29
25
|
|
30
26
|
@feature_set = data[:feature_set]
|
31
27
|
@num_classes = data[:num_classes]
|
@@ -33,18 +29,27 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
33
29
|
@features = data[:features]
|
34
30
|
end
|
35
31
|
|
36
|
-
def save_to_yaml
|
32
|
+
def save_to_yaml(path = YAML_PATH)
|
33
|
+
write_yaml(path,
|
34
|
+
feature_set: feature_set,
|
35
|
+
num_classes: num_classes,
|
36
|
+
classes: classes,
|
37
|
+
features: features)
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def write_yaml(path = YAML_PATH, **hash)
|
37
43
|
require 'yaml'
|
38
|
-
|
39
|
-
|
40
|
-
num_classes: num_classes,
|
41
|
-
classes: classes,
|
42
|
-
features: features
|
43
|
-
}.to_yaml)
|
44
|
+
|
45
|
+
File.write(path, hash.to_yaml)
|
44
46
|
end
|
45
47
|
|
46
|
-
def
|
47
|
-
|
48
|
+
def training_data
|
49
|
+
@training_data ||= PossibleKey.keys.each_with_object({}) do |key, data|
|
50
|
+
data[key.classification] ||= []
|
51
|
+
data[key.classification] << key.features
|
52
|
+
end
|
48
53
|
end
|
49
54
|
|
50
55
|
def num_classes
|
@@ -59,51 +64,35 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
59
64
|
@features ||= training_data.first.last.first.keys
|
60
65
|
end
|
61
66
|
|
62
|
-
def feature_set
|
67
|
+
def feature_set
|
63
68
|
@feature_set ||= classes.each.with_object({}) do |class_name, feature_set|
|
64
|
-
feature_set[class_name] = {}
|
65
|
-
|
66
|
-
features.each do |feature|
|
67
|
-
values = training_data[class_name].map do |row|
|
68
|
-
row[feature]
|
69
|
-
end
|
70
|
-
|
71
|
-
feature_set[class_name][feature] = {
|
72
|
-
standard_deviation: standard_deviation(values),
|
73
|
-
mean: mean(values),
|
74
|
-
variance: variance(values)
|
75
|
-
}
|
69
|
+
feature_set[class_name] = features.each.with_object({}) do |feature, feature_set_for_class|
|
70
|
+
feature_set_for_class[feature] = feature_stats_for_class(class_name, feature)
|
76
71
|
end
|
77
72
|
end
|
78
73
|
end
|
79
74
|
|
75
|
+
def feature_stats_for_class(class_name, feature)
|
76
|
+
values = training_data[class_name].map { |row| row[feature] }
|
77
|
+
|
78
|
+
feature_stats(values)
|
79
|
+
end
|
80
|
+
|
81
|
+
def feature_stats(values)
|
82
|
+
{
|
83
|
+
standard_deviation: Stats.standard_deviation(values),
|
84
|
+
mean: Stats.mean(values),
|
85
|
+
variance: Stats.variance(values)
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
80
89
|
# given a class, this method determines the probability
|
81
90
|
# of a certain value occurring for a given feature
|
82
|
-
#
|
91
|
+
# feature: name of the feature in consideration in the training data
|
83
92
|
# value: the value of the feature for which we are finding the probability
|
84
93
|
# class_name: name of the class in consideration
|
85
|
-
def feature_probability(feature, value, class_name)
|
86
|
-
|
87
|
-
fs = feature_set[class_name][feature]
|
88
|
-
|
89
|
-
# statistical properties of the feature set
|
90
|
-
fs_std = fs[:standard_deviation]
|
91
|
-
fs_mean = fs[:mean]
|
92
|
-
fs_var = fs[:variance]
|
93
|
-
|
94
|
-
# deal with the edge case of a 0 standard deviation
|
95
|
-
if fs_std == 0
|
96
|
-
return fs_mean == value ? 1.0 : 0.0
|
97
|
-
end
|
98
|
-
|
99
|
-
# calculate the gaussian probability
|
100
|
-
pi = Math::PI
|
101
|
-
e = Math::E
|
102
|
-
|
103
|
-
exp = -((value - fs_mean)**2) / (2 * fs_var)
|
104
|
-
probability = (1.0 / Math.sqrt(2 * pi * fs_var)) * (e**exp)
|
105
|
-
|
106
|
-
probability
|
94
|
+
def feature_probability(feature, value, class_name)
|
95
|
+
Stats.gaussian_probability(value, feature_set[class_name][feature])
|
107
96
|
end
|
108
97
|
|
109
98
|
# multiply together the feature probabilities for all of the
|
@@ -114,24 +103,6 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
114
103
|
end
|
115
104
|
end
|
116
105
|
|
117
|
-
def debug(string) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
118
|
-
require 'terminal-table'
|
119
|
-
|
120
|
-
features = PossibleKey.new(string).features
|
121
|
-
|
122
|
-
table = Terminal::Table.new do |t|
|
123
|
-
t << ['classes'] + classes
|
124
|
-
t << :separator
|
125
|
-
t << ['probabilities'] + classes.map { |c| class_probability(features, c) }
|
126
|
-
features.each do |key, value|
|
127
|
-
t << [key] + classes.map { |c| feature_probability(key, value, c).round(4) }
|
128
|
-
end
|
129
|
-
end
|
130
|
-
puts table
|
131
|
-
|
132
|
-
nil
|
133
|
-
end
|
134
|
-
|
135
106
|
# this is where we compute the final naive Bayesian probability
|
136
107
|
# for a given set of features being a part of a given class.
|
137
108
|
def class_probability(features, class_name)
|
@@ -141,22 +112,9 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
141
112
|
feature_bayes * class_fraction
|
142
113
|
end
|
143
114
|
|
144
|
-
# This the method we should be calling!
|
145
|
-
# Given a set of feature values, it decides
|
146
|
-
# what class to categorize them under
|
147
115
|
def classify(features)
|
148
116
|
classes.max_by do |class_name|
|
149
117
|
class_probability(features, class_name)
|
150
118
|
end
|
151
119
|
end
|
152
|
-
|
153
|
-
def key?(string)
|
154
|
-
key_cache[string]
|
155
|
-
end
|
156
|
-
|
157
|
-
def key_cache
|
158
|
-
@key_cache ||= Hash.new do |cache, string|
|
159
|
-
cache[string] = classify(PossibleKey.new(string).features).start_with?('key')
|
160
|
-
end
|
161
|
-
end
|
162
120
|
end
|
@@ -6,23 +6,35 @@ require_relative 'stats'
|
|
6
6
|
class PossibleKey # rubocop:disable Metrics/ClassLength
|
7
7
|
include Stats
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
VOWELS = %i{
|
10
|
+
a e i o u
|
11
|
+
A E I O U
|
12
|
+
}.freeze
|
13
|
+
CONSONANTS = %i{
|
14
|
+
b c d f g h j k l m n p q r s t v w x y z
|
15
|
+
B C D F G H J K L M N P Q R S T V W X Y Z
|
16
|
+
}.freeze
|
17
|
+
BASE_64 = VOWELS + CONSONANTS + %i{0 1 2 3 4 5 6 7 8 9 - _ + / =}.freeze
|
18
|
+
LETTER_COUNT_HASH = BASE_64.map { |k| [k.to_sym, 0] }.to_h
|
19
|
+
FEATURE_LETTERS = %i{+ - _ / A z Z q Q X x}.freeze
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
21
|
+
class << self
|
22
|
+
def keys
|
23
|
+
@keys ||= begin
|
24
|
+
load_from_file('false_positives.txt', false) +
|
25
|
+
load_from_file('keys.txt', true)
|
26
|
+
end
|
20
27
|
end
|
21
28
|
|
22
|
-
|
23
|
-
next if line.chomp.empty?
|
29
|
+
private
|
24
30
|
|
25
|
-
|
31
|
+
def load_from_file(filename, key)
|
32
|
+
Pathname.new(__dir__).join('data', filename).each_line.map! do |line|
|
33
|
+
line = line.chomp
|
34
|
+
next if line.empty?
|
35
|
+
|
36
|
+
PossibleKey.new(line, key)
|
37
|
+
end.compact
|
26
38
|
end
|
27
39
|
end
|
28
40
|
|
@@ -35,7 +47,7 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
35
47
|
|
36
48
|
def features # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
37
49
|
{
|
38
|
-
**
|
50
|
+
**letter_frequency_difference_features,
|
39
51
|
equal: letter_count[:'='],
|
40
52
|
length: length,
|
41
53
|
hex: character_set == :hex ? 1 : 0,
|
@@ -69,24 +81,20 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
69
81
|
@key
|
70
82
|
end
|
71
83
|
|
84
|
+
def classification
|
85
|
+
key_class = key? ? 'key' : 'not_key'
|
86
|
+
"#{key_class}_#{character_set}"
|
87
|
+
end
|
88
|
+
|
72
89
|
def length
|
73
90
|
string.length
|
74
91
|
end
|
75
92
|
|
76
|
-
|
77
|
-
|
78
|
-
def significant_letter_frequency_difference
|
79
|
-
letter_frequency_difference.slice(*SIGNIFICANT_LETTERS)
|
80
|
-
end
|
81
|
-
else
|
82
|
-
def significant_letter_frequency_difference
|
83
|
-
letter_frequency_difference.each.with_object({}) do |key, value, hash|
|
84
|
-
hash[key] = value if SIGNIFICANT_LETTERS.include?(key)
|
85
|
-
end
|
86
|
-
end
|
93
|
+
def letter_frequency_difference_features
|
94
|
+
letter_frequency_difference.slice(*FEATURE_LETTERS)
|
87
95
|
end
|
88
96
|
|
89
|
-
def character_set
|
97
|
+
def character_set # rubocop:disable Metrics/MethodLength
|
90
98
|
@character_set ||= case string
|
91
99
|
when /^[a-fA-F0-9\-]+$/ then :hex
|
92
100
|
when /^[a-z0-9]+$/ then :lower36
|
@@ -97,7 +105,7 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
97
105
|
end
|
98
106
|
end
|
99
107
|
|
100
|
-
def character_set_total
|
108
|
+
def character_set_total # rubocop:disable Metrics/MethodLength
|
101
109
|
case character_set
|
102
110
|
when :hex then 16
|
103
111
|
when :lower36 then 36
|
@@ -110,11 +118,9 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
110
118
|
1.0 / character_set_total * length
|
111
119
|
end
|
112
120
|
|
113
|
-
LETTER_COUNT_HASH = (('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a + %w{+ _ / - =})
|
114
|
-
.map { |k| [k.to_sym, 0] }.to_h
|
115
121
|
def letter_count
|
116
122
|
@letter_count ||= begin
|
117
|
-
string.chars.
|
123
|
+
string.chars.each_with_object(LETTER_COUNT_HASH.dup) do |letter, hash|
|
118
124
|
hash[letter.to_sym] += 1
|
119
125
|
end
|
120
126
|
end
|
@@ -136,8 +142,6 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
136
142
|
end
|
137
143
|
end
|
138
144
|
|
139
|
-
VOWELS = %i{a e i o u A E I O U}.freeze
|
140
|
-
CONSONANTS = %i{b c d f g h j k l m n p q r s t v w x y z B C D F G H J K L M N P Q R S T V W X Y Z}.freeze
|
141
145
|
def vowel_consonant_ratio
|
142
146
|
vowels = letter_count.fetch_values(*VOWELS).sum
|
143
147
|
consonants = letter_count.fetch_values(*CONSONANTS).sum
|