spellr 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +14 -14
- data/lib/.spellr.yml +2 -0
- data/lib/spellr/backports.rb +16 -6
- data/lib/spellr/base_reporter.rb +54 -0
- data/lib/spellr/check.rb +54 -20
- data/lib/spellr/cli.rb +13 -6
- data/lib/spellr/column_location.rb +1 -1
- data/lib/spellr/config.rb +6 -45
- data/lib/spellr/config_loader.rb +10 -6
- data/lib/spellr/file.rb +15 -2
- data/lib/spellr/file_list.rb +21 -17
- data/lib/spellr/interactive.rb +51 -116
- data/lib/spellr/interactive_add.rb +64 -0
- data/lib/spellr/interactive_replacement.rb +69 -0
- data/lib/spellr/key_tuner/naive_bayes.rb +49 -91
- data/lib/spellr/key_tuner/possible_key.rb +36 -32
- data/lib/spellr/key_tuner/stats.rb +26 -7
- data/lib/spellr/language.rb +28 -44
- data/lib/spellr/line_location.rb +13 -7
- data/lib/spellr/line_tokenizer.rb +35 -134
- data/lib/spellr/output.rb +62 -0
- data/lib/spellr/output_stubbed.rb +58 -0
- data/lib/spellr/quiet_reporter.rb +13 -0
- data/lib/spellr/reporter.rb +9 -13
- data/lib/spellr/token.rb +14 -16
- data/lib/spellr/token_regexps.rb +103 -0
- data/lib/spellr/tokenizer.rb +35 -14
- data/lib/spellr/version.rb +1 -1
- data/lib/spellr/wordlist.rb +29 -25
- data/lib/spellr/wordlist_reporter.rb +16 -8
- data/lib/spellr.rb +1 -0
- data/wordlists/ruby.txt +1046 -13
- metadata +9 -2
data/lib/spellr/interactive.rb
CHANGED
@@ -3,52 +3,62 @@
|
|
3
3
|
require 'io/console'
|
4
4
|
require 'readline'
|
5
5
|
require_relative '../spellr'
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
6
|
+
require_relative 'interactive_add'
|
7
|
+
require_relative 'interactive_replacement'
|
8
|
+
require_relative 'base_reporter'
|
8
9
|
|
9
10
|
module Spellr
|
10
|
-
class Interactive
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
attr_reader :global_insensitive_replacements
|
15
|
-
attr_reader :global_insensitive_skips
|
16
|
-
attr_accessor :total_skipped
|
17
|
-
attr_accessor :total_fixed
|
18
|
-
attr_accessor :total_added
|
11
|
+
class Interactive < BaseReporter
|
12
|
+
def parallel?
|
13
|
+
false
|
14
|
+
end
|
19
15
|
|
20
|
-
def finish
|
16
|
+
def finish # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
21
17
|
puts "\n"
|
22
|
-
puts "#{pluralize 'file', checked} checked"
|
18
|
+
puts "#{pluralize 'file', counts[:checked]} checked"
|
23
19
|
puts "#{pluralize 'error', total} found"
|
24
|
-
|
25
|
-
|
26
|
-
|
20
|
+
if counts[:total_skipped].positive?
|
21
|
+
puts "#{pluralize 'error', counts[:total_skipped]} skipped"
|
22
|
+
end
|
23
|
+
puts "#{pluralize 'error', counts[:total_fixed]} fixed" if counts[:total_fixed].positive?
|
24
|
+
puts "#{pluralize 'word', counts[:total_added]} added" if counts[:total_added].positive?
|
27
25
|
end
|
28
26
|
|
29
|
-
def
|
30
|
-
|
27
|
+
def global_replacements
|
28
|
+
@global_replacements ||= begin
|
29
|
+
counts[:global_replacements] = {} unless counts.key?(:global_replacements)
|
30
|
+
counts[:global_replacements]
|
31
|
+
end
|
31
32
|
end
|
32
33
|
|
33
|
-
def
|
34
|
-
@
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
@total_skipped = 0
|
39
|
-
@total_fixed = 0
|
40
|
-
@total_added = 0
|
34
|
+
def global_skips
|
35
|
+
@global_skips ||= begin
|
36
|
+
counts[:global_skips] = [] unless counts.key?(:global_skips)
|
37
|
+
counts[:global_skips]
|
38
|
+
end
|
41
39
|
end
|
42
40
|
|
43
41
|
def call(token)
|
44
42
|
return if attempt_global_replacement(token)
|
45
43
|
return if attempt_global_skip(token)
|
46
44
|
|
47
|
-
|
45
|
+
super
|
48
46
|
|
49
47
|
prompt(token)
|
50
48
|
end
|
51
49
|
|
50
|
+
def stdin_getch
|
51
|
+
choice = output.stdin.getch
|
52
|
+
clear_current_line
|
53
|
+
choice
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def total
|
59
|
+
counts[:total_skipped] + counts[:total_fixed] + counts[:total_added]
|
60
|
+
end
|
61
|
+
|
52
62
|
def prompt(token)
|
53
63
|
print bold('[r,R,s,S,a,e,?]')
|
54
64
|
|
@@ -58,21 +68,18 @@ module Spellr
|
|
58
68
|
end
|
59
69
|
|
60
70
|
def attempt_global_skip(token)
|
61
|
-
return unless global_skips.include?(token.to_s)
|
62
|
-
global_insensitive_skips.include?(token.normalize)
|
71
|
+
return unless global_skips.include?(token.to_s)
|
63
72
|
|
64
73
|
puts "Automatically skipped #{red(token)}"
|
65
|
-
|
74
|
+
increment(:total_skipped)
|
66
75
|
end
|
67
76
|
|
68
|
-
def attempt_global_replacement(token)
|
69
|
-
|
70
|
-
global_replacement ||= global_insensitive_replacements[token.normalize]
|
71
|
-
return unless global_replacement
|
77
|
+
def attempt_global_replacement(token, replacement = global_replacements[token.to_s])
|
78
|
+
return unless replacement
|
72
79
|
|
73
|
-
token.replace(
|
74
|
-
|
75
|
-
puts "Automatically replaced #{red(token)} with #{green(
|
80
|
+
token.replace(replacement)
|
81
|
+
increment(:total_fixed)
|
82
|
+
puts "Automatically replaced #{red(token)} with #{green(replacement)}"
|
76
83
|
throw :check_file_from, token
|
77
84
|
end
|
78
85
|
|
@@ -81,28 +88,21 @@ module Spellr
|
|
81
88
|
end
|
82
89
|
|
83
90
|
def handle_response(token) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
84
|
-
|
85
|
-
clear_current_line
|
86
|
-
|
87
|
-
case task
|
91
|
+
case stdin_getch
|
88
92
|
when "\u0003" # ctrl c
|
89
93
|
exit 1
|
90
94
|
when 'a'
|
91
|
-
|
95
|
+
Spellr::InteractiveAdd.new(token, self)
|
92
96
|
when 's', "\u0004" # ctrl d
|
93
97
|
handle_skip(token)
|
94
98
|
when 'S'
|
95
99
|
handle_skip(token) { |skip_token| global_skips << skip_token.to_s }
|
96
|
-
when 'i'
|
97
|
-
handle_skip(token) { |skip_token| global_insensitive_skips << skip_token.downcase }
|
98
100
|
when 'R'
|
99
|
-
|
100
|
-
when 'I'
|
101
|
-
handle_replacement(token) { |replacement| global_insensitive_replacements[token.normalize] = replacement }
|
101
|
+
Spellr::InteractiveReplacement.new(token, self).global_replace
|
102
102
|
when 'r'
|
103
|
-
|
103
|
+
Spellr::InteractiveReplacement.new(token, self).replace
|
104
104
|
when 'e'
|
105
|
-
|
105
|
+
Spellr::InteractiveReplacement.new(token, self).replace_line
|
106
106
|
when '?'
|
107
107
|
handle_help(token)
|
108
108
|
else
|
@@ -112,66 +112,12 @@ module Spellr
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def handle_skip(token)
|
115
|
-
|
115
|
+
increment(:total_skipped)
|
116
116
|
yield token if block_given?
|
117
117
|
puts "Skipped #{red(token)}"
|
118
118
|
end
|
119
119
|
|
120
|
-
#
|
121
|
-
def handle_add(token) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
122
|
-
puts "Add #{red(token)} to wordlist:"
|
123
|
-
languages = Spellr.config.languages_for(token.location.file.path)
|
124
|
-
|
125
|
-
languages.each do |language|
|
126
|
-
puts "[#{language.key}] #{language.name}"
|
127
|
-
end
|
128
|
-
choice = STDIN.getch
|
129
|
-
clear_current_line
|
130
|
-
case choice
|
131
|
-
when "\u0003" # ctrl c
|
132
|
-
puts '^C again to exit'
|
133
|
-
call(token)
|
134
|
-
when *languages.map(&:key)
|
135
|
-
wl = languages.find { |w| w.key == choice }.project_wordlist
|
136
|
-
|
137
|
-
wl.add(token)
|
138
|
-
self.total_added += 1
|
139
|
-
puts "Added #{red(token)} to #{wl.name} wordlist"
|
140
|
-
throw :check_file_from, token
|
141
|
-
else
|
142
|
-
handle_add(token)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
def handle_replacement(token, original_token: token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
147
|
-
readline_editable_print(token.chomp)
|
148
|
-
highlighted_token = token == original_token ? red(token) : token.highlight(original_token.char_range)
|
149
|
-
puts "#{aqua '>>'} #{highlighted_token.chomp}"
|
150
|
-
prompt = "#{aqua '=>'} "
|
151
|
-
replacement = Readline.readline(prompt)
|
152
|
-
if replacement.empty?
|
153
|
-
call(token)
|
154
|
-
else
|
155
|
-
full_replacement = token == original_token ? replacement : replacement + "\n"
|
156
|
-
token.replace(full_replacement)
|
157
|
-
yield replacement if block_given?
|
158
|
-
self.total_fixed += 1
|
159
|
-
puts "Replaced #{red(token.chomp)} with #{green(replacement.chomp)}"
|
160
|
-
throw :check_file_from, token
|
161
|
-
end
|
162
|
-
rescue Interrupt
|
163
|
-
puts '^C again to exit'
|
164
|
-
call(original_token)
|
165
|
-
end
|
166
|
-
|
167
|
-
def handle_replace_line(token)
|
168
|
-
handle_replacement(
|
169
|
-
token.line,
|
170
|
-
original_token: token
|
171
|
-
)
|
172
|
-
end
|
173
|
-
|
174
|
-
def handle_help(token) # rubocop:disable Metrics/AbcSize
|
120
|
+
def handle_help(token) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
175
121
|
puts "#{bold '[r]'} Replace #{red token}"
|
176
122
|
puts "#{bold '[R]'} Replace all future instances of #{red token}"
|
177
123
|
puts "#{bold '[s]'} Skip #{red token}"
|
@@ -181,16 +127,5 @@ module Spellr
|
|
181
127
|
puts "#{bold '[?]'} Show this help"
|
182
128
|
handle_response(token)
|
183
129
|
end
|
184
|
-
|
185
|
-
def readline_editable_print(string)
|
186
|
-
Readline.pre_input_hook = lambda {
|
187
|
-
Readline.refresh_line
|
188
|
-
Readline.insert_text string.to_s
|
189
|
-
Readline.redisplay
|
190
|
-
|
191
|
-
# Remove the hook right away.
|
192
|
-
Readline.pre_input_hook = nil
|
193
|
-
}
|
194
|
-
end
|
195
130
|
end
|
196
131
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'string_format'
|
4
|
+
|
5
|
+
module Spellr
|
6
|
+
class InteractiveAdd
|
7
|
+
include Spellr::StringFormat
|
8
|
+
|
9
|
+
attr_reader :token, :reporter
|
10
|
+
|
11
|
+
def initialize(token, reporter)
|
12
|
+
@token = token
|
13
|
+
@reporter = reporter
|
14
|
+
|
15
|
+
ask_wordlist
|
16
|
+
end
|
17
|
+
|
18
|
+
def languages
|
19
|
+
@languages ||= Spellr.config.languages_for(token.location.file.to_path)
|
20
|
+
end
|
21
|
+
|
22
|
+
def language_keys
|
23
|
+
@language_keys ||= @languages.map(&:key)
|
24
|
+
end
|
25
|
+
|
26
|
+
def ask_wordlist
|
27
|
+
puts "Add #{red(token)} to wordlist:"
|
28
|
+
|
29
|
+
languages.each do |language|
|
30
|
+
puts "[#{language.key}] #{language.name}"
|
31
|
+
end
|
32
|
+
|
33
|
+
handle_wordlist_choice(reporter.stdin_getch)
|
34
|
+
end
|
35
|
+
|
36
|
+
def handle_ctrl_c
|
37
|
+
puts '^C again to exit'
|
38
|
+
reporter.call(token)
|
39
|
+
end
|
40
|
+
|
41
|
+
def handle_wordlist_choice(choice) # rubocop:disable Metrics/MethodLength
|
42
|
+
case choice
|
43
|
+
when "\u0003"
|
44
|
+
handle_ctrl_c
|
45
|
+
when *language_keys
|
46
|
+
add_to_wordlist(choice)
|
47
|
+
else
|
48
|
+
ask_wordlist
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def add_to_wordlist(choice)
|
53
|
+
wordlist = languages.find { |w| w.key == choice }.project_wordlist
|
54
|
+
wordlist << token
|
55
|
+
reporter.increment(:total_added)
|
56
|
+
puts "Added #{red(token)} to #{wordlist.name} wordlist"
|
57
|
+
throw :check_file_from, token
|
58
|
+
end
|
59
|
+
|
60
|
+
def puts(str)
|
61
|
+
reporter.puts(str)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spellr
|
4
|
+
class InteractiveReplacement
|
5
|
+
include Spellr::StringFormat
|
6
|
+
|
7
|
+
attr_reader :token, :reporter, :original_token, :token_highlight, :suffix
|
8
|
+
|
9
|
+
def initialize(token, reporter)
|
10
|
+
@original_token = @token = token
|
11
|
+
@token_highlight = red(token)
|
12
|
+
@reporter = reporter
|
13
|
+
Readline.input = reporter.output.stdin
|
14
|
+
Readline.output = reporter.output.stdout
|
15
|
+
end
|
16
|
+
|
17
|
+
def global_replace
|
18
|
+
replace { |replacement| reporter.global_replacements[token.to_s] = replacement }
|
19
|
+
end
|
20
|
+
|
21
|
+
def replace_line
|
22
|
+
@token = token.line
|
23
|
+
@token_highlight = token.highlight(original_token.char_range).chomp
|
24
|
+
@suffix = "\n"
|
25
|
+
|
26
|
+
replace
|
27
|
+
end
|
28
|
+
|
29
|
+
def complete_replacement(replacement)
|
30
|
+
token.replace("#{replacement}#{suffix}")
|
31
|
+
|
32
|
+
reporter.increment(:total_fixed)
|
33
|
+
puts "Replaced #{red(token.chomp)} with #{green(replacement)}"
|
34
|
+
throw :check_file_from, token
|
35
|
+
end
|
36
|
+
|
37
|
+
def replace # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
38
|
+
readline_editable_print(token.chomp)
|
39
|
+
|
40
|
+
puts "#{aqua '>>'} #{token_highlight}"
|
41
|
+
replacement = Readline.readline("#{aqua '=>'} ")
|
42
|
+
|
43
|
+
return reporter.call(token) if replacement.empty?
|
44
|
+
|
45
|
+
yield replacement if block_given?
|
46
|
+
complete_replacement(replacement)
|
47
|
+
rescue Interrupt
|
48
|
+
puts '^C again to exit'
|
49
|
+
reporter.call(original_token)
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def readline_editable_print(string) # rubocop:disable Metrics/MethodLength
|
55
|
+
Readline.pre_input_hook = lambda {
|
56
|
+
Readline.refresh_line
|
57
|
+
Readline.insert_text string.to_s
|
58
|
+
Readline.redisplay
|
59
|
+
|
60
|
+
# Remove the hook right away.
|
61
|
+
Readline.pre_input_hook = nil
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
def puts(str)
|
66
|
+
reporter.puts(str)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -6,26 +6,22 @@ require 'yaml'
|
|
6
6
|
# this is lifted in whole from this article. i don't understand the maths and i don't want to
|
7
7
|
# https://www.sitepoint.com/machine-learning-ruby-naive-bayes-theorem/
|
8
8
|
|
9
|
-
class NaiveBayes
|
10
|
-
include Stats
|
11
|
-
|
9
|
+
class NaiveBayes
|
12
10
|
YAML_PATH = File.join(__dir__, 'data.yml')
|
13
11
|
|
14
|
-
def
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
data[key_key] << key.features
|
23
|
-
end
|
12
|
+
def initialize(path = YAML_PATH)
|
13
|
+
load_from_yaml(path) if File.exist?(path)
|
14
|
+
end
|
15
|
+
|
16
|
+
def key?(string)
|
17
|
+
@key ||= {}
|
18
|
+
@key.fetch(string) do
|
19
|
+
@key[string] = classify(PossibleKey.new(string).features).start_with?('key')
|
24
20
|
end
|
25
21
|
end
|
26
22
|
|
27
|
-
def load_from_yaml
|
28
|
-
data = YAML.safe_load(::File.read(
|
23
|
+
def load_from_yaml(path = YAML_PATH)
|
24
|
+
data = YAML.safe_load(::File.read(path), [Symbol])
|
29
25
|
|
30
26
|
@feature_set = data[:feature_set]
|
31
27
|
@num_classes = data[:num_classes]
|
@@ -33,18 +29,27 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
33
29
|
@features = data[:features]
|
34
30
|
end
|
35
31
|
|
36
|
-
def save_to_yaml
|
32
|
+
def save_to_yaml(path = YAML_PATH)
|
33
|
+
write_yaml(path,
|
34
|
+
feature_set: feature_set,
|
35
|
+
num_classes: num_classes,
|
36
|
+
classes: classes,
|
37
|
+
features: features)
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def write_yaml(path = YAML_PATH, **hash)
|
37
43
|
require 'yaml'
|
38
|
-
|
39
|
-
|
40
|
-
num_classes: num_classes,
|
41
|
-
classes: classes,
|
42
|
-
features: features
|
43
|
-
}.to_yaml)
|
44
|
+
|
45
|
+
File.write(path, hash.to_yaml)
|
44
46
|
end
|
45
47
|
|
46
|
-
def
|
47
|
-
|
48
|
+
def training_data
|
49
|
+
@training_data ||= PossibleKey.keys.each_with_object({}) do |key, data|
|
50
|
+
data[key.classification] ||= []
|
51
|
+
data[key.classification] << key.features
|
52
|
+
end
|
48
53
|
end
|
49
54
|
|
50
55
|
def num_classes
|
@@ -59,51 +64,35 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
59
64
|
@features ||= training_data.first.last.first.keys
|
60
65
|
end
|
61
66
|
|
62
|
-
def feature_set
|
67
|
+
def feature_set
|
63
68
|
@feature_set ||= classes.each.with_object({}) do |class_name, feature_set|
|
64
|
-
feature_set[class_name] = {}
|
65
|
-
|
66
|
-
features.each do |feature|
|
67
|
-
values = training_data[class_name].map do |row|
|
68
|
-
row[feature]
|
69
|
-
end
|
70
|
-
|
71
|
-
feature_set[class_name][feature] = {
|
72
|
-
standard_deviation: standard_deviation(values),
|
73
|
-
mean: mean(values),
|
74
|
-
variance: variance(values)
|
75
|
-
}
|
69
|
+
feature_set[class_name] = features.each.with_object({}) do |feature, feature_set_for_class|
|
70
|
+
feature_set_for_class[feature] = feature_stats_for_class(class_name, feature)
|
76
71
|
end
|
77
72
|
end
|
78
73
|
end
|
79
74
|
|
75
|
+
def feature_stats_for_class(class_name, feature)
|
76
|
+
values = training_data[class_name].map { |row| row[feature] }
|
77
|
+
|
78
|
+
feature_stats(values)
|
79
|
+
end
|
80
|
+
|
81
|
+
def feature_stats(values)
|
82
|
+
{
|
83
|
+
standard_deviation: Stats.standard_deviation(values),
|
84
|
+
mean: Stats.mean(values),
|
85
|
+
variance: Stats.variance(values)
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
80
89
|
# given a class, this method determines the probability
|
81
90
|
# of a certain value occurring for a given feature
|
82
|
-
#
|
91
|
+
# feature: name of the feature in consideration in the training data
|
83
92
|
# value: the value of the feature for which we are finding the probability
|
84
93
|
# class_name: name of the class in consideration
|
85
|
-
def feature_probability(feature, value, class_name)
|
86
|
-
|
87
|
-
fs = feature_set[class_name][feature]
|
88
|
-
|
89
|
-
# statistical properties of the feature set
|
90
|
-
fs_std = fs[:standard_deviation]
|
91
|
-
fs_mean = fs[:mean]
|
92
|
-
fs_var = fs[:variance]
|
93
|
-
|
94
|
-
# deal with the edge case of a 0 standard deviation
|
95
|
-
if fs_std == 0
|
96
|
-
return fs_mean == value ? 1.0 : 0.0
|
97
|
-
end
|
98
|
-
|
99
|
-
# calculate the gaussian probability
|
100
|
-
pi = Math::PI
|
101
|
-
e = Math::E
|
102
|
-
|
103
|
-
exp = -((value - fs_mean)**2) / (2 * fs_var)
|
104
|
-
probability = (1.0 / Math.sqrt(2 * pi * fs_var)) * (e**exp)
|
105
|
-
|
106
|
-
probability
|
94
|
+
def feature_probability(feature, value, class_name)
|
95
|
+
Stats.gaussian_probability(value, feature_set[class_name][feature])
|
107
96
|
end
|
108
97
|
|
109
98
|
# multiply together the feature probabilities for all of the
|
@@ -114,24 +103,6 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
114
103
|
end
|
115
104
|
end
|
116
105
|
|
117
|
-
def debug(string) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
118
|
-
require 'terminal-table'
|
119
|
-
|
120
|
-
features = PossibleKey.new(string).features
|
121
|
-
|
122
|
-
table = Terminal::Table.new do |t|
|
123
|
-
t << ['classes'] + classes
|
124
|
-
t << :separator
|
125
|
-
t << ['probabilities'] + classes.map { |c| class_probability(features, c) }
|
126
|
-
features.each do |key, value|
|
127
|
-
t << [key] + classes.map { |c| feature_probability(key, value, c).round(4) }
|
128
|
-
end
|
129
|
-
end
|
130
|
-
puts table
|
131
|
-
|
132
|
-
nil
|
133
|
-
end
|
134
|
-
|
135
106
|
# this is where we compute the final naive Bayesian probability
|
136
107
|
# for a given set of features being a part of a given class.
|
137
108
|
def class_probability(features, class_name)
|
@@ -141,22 +112,9 @@ class NaiveBayes # rubocop:disable Metrics/ClassLength
|
|
141
112
|
feature_bayes * class_fraction
|
142
113
|
end
|
143
114
|
|
144
|
-
# This the method we should be calling!
|
145
|
-
# Given a set of feature values, it decides
|
146
|
-
# what class to categorize them under
|
147
115
|
def classify(features)
|
148
116
|
classes.max_by do |class_name|
|
149
117
|
class_probability(features, class_name)
|
150
118
|
end
|
151
119
|
end
|
152
|
-
|
153
|
-
def key?(string)
|
154
|
-
key_cache[string]
|
155
|
-
end
|
156
|
-
|
157
|
-
def key_cache
|
158
|
-
@key_cache ||= Hash.new do |cache, string|
|
159
|
-
cache[string] = classify(PossibleKey.new(string).features).start_with?('key')
|
160
|
-
end
|
161
|
-
end
|
162
120
|
end
|
@@ -6,23 +6,35 @@ require_relative 'stats'
|
|
6
6
|
class PossibleKey # rubocop:disable Metrics/ClassLength
|
7
7
|
include Stats
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
VOWELS = %i{
|
10
|
+
a e i o u
|
11
|
+
A E I O U
|
12
|
+
}.freeze
|
13
|
+
CONSONANTS = %i{
|
14
|
+
b c d f g h j k l m n p q r s t v w x y z
|
15
|
+
B C D F G H J K L M N P Q R S T V W X Y Z
|
16
|
+
}.freeze
|
17
|
+
BASE_64 = VOWELS + CONSONANTS + %i{0 1 2 3 4 5 6 7 8 9 - _ + / =}.freeze
|
18
|
+
LETTER_COUNT_HASH = BASE_64.map { |k| [k.to_sym, 0] }.to_h
|
19
|
+
FEATURE_LETTERS = %i{+ - _ / A z Z q Q X x}.freeze
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
21
|
+
class << self
|
22
|
+
def keys
|
23
|
+
@keys ||= begin
|
24
|
+
load_from_file('false_positives.txt', false) +
|
25
|
+
load_from_file('keys.txt', true)
|
26
|
+
end
|
20
27
|
end
|
21
28
|
|
22
|
-
|
23
|
-
next if line.chomp.empty?
|
29
|
+
private
|
24
30
|
|
25
|
-
|
31
|
+
def load_from_file(filename, key)
|
32
|
+
Pathname.new(__dir__).join('data', filename).each_line.map! do |line|
|
33
|
+
line = line.chomp
|
34
|
+
next if line.empty?
|
35
|
+
|
36
|
+
PossibleKey.new(line, key)
|
37
|
+
end.compact
|
26
38
|
end
|
27
39
|
end
|
28
40
|
|
@@ -35,7 +47,7 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
35
47
|
|
36
48
|
def features # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
37
49
|
{
|
38
|
-
**
|
50
|
+
**letter_frequency_difference_features,
|
39
51
|
equal: letter_count[:'='],
|
40
52
|
length: length,
|
41
53
|
hex: character_set == :hex ? 1 : 0,
|
@@ -69,24 +81,20 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
69
81
|
@key
|
70
82
|
end
|
71
83
|
|
84
|
+
def classification
|
85
|
+
key_class = key? ? 'key' : 'not_key'
|
86
|
+
"#{key_class}_#{character_set}"
|
87
|
+
end
|
88
|
+
|
72
89
|
def length
|
73
90
|
string.length
|
74
91
|
end
|
75
92
|
|
76
|
-
|
77
|
-
|
78
|
-
def significant_letter_frequency_difference
|
79
|
-
letter_frequency_difference.slice(*SIGNIFICANT_LETTERS)
|
80
|
-
end
|
81
|
-
else
|
82
|
-
def significant_letter_frequency_difference
|
83
|
-
letter_frequency_difference.each.with_object({}) do |key, value, hash|
|
84
|
-
hash[key] = value if SIGNIFICANT_LETTERS.include?(key)
|
85
|
-
end
|
86
|
-
end
|
93
|
+
def letter_frequency_difference_features
|
94
|
+
letter_frequency_difference.slice(*FEATURE_LETTERS)
|
87
95
|
end
|
88
96
|
|
89
|
-
def character_set
|
97
|
+
def character_set # rubocop:disable Metrics/MethodLength
|
90
98
|
@character_set ||= case string
|
91
99
|
when /^[a-fA-F0-9\-]+$/ then :hex
|
92
100
|
when /^[a-z0-9]+$/ then :lower36
|
@@ -97,7 +105,7 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
97
105
|
end
|
98
106
|
end
|
99
107
|
|
100
|
-
def character_set_total
|
108
|
+
def character_set_total # rubocop:disable Metrics/MethodLength
|
101
109
|
case character_set
|
102
110
|
when :hex then 16
|
103
111
|
when :lower36 then 36
|
@@ -110,11 +118,9 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
110
118
|
1.0 / character_set_total * length
|
111
119
|
end
|
112
120
|
|
113
|
-
LETTER_COUNT_HASH = (('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a + %w{+ _ / - =})
|
114
|
-
.map { |k| [k.to_sym, 0] }.to_h
|
115
121
|
def letter_count
|
116
122
|
@letter_count ||= begin
|
117
|
-
string.chars.
|
123
|
+
string.chars.each_with_object(LETTER_COUNT_HASH.dup) do |letter, hash|
|
118
124
|
hash[letter.to_sym] += 1
|
119
125
|
end
|
120
126
|
end
|
@@ -136,8 +142,6 @@ class PossibleKey # rubocop:disable Metrics/ClassLength
|
|
136
142
|
end
|
137
143
|
end
|
138
144
|
|
139
|
-
VOWELS = %i{a e i o u A E I O U}.freeze
|
140
|
-
CONSONANTS = %i{b c d f g h j k l m n p q r s t v w x y z B C D F G H J K L M N P Q R S T V W X Y Z}.freeze
|
141
145
|
def vowel_consonant_ratio
|
142
146
|
vowels = letter_count.fetch_values(*VOWELS).sum
|
143
147
|
consonants = letter_count.fetch_values(*CONSONANTS).sum
|