substitution_solver 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/dictionary_builder.rb +13 -0
- data/dictionary_inspector.rb +15 -0
- data/substitution_solver.rb +124 -63
- metadata +4 -4
data/dictionary_builder.rb
CHANGED
@@ -1,4 +1,17 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# This program basically is responsible for building a new english.dic file from
|
3
|
+
# a source ascii text file. Feeding it a novel is probably the best idea. You
|
4
|
+
# want the text to be plain english, the more of it the better.
|
5
|
+
# the format of this command is
|
6
|
+
# ruby dictionary_builder.rb <ascii_filename>
|
7
|
+
|
8
|
+
require 'optparse'
|
9
|
+
require 'rdoc/usage'
|
10
|
+
|
11
|
+
opts = OptionParser.new
|
12
|
+
opts.on("-h", "--help") {RDoc::usage}
|
13
|
+
|
14
|
+
opts.parse(ARGV)
|
2
15
|
|
3
16
|
hash = Hash.new(0)
|
4
17
|
|
data/dictionary_inspector.rb
CHANGED
@@ -1,4 +1,19 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# This program is so that you can examine the contents of an english.dic file.
|
3
|
+
# The file is a binary file that is in ruby's marshal format.
|
4
|
+
# The usage of this command is
|
5
|
+
# ruby dictionary_inspector.rb
|
6
|
+
#
|
7
|
+
# you don't need to supply a filename, it assumes that english.dic is in the
|
8
|
+
# current directory.
|
9
|
+
|
10
|
+
require 'optparse'
|
11
|
+
require 'rdoc/usage'
|
12
|
+
|
13
|
+
opts = OptionParser.new
|
14
|
+
opts.on("-h", "--help") {RDoc::usage}
|
15
|
+
|
16
|
+
opts.parse(ARGV)
|
2
17
|
|
3
18
|
$dictionary = Hash.new(0) # The dictionary of tetragraph frequencies
|
4
19
|
|
data/substitution_solver.rb
CHANGED
@@ -1,24 +1,37 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# This program is for solving simple substitution ciphers such as the
|
3
|
+
# cryptoquotes found in the newspaper. The usage of the command is as follows
|
4
|
+
# ruby substitution_solver.rb <filename>
|
5
|
+
#
|
6
|
+
# where <filename> is the name of an ascii text file that contains the
|
7
|
+
# ciphertext that you would like to retrieve the plaintext of
|
8
|
+
#
|
9
|
+
# this command requires that english.dic be in the current working directory in
|
10
|
+
# order to function properly.
|
11
|
+
#
|
12
|
+
# also be aware that this program will never return, it has no way of knowing
|
13
|
+
# when it has achieved the correct answer, so you must hit CTRL-C to exit the
|
14
|
+
# program
|
2
15
|
|
3
|
-
|
4
|
-
|
16
|
+
require 'optparse'
|
17
|
+
require 'rdoc/usage'
|
5
18
|
|
6
|
-
|
19
|
+
opts = OptionParser.new
|
20
|
+
opts.on("-h", "--help") {RDoc::usage}
|
7
21
|
|
8
|
-
|
9
|
-
ciphertext << line
|
10
|
-
end
|
22
|
+
opts.parse(ARGV)
|
11
23
|
|
12
|
-
|
24
|
+
$iteration = 0 # To record how many iterations the program
|
25
|
+
# had to churn through
|
13
26
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
def score(string)
|
27
|
+
# this function is responsible for scoring a string against the tetragraph
|
28
|
+
# statistics
|
29
|
+
# Parameters:
|
30
|
+
# * string = the string that you want to score
|
31
|
+
# Return Value:
|
32
|
+
# * A number representing the score, the higher the score, the better (ie more
|
33
|
+
# likely to be english)
|
34
|
+
def score(string)
|
22
35
|
$iteration += 1 # Increment the iteration count as this is probably the most fundamental loop to the program
|
23
36
|
tally = 0 # Set a counter to 0
|
24
37
|
0.upto(string.length-4) do |x| # Iterate through the string
|
@@ -27,7 +40,14 @@ def score(string) # This funct
|
|
27
40
|
return tally # and return our grand total when we're finished adding it all up
|
28
41
|
end
|
29
42
|
|
30
|
-
|
43
|
+
# this function makes small random adjustments to the key when we've hill
|
44
|
+
# climbed our way into a dead end
|
45
|
+
# Parameters:
|
46
|
+
# * key = A hash that represents the current translation mapping from ciphertext
|
47
|
+
# to plaintext
|
48
|
+
# Return Value:
|
49
|
+
# * none
|
50
|
+
def small_adj!(key)
|
31
51
|
for i in 0...rand(5) # pick a random number of changes to make
|
32
52
|
j = rand(26) # now pick two random letters in the alphabet to swap
|
33
53
|
k = rand(26)
|
@@ -38,8 +58,16 @@ def small_adj!(key) # this funct
|
|
38
58
|
end
|
39
59
|
end
|
40
60
|
end
|
41
|
-
|
42
|
-
|
61
|
+
|
62
|
+
# This function will return the decoded ciphertext using a given key to do the
|
63
|
+
# decoding
|
64
|
+
# Parameters:
|
65
|
+
# * ciphertext = A string that represents the ciphertext
|
66
|
+
# * key = A hash that represents how to translate the ciphertext into
|
67
|
+
# plaintext
|
68
|
+
# Return Value:
|
69
|
+
# * Returns the deciphered plaintext according to the key that was supplied.
|
70
|
+
def plaintext(ciphertext, key)
|
43
71
|
return_string = String.new # create a return string
|
44
72
|
|
45
73
|
for x in 0...ciphertext.length # loop through the ciphertext
|
@@ -47,8 +75,13 @@ def plaintext(ciphertext, key) # This funct
|
|
47
75
|
end
|
48
76
|
return return_string # return the answer
|
49
77
|
end
|
50
|
-
|
51
|
-
|
78
|
+
|
79
|
+
# completely randomize the key, ie start over from scratch
|
80
|
+
# Parameters:
|
81
|
+
# key = key as a Hash that needs to be randomized
|
82
|
+
# Return Value:
|
83
|
+
# none
|
84
|
+
def randomize!(key)
|
52
85
|
array = Array.new # create an array of letters to pick from
|
53
86
|
|
54
87
|
for x in 0...26
|
@@ -62,52 +95,80 @@ def randomize!(key) # completely
|
|
62
95
|
end
|
63
96
|
end
|
64
97
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
98
|
+
# This function is the main entry point for this program, it is responsible for
|
99
|
+
# implementing the main algorithm that solves the simple substitution cipher.
|
100
|
+
# It will not return. The only way to quit out of this function at present is
|
101
|
+
# to hit CTRL-C.
|
102
|
+
# Parameters:
|
103
|
+
# ciphertext = the ciphertext that you are trying to decipher
|
104
|
+
# Return Value:
|
105
|
+
# none (presently the function never returns)
|
106
|
+
def substitution_solver(ciphertext)
|
73
107
|
|
74
|
-
|
75
|
-
best_adj = best_score # set the best adjustment to the current best score
|
108
|
+
ciphertext.gsub!(/[^a-zA-Z]/, "").upcase! # get rid of any non-alphabetic characters
|
76
109
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
test_key[(j+65).chr] = temp
|
83
|
-
sc = score(plaintext(ciphertext, test_key)) # score the change we've made
|
84
|
-
if sc > best_adj # if it's better than any so far
|
85
|
-
best_adj=sc # then record the change so we can apply it later if it
|
86
|
-
best_i = i # turns out to be the best one
|
87
|
-
best_j = j
|
88
|
-
end
|
89
|
-
end
|
110
|
+
key = Hash.new # Create a hash that will represent the translation key
|
111
|
+
|
112
|
+
$dictionary = Hash.new(0) # The dictionary of tetragraph frequencies
|
113
|
+
File.open("english.dic") do |f| # Open the saved tetragraph information
|
114
|
+
$dictionary = Marshal.load(f) # And load this information into our dictionary
|
90
115
|
end
|
91
116
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
117
|
+
print "best overall = ", score(ciphertext), " : best score = ", score(ciphertext), "\n" #print the original ciphertext
|
118
|
+
puts ciphertext.gsub(/(.....)/, '\1 ')
|
119
|
+
|
120
|
+
randomize!(key) # randomize the key
|
121
|
+
|
122
|
+
best_score=score(ciphertext); # set the best score to the score of the ciphertext
|
123
|
+
best_overall=best_score-1; # set the best overall score to the best score -1
|
124
|
+
num_small_adjusts=0; # set the number of small adjustments to 0
|
125
|
+
|
126
|
+
loop do # loop forever
|
127
|
+
best_adj = best_score # set the best adjustment to the current best score
|
128
|
+
|
129
|
+
for i in 0...26 # loop through all possible "trivial" letter replacements
|
130
|
+
for j in i...26 # in the key looking for the best swap. This in effect is
|
131
|
+
test_key = key.dup # the so called "Hill Climbing" part of our program
|
132
|
+
temp = test_key[(i+65).chr]
|
133
|
+
test_key[(i+65).chr] = test_key[(j+65).chr]
|
134
|
+
test_key[(j+65).chr] = temp
|
135
|
+
sc = score(plaintext(ciphertext, test_key)) # score the change we've made
|
136
|
+
if sc > best_adj # if it's better than any so far
|
137
|
+
best_adj=sc # then record the change so we can apply it later if it
|
138
|
+
best_i = i # turns out to be the best one
|
139
|
+
best_j = j
|
140
|
+
end
|
141
|
+
end
|
102
142
|
end
|
103
|
-
|
104
|
-
if
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
143
|
+
|
144
|
+
if best_adj > best_score # if we found an adjustment that improves the best score
|
145
|
+
temp = key[(best_i+65).chr] # then apply that adjustment to the key
|
146
|
+
key[(best_i+65).chr] = key[(best_j+65).chr]
|
147
|
+
key[(best_j+65).chr] = temp
|
148
|
+
best_score = best_adj
|
149
|
+
if best_score > best_overall # if that adjustment is the best overall
|
150
|
+
num_small_adjusts = 0 # then reset the number of small adjusts counter
|
151
|
+
best_overall = best_score # set this new score as the best overall
|
152
|
+
print "best overall = ", best_overall, " : best score = ", best_score, " : iteration = #{$iteration}\n"
|
153
|
+
puts plaintext(ciphertext, key).gsub(/(.....)/, '\1 ') # and print our new found best overall value
|
154
|
+
end
|
155
|
+
else # otherwise none of the adjustments raised our score
|
156
|
+
if num_small_adjusts < 10 # so make a small random adjustment to the key
|
157
|
+
small_adj!(key) # as long as we haven't already made too many small adjustments
|
158
|
+
num_small_adjusts += 1 # increment the number of small adjustments
|
159
|
+
else # otherwise we've made too many small adjustments, we're
|
160
|
+
randomize!(key) # probably not getting anywhere and need to start looking
|
161
|
+
num_small_adjusts = 0 # someplace else, randomize the key and start climbing the
|
162
|
+
end # hill again
|
163
|
+
best_score=score(plaintext(ciphertext, key)) # set the best score to either the small adjustment value or the new randomized string value depending on what we did above.
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
ciphertext = String.new
|
169
|
+
|
170
|
+
File::readlines(ARGV[0]).each do |line| # Read the ciphertext from the file named on the command line
|
171
|
+
ciphertext << line
|
113
172
|
end
|
173
|
+
|
174
|
+
substitution_solver(ciphertext) # start the program on its main loop
|
metadata
CHANGED
@@ -3,12 +3,12 @@ rubygems_version: 0.8.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: substitution_solver
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.5.
|
7
|
-
date: 2005-11-
|
6
|
+
version: 0.5.1
|
7
|
+
date: 2005-11-17
|
8
8
|
summary: "Program for solving mono-alphabetic simple substitution ciphers, (as in
|
9
9
|
cryptoquotes), without word lengths."
|
10
10
|
require_paths:
|
11
|
-
-
|
11
|
+
- "."
|
12
12
|
email: pfharlock@yahoo.com
|
13
13
|
homepage:
|
14
14
|
rubyforge_project:
|
@@ -16,7 +16,7 @@ description:
|
|
16
16
|
autorequire:
|
17
17
|
default_executable:
|
18
18
|
bindir: "."
|
19
|
-
has_rdoc:
|
19
|
+
has_rdoc: true
|
20
20
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
21
|
requirements:
|
22
22
|
-
|