wheeler 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +88 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/bin/wheeler +30 -0
- data/lib/wheeler/version.rb +3 -0
- data/lib/wheeler.rb +125 -0
- data/samples/adventure_of_the_speckled_band.txt +497 -0
- data/samples/dict.txt +2013 -0
- data/samples/puzzles.txt +3305 -0
- data/wheeler.gemspec +26 -0
- metadata +118 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d4596a6f0a690fab701f82468b023a03379c57df
|
4
|
+
data.tar.gz: ac02c86ac3aea93799b5b4a2ff3b75c0bae983be
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9f7ec98508653b28cac3369877b4916b8fbae51deb524b686a3f6f57641ec91051720c3e0d554bbc032b1c7de691e8be471ff10eb1b7908f670090a8fc1508e1
|
7
|
+
data.tar.gz: 04c1613827999da49e2aa60cad70fabd1c4e6bf2d2426f834ddd84c29b80dac3147fc6d288c0823841c7df4115373137749f369d99563da540cc32383a001250
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.7
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Scott Pierce
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# Wheeler
|
2
|
+
|
3
|
+
This is a map/reduce proof of concept with a little bit of fun thrown in.
|
4
|
+
|
5
|
+
Wheeler is a naive Wheel of Fortune solver. It does this by indexing sampled
|
6
|
+
text from wherever into every possible contiguous combination of words up
|
7
|
+
to a max phrase length.
|
8
|
+
|
9
|
+
The mapper in this project is `wheeler phrases`. Again, this is a naive program.
|
10
|
+
The phrase parser doesn't use any NLP or grammar logic. It simply splits words
|
11
|
+
by spaces, period, and double quotes. After splitting it outputs joined sets
|
12
|
+
of those words in descending word count.
|
13
|
+
|
14
|
+
The reducer is `wheeler reduce`. It builds the phrase index into the `.index`
|
15
|
+
directory inside this project. Maybe we'll make it an arg in the future.
|
16
|
+
The index structure is:
|
17
|
+
1st word size/2nd word size/3rd word size/etc.../phrases
|
18
|
+
|
19
|
+
So for the phrase: "I love cats". An index entry would be written into
|
20
|
+
`.index/1/4/4/phrases`. The contents of phrases would be 'I LOVE CATS'
|
21
|
+
|
22
|
+
After the index is built, the guess matching is almost trivial.
|
23
|
+
|
24
|
+
1. receive a puzzle with underscores and known letters. For example: `_ ____ ___s`
|
25
|
+
2. get the word counts of the puzzle. 1, 4, 4.
|
26
|
+
3. find the phrases file based on the word counts. `.index/1/4/4/phrases`
|
27
|
+
4. perform a simple pattern match in that file. `grep -e '. .... ....' .index/1/4/4/phrases`
|
28
|
+
|
29
|
+
The `wheeler guess` script will do steps 2-4 automatically: `wheeler guess '_ ____ ___s'`
|
30
|
+
|
31
|
+
The more texts indexed, the better chance of solving any given puzzle. This solver
|
32
|
+
is only as good as the index it compiles.
|
33
|
+
|
34
|
+
## Build Index, Usage Step by Step
|
35
|
+
|
36
|
+
# Map text to phrases
|
37
|
+
$ wheeler phrases samples/adventure_of_the_speckled_band.txt 5 > tmp/phrases-unsorted.txt
|
38
|
+
|
39
|
+
# Sort the mapped phrases
|
40
|
+
$ sort tmp/phrases-unsorted.txt > tmp/phrases-sorted.txt
|
41
|
+
|
42
|
+
# Reduce to counts, this writes results to .index
|
43
|
+
$ wheeler reduce tmp/phrases-sorted.txt
|
44
|
+
|
45
|
+
# view phrase indexes
|
46
|
+
$ find .index -name phrases
|
47
|
+
|
48
|
+
# view contents of a phrase
|
49
|
+
$ less .index/9/8/4/9/3/phrases
|
50
|
+
|
51
|
+
## Solve a Puzzle
|
52
|
+
|
53
|
+
$ wheeler guess '____ __ _____ES'
|
54
|
+
# grep --color=always -e '.... .. .....ES' .index/4/2/7/phrases
|
55
|
+
# BAND OF GYPSIES
|
56
|
+
|
57
|
+
## Use a sample dictionary
|
58
|
+
# Map text to phrases
|
59
|
+
$ wheeler phrases samples/dict.txt 5 > tmp/phrases-unsorted.txt
|
60
|
+
|
61
|
+
# Sort the mapped phrases
|
62
|
+
$ sort tmp/phrases-unsorted.txt > tmp/phrases-sorted.txt
|
63
|
+
|
64
|
+
# Reduce to counts
|
65
|
+
$ wheeler reduce tmp/phrases-sorted.txt
|
66
|
+
|
67
|
+
## Use full English Dictionary
|
68
|
+
|
69
|
+
# Make the Index
|
70
|
+
$ curl http://www.gutenberg.org/ebooks/29765 > tmp/dictionary.txt
|
71
|
+
$ wheeler phrases tmp/dictionary.txt 5 > tmp/phrases-unsorted.txt
|
72
|
+
|
73
|
+
# Sort the mapped phrases
|
74
|
+
$ sort tmp/phrases-unsorted.txt > tmp/phrases-sorted.txt
|
75
|
+
|
76
|
+
# Reduce to counts
|
77
|
+
$ wheeler reduce tmp/phrases-sorted.txt
|
78
|
+
|
79
|
+
|
80
|
+
## Contributing
|
81
|
+
|
82
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/ddrscott/wheeler.
|
83
|
+
|
84
|
+
|
85
|
+
## License
|
86
|
+
|
87
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
88
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "wheeler"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "pry"
|
14
|
+
Pry.start
|
data/bin/setup
ADDED
data/bin/wheeler
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'wheeler'
|
5
|
+
require 'thor'
|
6
|
+
|
7
|
+
# Thor command-line front end for the Wheeler mixin.
#
# Each public method defined here becomes a `wheeler <command>` sub-command;
# the actual work is done by the methods mixed in from the Wheeler module.
class WheelerCLI < Thor
  include Wheeler

  desc 'phrases SRC NUM_WORDS', 'split SRC into phrases of NUM_WORDS'
  # Map step: print every phrase found in the file SRC to stdout.
  #
  # @param src [String] path of the text file to read
  # @param words_in_phrase [String] maximum words per phrase (converted with to_i)
  def phrases(src, words_in_phrase)
    File.open(src, 'r') do |f|
      map_phrases(f, words_in_phrase.to_i)
    end
  end

  # The usage string must name the real command (`reduce`); it previously
  # advertised a non-existent `index` command in the help output.
  desc 'reduce SRC', 'places uniq phrases into .index directory based on word sizes'
  # Reduce step: read the phrase rows in the file SRC and write the index.
  #
  # @param src [String] path of the file produced by `phrases` (and then sorted)
  def reduce(src)
    File.open(src, 'r') do |f|
      reduce_fs(f)
    end
  end

  desc 'guess PUZZLE', 'guess PUZZLE. PUZZLE should use _ (underscore) or . (dot) for unknown letters'
  # Redefined here only so that Thor registers it as a command; the work is
  # delegated to Wheeler#guess.
  #
  # @param puzzle [String] the puzzle pattern to look up
  def guess(puzzle)
    super(puzzle)
  end
end
|
29
|
+
|
30
|
+
WheelerCLI.start(ARGV)
|
data/lib/wheeler.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'fileutils'
require 'wheeler/version'
|
2
|
+
|
3
|
+
# Naive Wheel of Fortune solver organised as a small map/reduce pipeline.
#
# * map    - #map_phrases prints one "sizes|PHRASE" row per phrase found in a text
# * reduce - #reduce_fs reads those rows (sorted) and writes them to files under
#            INDEX_PATH, one directory level per word length
# * guess  - #guess greps the index file matching a puzzle's word lengths
#
# The module is meant to be mixed into an object (see bin/wheeler).
module Wheeler

  # Root directory of the phrase index, relative to the current working directory.
  INDEX_PATH = '.index'.freeze

  # Yield every word of +io+, upcased.
  #
  # The stream is read in space-delimited chunks instead of lines, and each
  # chunk is scanned for runs of word characters, so punctuation (periods,
  # commas, quotes, apostrophes) never reaches the block.
  #
  # @param io [#each_line] the text source
  # @yieldparam word [String] the next word in upper case
  def each_word(io, &block)
    io.each_line(' ') do |chunk|
      chunk.scan(/\b([[:word:]]+)\b/).each do |word, _|
        block.call word.upcase
      end
    end
  end

  # Yield every contiguous run of 1..max_words words found in +io+.
  #
  # A sliding window of words is kept; whenever it is full, every prefix of
  # the window is yielded and the window advances by one word.
  #
  # @param io [#each_line] the text source
  # @param max_words [Integer] maximum number of words in a phrase
  # @yieldparam words [Array<String>] the words of one phrase
  def each_phrase(io, max_words=4, &block)
    words = []
    each_word(io) do |word|
      words << word
      if word[-1] == '.'
        # NOTE: each_word currently strips punctuation, so this sentence-break
        # branch is not reached; it is kept for a tokenizer that keeps periods.
        words[-1] = words[-1][0..-2]

        # emit every phrase of the finished sentence; this also empties the
        # window so the next sentence starts fresh
        cascade_words(words, &block) until words.empty?
      elsif words.length >= max_words
        cascade_words(words, &block)
      end
    end

    # Drain the window completely. A single cascade would only emit phrases
    # starting at the first leftover word and lose the ones starting at the
    # following words (e.g. "LOVE CATS" and "CATS" for "I love cats").
    cascade_words(words, &block) until words.empty?
  end

  # Yield every prefix of +words+ (1 word, 2 words, ... all words), then drop
  # the first word so the caller's window advances. Mutates +words+.
  #
  # @param words [Array<String>] the current window
  # @yieldparam prefix [Array<String>] a new array holding the first n words
  # @return [String, nil] the word removed from the front of the window
  def cascade_words(words, &block)
    1.upto(words.size) do |count|
      block.call words.first(count)
    end
    words.shift
  end

  # @param words [Array<String>]
  # @return [String] the length of each word joined by spaces, e.g. "1 4 4"
  def word_sizes(words)
    words.map(&:size).join(' ')
  end

  # Strip a trailing comma or semicolon from the last word, in place.
  #
  # @param words [Array<String>]
  # @return [Array<String>] the same array
  def remove_last_punctuation_if_needed(words)
    last_word = words.last
    # nothing to trim for an empty list or an empty last word
    return words if last_word.nil? || last_word.empty?

    if [',', ';'].include?(last_word[-1])
      # replace the last *word* with itself minus the final character
      # (the previous code assigned a slice of the whole array here)
      words[-1] = last_word[0..-2]
    end
    words
  end

  # Map step: print one "<word sizes>|<phrase>" row to stdout per phrase.
  #
  # @param io [#each_line] the text source
  # @param max_words [Integer] maximum number of words in a phrase
  def map_phrases(io, max_words)
    each_phrase(io, max_words) do |words|
      puts "#{word_sizes(words)}|#{words.join(' ')}"
    end
  end

  # Ensure the block is called at most once per +duration+ seconds.
  #
  # The time of the last run is kept in @last_time on the including object.
  # The very first call only records the time, so with a positive duration
  # it never runs the block.
  #
  # @param duration [Numeric] minimum number of seconds between two runs
  def throttle(duration=0.1, &block)
    now = Time.now
    @last_time ||= now
    return unless now - @last_time > duration

    block.call
    @last_time = Time.now
  end

  # Reduce step: group "sizes|text" rows by their sizes key and write each
  # group with #write_sizes.
  #
  # Rows with the same key must be adjacent (the input is expected to be
  # sorted); adjacent duplicate texts are written once.
  #
  # @param io [#each_line] rows produced by #map_phrases
  def reduce_fs(io)
    last_sizes = nil
    texts = []

    io.each_line do |line|
      # chomp instead of cutting the last character: the final row may not
      # end with a newline
      sizes, text = line.chomp.split('|', 2)

      # blank or malformed row (no '|' separator)
      next if text.nil?

      # write out when the size of the words changes
      if last_sizes && sizes != last_sizes
        write_sizes(last_sizes, texts)
        texts.clear
      end

      throttle { $stderr.print "\e[0K#{sizes}|#{text}\r" } # some debug output
      texts << text unless texts.last == text # text that matches the word size pattern

      last_sizes = sizes
    end

    # make sure we write out the remaining phrases
    write_sizes(last_sizes, texts) if texts.any?
  end

  # Write +texts+, one per line, to "<INDEX_PATH>/<sizes as directories>/phrases",
  # replacing any existing file.
  #
  # @param sizes [String] space separated word sizes, e.g. "1 4 4"
  # @param texts [Array<String>] the phrases for that key
  def write_sizes(sizes, texts)
    idx_path = File.join(INDEX_PATH, sizes.tr(' ', '/'))
    FileUtils.mkdir_p idx_path
    File.open(File.join(idx_path, 'phrases'), 'w') { |f| f << texts.join("\n") }
  end

  # Print the indexed phrases that match +puzzle+.
  #
  # @param puzzle [String] Known letters in their position and `_` underscore the unknown letters
  def guess(puzzle)
    # split up into words (ignoring leading whitespace) and replace _ with dot
    words = puzzle.split.map { |w| w.gsub('_', '.').upcase }
    pattern = words.join(' ')

    # Example: "_ ____ ____" should construct `.index/1/4/4/phrases`
    phrase_path = "#{INDEX_PATH}/#{words.map(&:size).join('/')}/phrases"

    # shown to the user only; the command itself is run without a shell
    puts "grep --color=always -e '#{pattern}' #{phrase_path}"

    # Passing an argv array keeps the puzzle away from the shell, so quotes or
    # other metacharacters in it cannot run arbitrary commands.
    output = IO.popen(['grep', '--color=always', '-e', pattern, phrase_path], &:read)
    puts output

    # grep exits with 1 when no line matched
    puts "Phrases not found in #{phrase_path}" if $?.exitstatus == 1
  end
end
|