wheeler 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +88 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/bin/wheeler +30 -0
- data/lib/wheeler/version.rb +3 -0
- data/lib/wheeler.rb +125 -0
- data/samples/adventure_of_the_speckled_band.txt +497 -0
- data/samples/dict.txt +2013 -0
- data/samples/puzzles.txt +3305 -0
- data/wheeler.gemspec +26 -0
- metadata +118 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d4596a6f0a690fab701f82468b023a03379c57df
|
4
|
+
data.tar.gz: ac02c86ac3aea93799b5b4a2ff3b75c0bae983be
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9f7ec98508653b28cac3369877b4916b8fbae51deb524b686a3f6f57641ec91051720c3e0d554bbc032b1c7de691e8be471ff10eb1b7908f670090a8fc1508e1
|
7
|
+
data.tar.gz: 04c1613827999da49e2aa60cad70fabd1c4e6bf2d2426f834ddd84c29b80dac3147fc6d288c0823841c7df4115373137749f369d99563da540cc32383a001250
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.7
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Scott Pierce
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# Wheeler
|
2
|
+
|
3
|
+
This is a map/reduce proof of concept with a little bit of fun thrown in.
|
4
|
+
|
5
|
+
Wheeler is a naive Wheel of Fortune solver. It does this by indexing sampled
|
6
|
+
text from wherever into every possible contiguous combination of words up
|
7
|
+
to a max phrase length.
|
8
|
+
|
9
|
+
The mapper in this project is `wheeler phrases`. Again, this is a naive program.
|
10
|
+
The phrase parser doesn't use any NLP or grammar logic. It simply splits words
|
11
|
+
by spaces, period, and double quotes. After splitting it outputs joined sets
|
12
|
+
of those words in ascending word count.
|
13
|
+
|
14
|
+
The reducer is `wheeler reduce`. It builds the phrase index into the `.index`
|
15
|
+
directory inside this project. Maybe we'll make it an arg in the future.
|
16
|
+
The index structure is:
|
17
|
+
1st word size/2nd word size/3rd word size/etc.../phrases
|
18
|
+
|
19
|
+
So for the phrase "I love cats", an index entry would be written into
|
20
|
+
`.index/1/4/4/phrases`. The contents of phrases would be 'I LOVE CATS'
|
21
|
+
|
22
|
+
After the index is built, the guess matching is almost trivial.
|
23
|
+
|
24
|
+
1. receive a puzzle with underscores and known letters. For example: `_ ____ ___s`
|
25
|
+
2. get the length of each word in the puzzle: 1, 4, 4.
|
26
|
+
3. find the phrases file based on the word lengths: `.index/1/4/4/phrases`
|
27
|
+
4. perform a simple pattern match in that file: `grep -e '. .... ...S' .index/1/4/4/phrases`
|
28
|
+
|
29
|
+
The `wheeler guess` script will do steps 2-4 automatically: `wheeler guess '_ ____ ___s'`
|
30
|
+
|
31
|
+
The more texts indexed, the better chance of solving any given puzzle. This solver
|
32
|
+
is only as good as the index it compiles.
|
33
|
+
|
34
|
+
## Build Index, Usage Step by Step
|
35
|
+
|
36
|
+
# Map text to phrases
|
37
|
+
$ wheeler phrases samples/adventure_of_the_speckled_band.txt 5 > tmp/phrases-unsorted.txt
|
38
|
+
|
39
|
+
# Sort the mapped phrases
|
40
|
+
$ sort tmp/phrases-unsorted.txt > tmp/phrases-sorted.txt
|
41
|
+
|
42
|
+
# Reduce to counts, this writes results to .index
|
43
|
+
$ wheeler reduce tmp/phrases-sorted.txt
|
44
|
+
|
45
|
+
# view phrase indexes
|
46
|
+
$ find .index -name phrases
|
47
|
+
|
48
|
+
# view contents of a phrase
|
49
|
+
$ less .index/9/8/4/9/3/phrases
|
50
|
+
|
51
|
+
## Solve a Puzzle
|
52
|
+
|
53
|
+
$ wheeler guess '____ __ _____ES'
|
54
|
+
# grep --color=always -e '.... .. .....ES' .index/4/2/7/phrases
|
55
|
+
# BAND OF GYPSIES
|
56
|
+
|
57
|
+
## Use a sample dictionary
|
58
|
+
# Map text to phrases
|
59
|
+
$ wheeler phrases samples/dict.txt 5 > tmp/phrases-unsorted.txt
|
60
|
+
|
61
|
+
# Sort the mapped phrases
|
62
|
+
$ sort tmp/phrases-unsorted.txt > tmp/phrases-sorted.txt
|
63
|
+
|
64
|
+
# Reduce to counts
|
65
|
+
$ wheeler reduce tmp/phrases-sorted.txt
|
66
|
+
|
67
|
+
## Use full English Dictionary
|
68
|
+
|
69
|
+
# Make the Index
|
70
|
+
$ curl http://www.gutenberg.org/ebooks/29765 > tmp/dictionary.txt
|
71
|
+
$ wheeler phrases tmp/dictionary.txt 5 > tmp/phrases-unsorted.txt
|
72
|
+
|
73
|
+
# Sort the mapped phrases
|
74
|
+
$ sort tmp/phrases-unsorted.txt > tmp/phrases-sorted.txt
|
75
|
+
|
76
|
+
# Reduce to counts
|
77
|
+
$ wheeler reduce tmp/phrases-sorted.txt
|
78
|
+
|
79
|
+
|
80
|
+
## Contributing
|
81
|
+
|
82
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/ddrscott/wheeler.
|
83
|
+
|
84
|
+
|
85
|
+
## License
|
86
|
+
|
87
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
88
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "wheeler"
|
5
|
+
|
6
|
+
# Opens an interactive Pry session with the wheeler gem already loaded.
# Add fixtures and/or initialization code before the Pry.start call to make
# experimenting with the gem easier. You can also use a different console,
# if you like.
#
# Pry is loaded after bundler/setup, so don't forget to add pry to your
# Gemfile.

require "pry"
Pry.start
|
data/bin/setup
ADDED
data/bin/wheeler
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'wheeler'
|
5
|
+
require 'thor'
|
6
|
+
|
7
|
+
# Thor command-line front end for the Wheeler module: each command opens the
# given file (where applicable) and delegates to the matching Wheeler helper.
class WheelerCLI < Thor
  include Wheeler

  # Mapper: prints one "sizes|PHRASE" row per phrase found in SRC to stdout.
  # NUM_WORDS arrives as a String from the command line and is converted with
  # to_i, so a non-numeric value is treated as 0.
  desc 'phrases SRC NUM_WORDS', 'split SRC into phrases of NUM_WORDS'
  def phrases(src, words_in_phrase)
    File.open(src, 'r') { |f| map_phrases(f, words_in_phrase.to_i) }
  end

  # Reducer: SRC is the sorted output of the `phrases` command.
  # The usage string must start with the real command name (`reduce`) so that
  # the generated help matches what the user has to type.
  desc 'reduce SRC', 'places uniq phrases from SRC (sorted output of phrases) into .index directory based on word sizes'
  def reduce(src)
    File.open(src, 'r') { |f| reduce_fs(f) }
  end

  # Thin override of Wheeler#guess.
  # NOTE(review): presumably redefined here so the `desc` above attaches to a
  # method defined in this class -- confirm against Thor's command registration.
  desc 'guess PUZZLE', 'guess PUZZLE. PUZZLE should use _ (underscore) or . (dot) for unknown letters'
  def guess(puzzle)
    super(puzzle)
  end
end

WheelerCLI.start(ARGV)
|
data/lib/wheeler.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'fileutils'
require 'wheeler/version'
|
2
|
+
|
3
|
+
# Naive Wheel of Fortune solver organised as a small map/reduce pipeline.
#
# * #map_phrases (mapper) prints every contiguous run of words of a text,
#   keyed by the letter count of each word.
# * #reduce_fs (reducer) turns the sorted mapper output into one file per
#   word-size pattern underneath INDEX_PATH.
# * #guess greps the file that matches the word sizes of a puzzle.
module Wheeler
  # Index directory, relative to the current working directory.
  INDEX_PATH = '.index'.freeze

  # Yields every word of +io+, upcased.
  #
  # @param io [#each_line] text source (File, StringIO, ...)
  # @yieldparam word [String] upcased run of word characters
  def each_word(io, &block)
    # break on spaces instead of \n
    io.each_line(' ') do |chunk|
      # [[:word:]] matches letters, digits and underscore only, so quotes,
      # periods and other punctuation are never part of a yielded word.
      chunk.scan(/\b([[:word:]]+)\b/).each do |word, _|
        block.call word.upcase
      end
    end
  end

  # Yields every contiguous run of 1..max_words words found in +io+.
  #
  # Words are collected in a sliding window. Whenever the window is full, all
  # phrases starting at its first word are emitted and that word is dropped.
  #
  # NOTE(review): phrases run across sentence boundaries. Ending a phrase at a
  # period would require each_word to yield the trailing '.', which it does
  # not do.
  #
  # @param io [#each_line] text source
  # @param max_words [Integer] maximum number of words in a phrase
  # @yieldparam words [Array<String>] the words of one phrase
  def each_phrase(io, max_words = 4, &block)
    window = []
    each_word(io) do |word|
      window << word
      cascade_words(window, &block) if window.length >= max_words
    end

    # Drain the window completely. A single flush would only emit the phrases
    # starting at the first remaining word and silently lose those starting at
    # the last few words of the text.
    cascade_words(window, &block) until window.empty?
  end

  # Yields words[0..0], words[0..1], ... up to the whole array, then removes
  # the first element. +words+ is mutated in place.
  #
  # @param words [Array<String>]
  # @return [String, nil] the removed first word, nil when +words+ was empty
  def cascade_words(words, &block)
    words.size.times { |i| block.call words[0..i] }
    words.shift
  end

  # @param words [Array<String>]
  # @return [String] letter count of each word joined by spaces, e.g. "1 4 4"
  def word_sizes(words)
    words.map(&:size).join(' ')
  end

  # Strips one trailing ',' or ';' from the last word. +words+ is mutated in
  # place and returned. An empty array is returned untouched.
  #
  # @param words [Array<String>]
  # @return [Array<String>] the same array
  def remove_last_punctuation_if_needed(words)
    last_word = words.last
    words[-1] = last_word[0..-2] if last_word && last_word.end_with?(',', ';')
    words
  end

  # Mapper: prints one "sizes|PHRASE" row to stdout for every phrase in +io+,
  # e.g. "1 4 4|I LOVE CATS".
  #
  # @param io [#each_line] text source
  # @param max_words [Integer] maximum number of words in a phrase
  def map_phrases(io, max_words)
    each_phrase(io, max_words) do |words|
      puts "#{word_sizes(words)}|#{words.join(' ')}"
    end
  end

  # Ensures the block is called at most once per +duration+ seconds. The very
  # first call only starts the timer and never runs the block.
  #
  # @param duration [Numeric] minimum number of seconds between two runs
  def throttle(duration = 0.1, &block)
    # The monotonic clock is not affected by wall-clock adjustments.
    now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    @last_time ||= now
    return if now - @last_time <= duration

    block.call
    @last_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Reducer: reads "sizes|PHRASE" rows and writes the phrases of every sizes
  # group through #write_sizes. The input must be sorted: groups are detected
  # by a change of the sizes column and duplicates are only dropped when they
  # are adjacent.
  #
  # @param io [#each_line] sorted mapper output
  def reduce_fs(io)
    last_sizes = nil
    texts = []

    io.each_line do |line|
      # chomp (instead of cutting the last character) keeps the phrase intact
      # when the final line has no trailing newline.
      sizes, text = line.chomp.split('|', 2)
      # Skip blank or malformed rows that carry no "sizes|phrase" pair.
      next if text.nil? || sizes.empty?

      # write out when the size of the words changes
      if last_sizes && sizes != last_sizes
        write_sizes(last_sizes, texts)
        texts.clear
      end

      throttle { $stderr.print "\e[0K#{sizes}|#{text}\r" } # some debug output
      texts << text unless texts.last == text # text that matches the word size pattern
      last_sizes = sizes
    end

    # make sure we write out the remaining phrases
    write_sizes(last_sizes, texts) unless texts.empty?
  end

  # Writes +texts+, one per line, to INDEX_PATH/<size>/<size>/.../phrases,
  # replacing any existing file for that sizes pattern.
  #
  # @param sizes [String] space separated word sizes, e.g. "1 4 4"
  # @param texts [Array<String>] phrases sharing that pattern
  def write_sizes(sizes, texts)
    idx_path = "#{INDEX_PATH}/#{sizes.tr(' ', '/')}"
    FileUtils.mkdir_p idx_path
    File.write("#{idx_path}/phrases", texts.join("\n"))
  end

  # Prints the indexed phrases matching +puzzle+, preceded by the equivalent
  # grep command line.
  #
  # @param puzzle [String] Known letters in their position and `_` underscore the unknown letters
  def guess(puzzle)
    # split up into words and replace _ with dot
    words = puzzle.split.map { |w| w.tr('_', '.').upcase }
    pattern = words.join(' ')

    # Example: "_ ____ ____" should construct `.index/1/4/4/phrases`
    phrase_path = "#{INDEX_PATH}/#{words.map(&:size).join('/')}/phrases"

    puts "grep --color=always -e '#{pattern}' #{phrase_path}"
    # Run grep with an argument vector so that no shell ever parses the puzzle
    # text; an apostrophe in the puzzle can neither break nor extend the command.
    puts IO.popen(['grep', '--color=always', '-e', pattern, phrase_path], &:read)
    # grep exits with 1 when the file was searched but nothing matched
    puts "Phrases not found in #{phrase_path}" if $?.exitstatus == 1
  end
end
|