correct-horse-battery-staple 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +1 -1
- data/.gemtest +0 -0
- data/Gemfile +53 -0
- data/Gemfile.lock +109 -0
- data/History.txt +6 -0
- data/Manifest.txt +57 -0
- data/README.txt +115 -0
- data/Rakefile +47 -0
- data/bin/chbs +234 -0
- data/bin/chbs-mkpass +16 -0
- data/correct-horse-battery-staple.gemspec +59 -0
- data/lib/correct_horse_battery_staple.rb +117 -0
- data/lib/correct_horse_battery_staple/assembler.rb +45 -0
- data/lib/correct_horse_battery_staple/backend.rb +6 -0
- data/lib/correct_horse_battery_staple/backend/isam_kd.rb +410 -0
- data/lib/correct_horse_battery_staple/backend/redis.rb +95 -0
- data/lib/correct_horse_battery_staple/backend/redis/d_range.rb +105 -0
- data/lib/correct_horse_battery_staple/corpus.rb +33 -0
- data/lib/correct_horse_battery_staple/corpus/base.rb +278 -0
- data/lib/correct_horse_battery_staple/corpus/isam.rb +258 -0
- data/lib/correct_horse_battery_staple/corpus/isam_kd.rb +60 -0
- data/lib/correct_horse_battery_staple/corpus/redis.rb +188 -0
- data/lib/correct_horse_battery_staple/corpus/redis2.rb +88 -0
- data/lib/correct_horse_battery_staple/corpus/serialized.rb +121 -0
- data/lib/correct_horse_battery_staple/corpus/sqlite.rb +266 -0
- data/lib/correct_horse_battery_staple/generator.rb +40 -0
- data/lib/correct_horse_battery_staple/memoize.rb +25 -0
- data/lib/correct_horse_battery_staple/parser.rb +5 -0
- data/lib/correct_horse_battery_staple/parser/base.rb +5 -0
- data/lib/correct_horse_battery_staple/parser/regex.rb +58 -0
- data/lib/correct_horse_battery_staple/range_parser.rb +29 -0
- data/lib/correct_horse_battery_staple/statistical_array.rb +74 -0
- data/lib/correct_horse_battery_staple/stats.rb +22 -0
- data/lib/correct_horse_battery_staple/word.rb +90 -0
- data/lib/correct_horse_battery_staple/writer.rb +29 -0
- data/lib/correct_horse_battery_staple/writer/base.rb +22 -0
- data/lib/correct_horse_battery_staple/writer/csv.rb +15 -0
- data/lib/correct_horse_battery_staple/writer/file.rb +54 -0
- data/lib/correct_horse_battery_staple/writer/isam.rb +50 -0
- data/lib/correct_horse_battery_staple/writer/isam_kd.rb +12 -0
- data/lib/correct_horse_battery_staple/writer/json.rb +19 -0
- data/lib/correct_horse_battery_staple/writer/marshal.rb +10 -0
- data/lib/correct_horse_battery_staple/writer/redis.rb +41 -0
- data/lib/correct_horse_battery_staple/writer/sqlite.rb +115 -0
- data/script/generate_all +34 -0
- data/script/load_redis +17 -0
- data/script/perftest +74 -0
- data/spec/corpus/serialized_spec.rb +62 -0
- data/spec/corpus_spec.rb +50 -0
- data/spec/correct_horse_battery_staple_spec.rb +73 -0
- data/spec/fixtures/100.json +101 -0
- data/spec/fixtures/corpus1.csv +101 -0
- data/spec/fixtures/corpus100.json +101 -0
- data/spec/fixtures/wiktionary1000.htm +648 -0
- data/spec/range_parser_spec.rb +54 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/statistical_array_spec.rb +52 -0
- data/spec/support/spec_pry.rb +1 -0
- data/spec/word_spec.rb +95 -0
- metadata +264 -0
- metadata.gz.sig +1 -0
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'redis'
|
2
|
+
if ! Object.const_defined?("JRUBY_VERSION")
|
3
|
+
require 'redis/connection/hiredis'
|
4
|
+
end
|
5
|
+
require 'securerandom'
|
6
|
+
|
7
|
+
module CorrectHorseBatteryStaple::Backend::Redis
|
8
|
+
|
9
|
+
# Mixin hook: gives the including class the ClassMethods (as class-level
# methods) and mixes InstanceMethods into its instances.
def self.included(base)
  base.extend(ClassMethods)
  base.send(:include, InstanceMethods)
end
|
13
|
+
|
14
|
+
# Class-level helpers added to the including class; none are defined yet.
module ClassMethods; end
|
16
|
+
|
17
|
+
module InstanceMethods
|
18
|
+
# Splits +dest+ on ':' into dbname, host and port (after removing any
# ".redis" / ".redisN" marker) and stores them in +options+. Entries that
# are already set in +options+ are kept; missing parts default to "chbs",
# "127.0.0.1" and 6379.
def parse_uri(dest)
  stripped = dest.gsub(/\.redis[0-9]?/, '')
  dbname, host, port = stripped.split(':')
  options[:dbname] ||= dbname || "chbs"
  options[:host]   ||= host || "127.0.0.1"
  options[:port]   ||= (port || 6379).to_i
end
|
24
|
+
|
25
|
+
# Writes word +w+ into the sorted sets: word text scored by id, id scored
# by percentile, and id scored by "length + percentile/100".
# Negative percentiles are clamped to 0. When +wid+ is nil a fresh id is
# taken from get_new_word_id.
def add_word(w, wid=nil)
  percentile = [0, w.percentile].max

  if wid.nil?
    wid = get_new_word_id
  end

  db.zadd(@words_key, wid, w.word)
  db.zadd(@percentile_key, percentile, wid)
  # db.zadd(@frequency_key, w.frequency, wid)
  lenprod_score = w.word.length + (percentile / 100.0)
  db.zadd(@lenprod_key, lenprod_score, wid)
end
|
35
|
+
|
36
|
+
#
|
37
|
+
# Note that this does NOT work inside a multi/exec
|
38
|
+
#
|
39
|
+
def get_new_word_id
  # Resolve the connection first: opening it is what assigns @id_key.
  connection = db
  connection.incr(@id_key)
end
|
42
|
+
|
43
|
+
# Returns the first member of the words set whose score equals +wid+,
# or nil when there is none or when the lookup raises a StandardError.
def get_word_by_id(wid)
  matches = db.zrangebyscore(@words_key, wid, wid, :limit => [0,1])
  matches[0]
rescue StandardError
  nil
end
|
46
|
+
|
47
|
+
# Reads the stats hash from Redis, converts every value with to_f and
# hands the resulting Hash to load_stats_from_hash.
def load_stats
  #noinspection RubyHashKeysTypesInspection
  pairs = db.hgetall(@stats_key).map { |name, raw| [name, raw.to_f] }
  load_stats_from_hash(Hash[pairs])
end
|
51
|
+
|
52
|
+
# Stores +stats+ in the Redis stats hash, passed to HMSET as a flat
# field, value, field, value... argument list.
def save_stats(stats)
  field_values = stats.to_a.flatten
  db.hmset(@stats_key, *field_values)
end
|
55
|
+
|
56
|
+
# Deletes every key this backend uses, leaving an empty database.
def create_database
  # Fetch the connection before reading the key ivars: they are assigned
  # when the database is opened.
  connection = db
  connection.del(@length_key,
                 @percentile_key,
                 @frequency_key,
                 @lenprod_key,
                 @stats_key,
                 @words_key,
                 @id_key)
end
|
60
|
+
|
61
|
+
# Returns the cached Redis connection, creating it on first use. The first
# call also resets the temp-key counter and computes the key names.
def open_database
  return @db if @db

  @gensym_id      = 0
  @length_key     = make_key("length_zset")
  @percentile_key = make_key("percentile_zset")
  @frequency_key  = make_key("frequency_zset")
  # scores given by w.word.length + w.percentile/100.0
  @lenprod_key    = make_key("lenprod_zset")
  @stats_key      = make_key("stats_hash")
  @words_key      = make_key("words_zset")
  @id_key         = make_key("word_id_counter")

  @db = ::Redis.new(:host => options[:host], :port => options[:port])
end
|
75
|
+
|
76
|
+
# The Redis connection, opened on first use.
def db
  return @db if @db
  open_database
end
|
79
|
+
|
80
|
+
# Intentionally empty: this backend performs no work on close.
def close_database; end
|
82
|
+
|
83
|
+
# Builds the namespaced Redis key "chbs_<dbname>_<name>".
def make_key(name)
  "chbs_" + options[:dbname].to_s + "_" + name.to_s
end
|
86
|
+
|
87
|
+
# Returns a new temporary key name of the form
# make_key("TEMP_<pid>_<counter>"); the counter grows by one per call.
#
# The counter is lazily initialised here so the method also works before
# open_database has run (the original initialised @_gensym_id but then
# incremented @gensym_id, which raised on nil).
def gensym_temp
  @gensym_id ||= 0
  make_key("TEMP_#{Process.pid}_#{@gensym_id += 1}")
end
|
91
|
+
end
|
92
|
+
|
93
|
+
autoload :DRange, 'correct_horse_battery_staple/backend/redis/d_range.rb'
|
94
|
+
end
|
95
|
+
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#
|
2
|
+
# Represents a list of items corresponding to a rectangular area of items
|
3
|
+
# formed by a range on axis 1 and a range on axis 2 of numbers
|
4
|
+
# associated with an item. In other words, this composes two
|
5
|
+
# different data about an item into a single score in a Redis sorted
|
6
|
+
# set, and allows that area to be treated as a single logical ordered
|
7
|
+
# list of items.
|
8
|
+
#
|
9
|
+
# This is used to construct a single score out of a word's length
|
10
|
+
# and percentile ranking. The length is the "outer" score and
|
11
|
+
# ranges (generally) from 3..18 or thereabouts in integral steps.
|
12
|
+
# Percentiles exist as fractional parts of the score added to the
|
13
|
+
# base word length. So, to address the items in a sorted set
|
14
|
+
# with the word length from 5..8 and percentile range 20..30,
|
15
|
+
# you would (in the Writer::Redis class) generate a Sorted set
|
16
|
+
# in which every word has a score with an integer and fractional
|
17
|
+
# part. The word "the" which appeared in the 95th percentile would
|
18
|
+
# have a score of 3.95.
|
19
|
+
#
|
20
|
+
# Once defined, this class allows the following operations:
|
21
|
+
#
|
22
|
+
# - counting the total # of items in the 2d bounding box
|
23
|
+
# - picking the nth item from the (virtual) sorted list
|
24
|
+
#
|
25
|
+
#
|
26
|
+
|
27
|
+
class CorrectHorseBatteryStaple::Backend::Redis::DRange
|
28
|
+
include CorrectHorseBatteryStaple::Memoize
|
29
|
+
# db      - connection object used for ZCOUNT / ZRANGEBYSCORE calls
# key     - name of the sorted set
# outer   - enumerable of base (integer-part) scores
# inner   - range of the fractional component, before division
# divisor - value the inner bounds are divided by (default 100)
def initialize(db, key, outer, inner, divisor=100)
  @db, @key = db, key
  @outer, @inner = outer, inner
  @divisor = divisor
  @counts = {}
end
|
37
|
+
|
38
|
+
# Returns one [min, max, count] triple per outer base, where count is the
# ZCOUNT of the set between min and max.
def dump
  iterate_ranges { |lo, hi| [lo, hi, @db.zcount(@key, lo, hi)] }
end
|
44
|
+
|
45
|
+
# Total number of items across all outer bases.
# Returns 0 (not nil) when there are no bases, so callers can do
# arithmetic on the result.
def count
  precache_counts
  @counts.values.reduce(0, :+)
end
|
49
|
+
memoize :count
|
50
|
+
|
51
|
+
# Returns the n-th (0-based) member of the virtual list formed by
# concatenating each outer base's score range in order, or nil when n is
# out of bounds.
#
# Each base with +cib+ members owns positions offset...(offset + cib); the
# upper bound is exclusive. (The original used an inclusive bound, so the
# first position of every later base was looked up in the previous base and
# came back nil.)
def pick_nth(n)
  precache_counts
  return nil if n < 0 || n >= count.to_i

  offset = 0
  @outer.each do |base|
    cib = count_in_base(base)
    if n < offset + cib
      min, max = minmax_for_base(base)
      return @db.zrangebyscore(@key, min, max,
                               :limit => [n - offset, 1])[0]
    end
    offset += cib
  end
  nil
end
|
69
|
+
|
70
|
+
protected
|
71
|
+
|
72
|
+
# Fetches the per-base counts inside a single MULTI and stores them in
# @counts keyed by base. Runs only once; later calls return nil immediately.
# NOTE(review): relies on the client queueing commands issued inside the
# multi block and returning their replies as an array - confirm against the
# redis gem version in use.
def precache_counts
  return if @precached_counts

  replies = @db.multi do
    @outer.each { |base| zcount(*minmax_for_base(base)) }
  end

  #noinspection RubyHashKeysTypesInspection
  fresh = {}
  @outer.to_a.zip(replies) { |base, reply| fresh[base] = reply }
  @counts = fresh
  @precached_counts = true
  @counts
end
|
84
|
+
|
85
|
+
# Number of items under base +b+, looked up once and then cached in @counts.
def count_in_base(b)
  known = @counts[b]
  return known if known
  @counts[b] = zcount(*minmax_for_base(b))
end
|
88
|
+
|
89
|
+
# Score bounds [min, max] for +base+: the base plus each end of @inner
# divided by @divisor.
def minmax_for_base(base)
  scale = @divisor.to_f
  lower = base + @inner.begin / scale
  upper = base + @inner.end / scale
  [lower, upper]
end
|
93
|
+
|
94
|
+
# ZCOUNT on this range's sorted set between +min+ and +max+.
def zcount(min, max)
  connection = @db
  connection.zcount(@key, min, max)
end
|
97
|
+
|
98
|
+
# Yields (min, max) for every outer base and returns the collected block
# results.
def iterate_ranges
  @outer.map do |base|
    bounds = minmax_for_base(base)
    yield bounds[0], bounds[1]
  end
end
|
104
|
+
|
105
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
|
2
|
+
class CorrectHorseBatteryStaple::Corpus
|
3
|
+
# Opens +filename+ with +clazz+ when given; otherwise picks the corpus class
# from the filename's format (see format_for), falling back to Serialized.
# Backend constants are only referenced in the branch taken, so autoload
# stays lazy.
def self.read(filename, clazz=nil)
  unless clazz
    corpus = CorrectHorseBatteryStaple::Corpus
    clazz =
      case corpus.format_for(filename)
      # when 'kdtree' then CorrectHorseBatteryStaple::Corpus::KDTree
      when 'isam'             then corpus::Isam
      when 'kdtree', 'isamkd' then corpus::IsamKD
      when 'sqlite'           then corpus::Sqlite
      when 'redis2'           then corpus::Redis2
      when 'redis'            then corpus::Redis
      else                         corpus::Serialized
      end
  end

  clazz.read(filename)
end
|
17
|
+
|
18
|
+
# Returns the lower-cased file extension of +spec+ (without the dot), or
# +defval+ when +spec+ has no usable extension or is not path-like.
#
# The missing-extension case is handled explicitly instead of relying on a
# NoMethodError being swallowed by a bare rescue.
def self.format_for(spec, defval = nil)
  ext = File.extname(spec)
  # extname yields "" (or a bare "." on some Ruby versions) when nothing
  # follows the last dot
  ext.length > 1 ? ext[1..-1].downcase : defval
rescue TypeError, ArgumentError
  defval
end
|
23
|
+
|
24
|
+
autoload :Base, 'correct_horse_battery_staple/corpus/base'
|
25
|
+
autoload :Serialized, 'correct_horse_battery_staple/corpus/serialized'
|
26
|
+
autoload :Isam, 'correct_horse_battery_staple/corpus/isam'
|
27
|
+
autoload :IsamKD, 'correct_horse_battery_staple/corpus/isam_kd'
|
28
|
+
autoload :Sqlite, 'correct_horse_battery_staple/corpus/sqlite'
|
29
|
+
autoload :Redis, 'correct_horse_battery_staple/corpus/redis'
|
30
|
+
autoload :Redis2, 'correct_horse_battery_staple/corpus/redis2'
|
31
|
+
# autoload :KDTree, 'correct_horse_battery_staple/corpus/kdtree'
|
32
|
+
end
|
33
|
+
|
@@ -0,0 +1,278 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
# require 'securerandom'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
class CorrectHorseBatteryStaple::Corpus::Base < CorrectHorseBatteryStaple::Corpus
|
6
|
+
extend Forwardable
|
7
|
+
|
8
|
+
attr_accessor :frequency_mean, :frequency_stddev
|
9
|
+
attr_accessor :probability_mean, :probability_stddev
|
10
|
+
attr_accessor :original_size
|
11
|
+
attr_accessor :weighted_size
|
12
|
+
|
13
|
+
include CorrectHorseBatteryStaple::Common
|
14
|
+
include CorrectHorseBatteryStaple::Memoize
|
15
|
+
include Enumerable
|
16
|
+
|
17
|
+
# Accepts and ignores any arguments; runs the backend's
# initialize_backend_variables hook when the object responds to it.
def initialize(*args)
  return unless respond_to?(:initialize_backend_variables)
  initialize_backend_variables
end
|
20
|
+
|
21
|
+
# Builds a corpus of this class from +dest+.
def self.read(dest)
  new(dest)
end
|
24
|
+
|
25
|
+
# you MUST override this method for Enumerable to use
|
26
|
+
|
27
|
+
# Abstract: always raises NotImplementedError here.
def each(&block)
  raise(NotImplementedError)
end
|
30
|
+
|
31
|
+
# other methods you should implement if possible:
|
32
|
+
#
|
33
|
+
# Enumerable
|
34
|
+
# size
|
35
|
+
#
|
36
|
+
# CHBS::Corpus
|
37
|
+
# pick
|
38
|
+
# words
|
39
|
+
# frequencies
|
40
|
+
#
|
41
|
+
|
42
|
+
|
43
|
+
# All entries, sorted by their natural ordering (<=>).
def sorted_entries
  unsorted = entries
  unsorted.sort
end
|
46
|
+
|
47
|
+
# return all the candidates for a given set of options
|
48
|
+
# Returns the entries accepted by the filter that +options+ describes
# (see filter_for_options). With no options, or options that produce no
# filter, every entry is a candidate, so all entries are returned.
#
# (The original returned +size+ - an Integer - in the unfiltered case,
# which contradicted the filtered case's Array result.)
def candidates(options = {})
  filter = filter_for_options(options) if options && !options.empty?
  return entries unless filter

  entries.select { |entry| filter.call(entry) }
end
|
54
|
+
|
55
|
+
# Number of entries accepted by the filter that +options+ describes; the
# full size when there are no options or no filter results from them.
def count_candidates(options = {})
  filter = (options && !options.empty?) ? filter_for_options(options) : nil
  return size unless filter

  matched = 0
  each { |entry| matched += 1 if filter.call(entry) }
  matched
end
|
66
|
+
memoize :count_candidates
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
#
|
71
|
+
# this is the core password picker method. it is not especially
|
72
|
+
# efficient but it is relatively generic. If a corpus supports
|
73
|
+
# Enumerable, it will work.
|
74
|
+
#
|
75
|
+
# Picks +count+ random entries.
#
# options:
#   :percentile     - restricts sampling to that percentile's index range
#   :word_length    - object answering include?(length), e.g. a Range
#   :filter         - a lambda or an array of lambdas each entry must pass
#   :max_iterations - maximum number of random draws (default 1000)
#
# Raises ArgumentError when the sampled index range holds fewer entries
# than +count+, and RuntimeError when the draws run out before +count+
# acceptable entries were found.
def pick(count, options = {})
  array = CorrectHorseBatteryStaple::StatisticalArray.new(sorted_entries)

  # Array() hands back the caller's own array when :filter already is one;
  # copy it so appending the word-length filter does not modify it.
  filters = Array(options[:filter]).dup

  range =
    if options[:percentile]
      array.index_range_for_percentile(options[:percentile])
    else
      0..array.size-1
    end
  span = range_size(range)

  if span < count
    raise ArgumentError, "Percentile range contains fewer words than requested count"
  end

  if options[:word_length]
    wl = options[:word_length]
    filters << lambda {|entry| wl.include? entry.word.length }
  end

  filter = filters.empty? ? nil : compose_filters(filters)

  max_iterations = options[:max_iterations] || 1000

  result = []
  iterations = 0
  while result.length < count && iterations < max_iterations
    entry = array[random_number(span) + range.first]
    result << entry if entry && (!filter || filter.call(entry))
    iterations += 1
  end

  raise "Cannot find #{count} words matching criteria" if result.length < count
  result
end
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
# The word of every entry that passes the currently queued filters.
def words
  execute_filters.map(&:word)
end
|
120
|
+
memoize :words
|
121
|
+
|
122
|
+
# no-op for serialized forms
|
123
|
+
# Does nothing in this base class; +max+ is ignored.
def precache(max=0); end
|
125
|
+
|
126
|
+
# A StatisticalArray holding the frequency of every entry.
def frequencies
  raw = entries.map(&:frequency)
  CorrectHorseBatteryStaple::StatisticalArray.new(raw)
end
|
129
|
+
memoize :frequencies
|
130
|
+
|
131
|
+
# Bits of entropy contributed by one word chosen uniformly from the
# corpus: log2 of the entry count.
#
# Math.log2 is used rather than log(n)/log(2) because the quotient form
# picks up floating-point error even for exact powers of two.
def entropy_per_word
  Math.log2(count)
end
|
134
|
+
|
135
|
+
# filtering
|
136
|
+
|
137
|
+
# Queues +block+ as a filter and returns self so calls can be chained.
def filter(&block)
  @filters ||= []
  @filters.push(block)
  self
end
|
141
|
+
|
142
|
+
# Drops all queued filters.
def reset
  @filters = Array.new
end
|
145
|
+
|
146
|
+
# create a single composed function of all the filters
|
147
|
+
# Folds +filters+ into one lambda that accepts a value only when every
# filter does (short-circuiting, left to right). Returns nil for a missing
# or empty list; a single filter is returned unchanged.
def compose_filters(filters)
  return nil unless filters && !filters.empty?

  filters.inject do |combined, following|
    lambda { |value| combined.call(value) && following.call(value) }
  end
end
|
153
|
+
|
154
|
+
# Returns self when no filters are queued; otherwise a new corpus of the
# same class built from the entries that pass the queued filters, carrying
# over original_size.
#
# @filters is nil until filter or reset has been called, so nil is treated
# the same as an empty list.
def result
  return self if @filters.nil? || @filters.empty?

  self.class.new(execute_filters).tap do |new_corpus|
    new_corpus.original_size = self.original_size
  end
end
|
161
|
+
|
162
|
+
|
163
|
+
## statistics
|
164
|
+
|
165
|
+
# Assigns each key/value of +hash+ through the matching "<key>=" writer;
# keys without a writer the object responds to are skipped.
def load_stats_from_hash(hash)
  hash.each do |name, value|
    writer = :"#{name}="
    next unless respond_to?(writer)
    send(writer, value)
  end
end
|
171
|
+
|
172
|
+
# Recomputes the corpus-wide statistics (weighted size, probability and
# frequency mean/stddev) and then the per-entry rank, distance,
# probability, distance_probability and percentile. Returns self.
def recalculate
  size = self.size
  frequencies = self.frequencies

  # corpus-wide statistics
  # Kernel#BigDecimal is used because BigDecimal.new is no longer provided
  # by current bigdecimal releases.
  self.weighted_size = frequencies.reduce(BigDecimal("0"), :+)
  probabilities = frequencies.map { |freq| (freq/weighted_size) * 100 }
  (self.probability_mean, self.probability_stddev) =
    CorrectHorseBatteryStaple::StatisticalArray.new(probabilities).mean_and_standard_deviation

  (self.frequency_mean, self.frequency_stddev) = frequencies.mean_and_standard_deviation

  each_with_index do |entry, index|
    entry.rank                 = size - index
    entry.distance             = (entry.frequency-frequency_mean)/frequency_stddev
    entry.probability          = entry.frequency / weighted_size
    entry.distance_probability = (entry.probability - probability_mean) / probability_stddev
    # NOTE(review): with a 0-based index this is negative for the first
    # entry - confirm whether (index + 0.5) was intended.
    entry.percentile           = (index-0.5)/size * 100
  end

  self
end
|
203
|
+
|
204
|
+
# Summary statistics as a Hash keyed by symbol; weighted_size is converted
# with to_f.
def stats
  summary = {}
  summary[:frequency_mean]     = frequency_mean
  summary[:frequency_stddev]   = frequency_stddev
  summary[:probability_mean]   = probability_mean
  summary[:probability_stddev] = probability_stddev
  summary[:size]               = count
  summary[:original_size]      = original_size
  summary[:weighted_size]      = weighted_size.to_f
  summary
end
|
210
|
+
|
211
|
+
# Multi-line summary: class name, entry count, then one line per stat.
def inspect
  type_name   = self.class.name
  entry_count = count
  stat_lines  = stats.map {|k,v| " #{k}: #{v}\n" }.join("")
  "Type: #{type_name}\nEntry count: #{entry_count}\n\nStats:\n#{stat_lines}\n"
end
|
220
|
+
|
221
|
+
alias :length :count
|
222
|
+
|
223
|
+
|
224
|
+
protected
|
225
|
+
|
226
|
+
#
|
227
|
+
# Return the number of distinct objects within the Range.
|
228
|
+
# This assumes plain vanilla ranges, though it does respect .. vs ...
|
229
|
+
#
|
230
|
+
# Why? Range#count is basically #to_a.count, which is INSANE
|
231
|
+
#
|
232
|
+
def range_count(r)
  first = r.first
  last  = r.last
  # an inclusive range counts its end point too, in whichever direction it runs
  adjustment =
    if r.exclude_end?
      0
    elsif first > last
      -1
    else
      1
    end
  (last - first + adjustment).abs
end
|
237
|
+
alias :range_size :range_count
|
238
|
+
|
239
|
+
#
|
240
|
+
# Given a filter, return all Word objects in this Corpus that the
|
241
|
+
# filter accepts.
|
242
|
+
#
|
243
|
+
# this is an exceptionally inefficient version
|
244
|
+
def execute_filters
  begin
    if @filters.nil? || @filters.empty?
      entries
    else
      combined = compose_filters(@filters)
      entries.select(&combined)
    end
  ensure
    # the queued filters are single-use: always cleared, even on error
    reset
  end
end
|
250
|
+
|
251
|
+
#
|
252
|
+
# Return a single lambda that will return true/false given a Word object
|
253
|
+
#
|
254
|
+
# Respects the :word_length, :percentile, and :filter options
|
255
|
+
# :word_length and :percentile should be Range objects
|
256
|
+
# :filter can be a single Proc/lambda or an array of them
|
257
|
+
#
|
258
|
+
# Builds one lambda from +options+, or returns nil when +options+ is
# nil/empty or names no filters.
#
#   :filter      - a lambda or an array of lambdas
#   :percentile  - object answering include?(entry.percentile), e.g. a Range
#   :word_length - object answering include?(entry.word.length)
def filter_for_options(options = {})
  return nil if !options || options.empty?

  # Array() hands back the caller's own array when :filter already is one;
  # copy it so the filters appended below do not leak into it.
  filters = Array(options[:filter]).dup

  if options[:percentile]
    p_range = options[:percentile]
    filters << lambda {|entry| p_range.include? entry.percentile }
  end

  if options[:word_length]
    wl_range = options[:word_length]
    filters << lambda {|entry| wl_range.include? entry.word.length }
  end

  filters.empty? ? nil : compose_filters(filters)
end
|
274
|
+
memoize :filter_for_options
|
275
|
+
|
276
|
+
end
|
277
|
+
|
278
|
+
# Random.srand(SecureRandom.random_number)
|