hiccup 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.mdown +0 -0
- data/hiccup.gemspec +0 -0
- data/lib/hiccup/enumerable.rb +21 -0
- data/lib/hiccup/inferable.rb +4 -284
- data/lib/hiccup/inferable/dates_enumerator.rb +30 -0
- data/lib/hiccup/inferable/guesser.rb +180 -0
- data/lib/hiccup/inferable/score.rb +38 -0
- data/lib/hiccup/inferable/scorer.rb +67 -0
- data/lib/hiccup/schedule.rb +1 -1
- data/lib/hiccup/version.rb +1 -1
- data/test/enumerable_test.rb +1 -2
- data/test/inferrable_test.rb +7 -1
- data/test/performance_test.rb +35 -0
- metadata +23 -39
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e36feec08a5a3bec9f239244d7da51952930cae4
|
4
|
+
data.tar.gz: f9a5e21e9a5e4ef0f38b0ce49f138ad8bea0963d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 068192d3b01e740acb6b356a9e4dadeb980c6f1c22bc68406d3480441f54f8a0239d942fa617c8e7e02032be81b74d921f1c3c0b99181687f6f9be310ee414df
|
7
|
+
data.tar.gz: 4433629beecf17aa82e1355413bce1546c2d4fcb0febd481e8689337131492fc702d0bd2b017b32fff744308a87c1e3c571d83402463875eaf7c3d4e887efd46
|
data/README.mdown
CHANGED
File without changes
|
data/hiccup.gemspec
CHANGED
File without changes
|
data/lib/hiccup/enumerable.rb
CHANGED
@@ -84,5 +84,26 @@ module Hiccup
|
|
84
84
|
|
85
85
|
|
86
86
|
|
87
|
+
def first_n_occurrences(limit)
|
88
|
+
n_occurrences_on_or_after(limit, start_date)
|
89
|
+
end
|
90
|
+
|
91
|
+
def n_occurrences_after(limit, date)
|
92
|
+
n_occurrences_on_or_after(limit, date.to_date + 1)
|
93
|
+
end
|
94
|
+
|
95
|
+
def n_occurrences_on_or_after(limit, date)
|
96
|
+
return [] if ends? and date > end_date
|
97
|
+
|
98
|
+
occurrences = []
|
99
|
+
enum = enumerator.new(self, date)
|
100
|
+
while (occurrence = enum.next) && occurrences.length < limit
|
101
|
+
occurrences << occurrence
|
102
|
+
end
|
103
|
+
occurrences
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
|
87
108
|
end
|
88
109
|
end
|
data/lib/hiccup/inferable.rb
CHANGED
@@ -3,6 +3,9 @@ require 'active_support/core_ext/date/conversions'
|
|
3
3
|
require 'hiccup/core_ext/enumerable'
|
4
4
|
require 'hiccup/core_ext/hash'
|
5
5
|
require "hiccup/core_ext/date"
|
6
|
+
require 'hiccup/inferable/dates_enumerator'
|
7
|
+
require 'hiccup/inferable/guesser'
|
8
|
+
require 'hiccup/inferable/score'
|
6
9
|
|
7
10
|
|
8
11
|
module Hiccup
|
@@ -17,7 +20,7 @@ module Hiccup
|
|
17
20
|
|
18
21
|
dates = extract_array_of_dates!(dates)
|
19
22
|
enumerator = DatesEnumerator.new(dates)
|
20
|
-
guesser = Guesser.new(self,
|
23
|
+
guesser = options.fetch :guesser, Guesser.new(self, options.merge(verbose: verbosity >= 2))
|
21
24
|
schedules = []
|
22
25
|
|
23
26
|
confidences = []
|
@@ -116,288 +119,5 @@ module Hiccup
|
|
116
119
|
|
117
120
|
|
118
121
|
end
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
class Guesser
|
123
|
-
|
124
|
-
def initialize(klass, options={})
|
125
|
-
@klass = klass
|
126
|
-
@verbose = options.fetch(:verbose, false)
|
127
|
-
start!
|
128
|
-
end
|
129
|
-
|
130
|
-
attr_reader :confidence, :schedule, :dates
|
131
|
-
|
132
|
-
def start!
|
133
|
-
@dates = []
|
134
|
-
@schedule = nil
|
135
|
-
@confidence = 0
|
136
|
-
end
|
137
|
-
alias :restart! :start!
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
def <<(date)
|
143
|
-
@dates << date
|
144
|
-
@schedule, @confidence = best_schedule_for(@dates)
|
145
|
-
date
|
146
|
-
end
|
147
|
-
|
148
|
-
def count
|
149
|
-
@dates.length
|
150
|
-
end
|
151
|
-
|
152
|
-
def predicted?(date)
|
153
|
-
@schedule && @schedule.contains?(date)
|
154
|
-
end
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
def best_schedule_for(dates)
|
159
|
-
guesses = generate_guesses(dates)
|
160
|
-
pick_best_guess(guesses, dates)
|
161
|
-
end
|
162
|
-
|
163
|
-
def generate_guesses(dates)
|
164
|
-
@start_date = dates.first
|
165
|
-
@end_date = dates.last
|
166
|
-
[].tap do |guesses|
|
167
|
-
guesses.concat generate_yearly_guesses(dates)
|
168
|
-
guesses.concat generate_monthly_guesses(dates)
|
169
|
-
guesses.concat generate_weekly_guesses(dates)
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
def generate_yearly_guesses(dates)
|
174
|
-
histogram_of_patterns = dates.to_histogram do |date|
|
175
|
-
[date.month, date.day]
|
176
|
-
end
|
177
|
-
patterns_by_popularity = histogram_of_patterns.flip # => {1 => [...], 2 => [...], 5 => [a, b]}
|
178
|
-
highest_popularity = patterns_by_popularity.keys.max # => 5
|
179
|
-
most_popular = patterns_by_popularity[highest_popularity].first # => a
|
180
|
-
start_date = Date.new(@start_date.year, *most_popular)
|
181
|
-
|
182
|
-
[].tap do |guesses|
|
183
|
-
(1...5).each do |skip|
|
184
|
-
guesses << @klass.new.tap do |schedule|
|
185
|
-
schedule.kind = :annually
|
186
|
-
schedule.start_date = start_date
|
187
|
-
schedule.end_date = @end_date
|
188
|
-
schedule.skip = skip
|
189
|
-
end
|
190
|
-
end
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
def generate_monthly_guesses(dates)
|
195
|
-
histogram_of_patterns = dates.to_histogram do |date|
|
196
|
-
[date.get_nth_wday_of_month, Date::DAYNAMES[date.wday]]
|
197
|
-
end
|
198
|
-
patterns_by_popularity = histogram_of_patterns.flip
|
199
|
-
|
200
|
-
histogram_of_days = dates.to_histogram(&:day)
|
201
|
-
days_by_popularity = histogram_of_days.flip
|
202
|
-
|
203
|
-
if @verbose
|
204
|
-
puts "",
|
205
|
-
" monthly analysis:",
|
206
|
-
" input: #{dates.inspect}",
|
207
|
-
" histogram (weekday): #{histogram_of_patterns.inspect}",
|
208
|
-
" by_popularity (weekday): #{patterns_by_popularity.inspect}",
|
209
|
-
" histogram (day): #{histogram_of_days.inspect}",
|
210
|
-
" by_popularity (day): #{days_by_popularity.inspect}"
|
211
|
-
end
|
212
|
-
|
213
|
-
[].tap do |guesses|
|
214
|
-
(1...5).each do |skip|
|
215
|
-
enumerate_by_popularity(days_by_popularity) do |days|
|
216
|
-
guesses << @klass.new.tap do |schedule|
|
217
|
-
schedule.kind = :monthly
|
218
|
-
schedule.start_date = @start_date
|
219
|
-
schedule.end_date = @end_date
|
220
|
-
schedule.skip = skip
|
221
|
-
schedule.monthly_pattern = days
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
enumerate_by_popularity(patterns_by_popularity) do |patterns|
|
226
|
-
guesses << @klass.new.tap do |schedule|
|
227
|
-
schedule.kind = :monthly
|
228
|
-
schedule.start_date = @start_date
|
229
|
-
schedule.end_date = @end_date
|
230
|
-
schedule.skip = skip
|
231
|
-
schedule.monthly_pattern = patterns
|
232
|
-
end
|
233
|
-
end
|
234
|
-
end
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
|
-
def generate_weekly_guesses(dates)
|
239
|
-
[].tap do |guesses|
|
240
|
-
histogram_of_wdays = dates.to_histogram do |date|
|
241
|
-
Date::DAYNAMES[date.wday]
|
242
|
-
end
|
243
|
-
wdays_by_popularity = histogram_of_wdays.flip
|
244
|
-
|
245
|
-
if @verbose
|
246
|
-
puts "",
|
247
|
-
" weekly analysis:",
|
248
|
-
" input: #{dates.inspect}",
|
249
|
-
" histogram: #{histogram_of_wdays.inspect}",
|
250
|
-
" by_popularity: #{wdays_by_popularity.inspect}"
|
251
|
-
end
|
252
|
-
|
253
|
-
(1...5).each do |skip|
|
254
|
-
enumerate_by_popularity(wdays_by_popularity) do |wdays|
|
255
|
-
guesses << @klass.new.tap do |schedule|
|
256
|
-
schedule.kind = :weekly
|
257
|
-
schedule.start_date = @start_date
|
258
|
-
schedule.end_date = @end_date
|
259
|
-
schedule.skip = skip
|
260
|
-
schedule.weekly_pattern = wdays
|
261
|
-
end
|
262
|
-
end
|
263
|
-
end
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
# Expects a hash of values grouped by popularity
|
270
|
-
# Yields the most popular values first, and then
|
271
|
-
# increasingly less popular values
|
272
|
-
def enumerate_by_popularity(values_by_popularity)
|
273
|
-
popularities = values_by_popularity.keys.sort.reverse
|
274
|
-
popularities.length.times do |i|
|
275
|
-
at_popularities = popularities.take(i + 1)
|
276
|
-
yield values_by_popularity.values_at(*at_popularities).flatten(1)
|
277
|
-
end
|
278
|
-
end
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
def pick_best_guess(guesses, dates)
|
283
|
-
scored_guesses = guesses \
|
284
|
-
.map { |guess| [guess, score_guess(guess, dates)] } \
|
285
|
-
.sort_by { |(guess, score)| -score.to_f }
|
286
|
-
|
287
|
-
if @verbose
|
288
|
-
puts "\nGUESSES FOR #{dates}:"
|
289
|
-
scored_guesses.each do |(guess, score)|
|
290
|
-
puts " (%.3f p/%.3f b/%.3f c/%.3f) #{guess.humanize}" % [
|
291
|
-
score.to_f,
|
292
|
-
score.prediction_rate,
|
293
|
-
score.brick_penalty,
|
294
|
-
score.complexity_penalty]
|
295
|
-
end
|
296
|
-
puts ""
|
297
|
-
end
|
298
|
-
|
299
|
-
scored_guesses.first
|
300
|
-
end
|
301
|
-
|
302
|
-
def score_guess(guess, input_dates)
|
303
|
-
predicted_dates = guess.occurrences_between(guess.start_date, guess.end_date)
|
304
|
-
|
305
|
-
# prediction_rate is the percent of input dates predicted
|
306
|
-
predictions = (predicted_dates & input_dates).length
|
307
|
-
prediction_rate = Float(predictions) / Float(input_dates.length)
|
308
|
-
|
309
|
-
# bricks are dates predicted by this guess but not in the input
|
310
|
-
bricks = (predicted_dates - input_dates).length
|
311
|
-
|
312
|
-
# brick_rate is the percent of bricks to predictions
|
313
|
-
# A brick_rate >= 1 means that this guess bricks more than it predicts
|
314
|
-
brick_rate = Float(bricks) / Float(input_dates.length)
|
315
|
-
|
316
|
-
# complexity measures how many rules are necesary
|
317
|
-
# to describe the pattern
|
318
|
-
complexity = complexity_of(guess)
|
319
|
-
|
320
|
-
# complexity_rate is the number of rules per inputs
|
321
|
-
complexity_rate = Float(complexity) / Float(input_dates.length)
|
322
|
-
|
323
|
-
Score.new(prediction_rate, brick_rate, complexity_rate)
|
324
|
-
end
|
325
|
-
|
326
|
-
def complexity_of(schedule)
|
327
|
-
return schedule.weekly_pattern.length if schedule.weekly?
|
328
|
-
return schedule.monthly_pattern.length if schedule.monthly?
|
329
|
-
1
|
330
|
-
end
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
class Score < Struct.new(:prediction_rate, :brick_rate, :complexity_rate)
|
335
|
-
|
336
|
-
# as brick rate rises, our confidence in this guess drops
|
337
|
-
def brick_penalty
|
338
|
-
brick_penalty = brick_rate * 0.33
|
339
|
-
brick_penalty = 1 if brick_penalty > 1
|
340
|
-
brick_penalty
|
341
|
-
end
|
342
|
-
|
343
|
-
# as the complexity rises, our confidence in this guess drops
|
344
|
-
# this hash table is a stand-in for a proper formala
|
345
|
-
#
|
346
|
-
# A complexity of 1 means that 1 rule is required per input
|
347
|
-
# date. This means we haven't really discovered a pattern.
|
348
|
-
def complexity_penalty
|
349
|
-
complexity_rate
|
350
|
-
end
|
351
|
-
|
352
|
-
# our confidence is weakened by bricks and complexity
|
353
|
-
def confidence
|
354
|
-
confidence = 1.0
|
355
|
-
confidence *= (1 - brick_penalty)
|
356
|
-
confidence *= (1 - complexity_penalty)
|
357
|
-
confidence
|
358
|
-
end
|
359
|
-
|
360
|
-
# a number between 0 and 1
|
361
|
-
def to_f
|
362
|
-
prediction_rate * confidence
|
363
|
-
end
|
364
|
-
|
365
|
-
end
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
end
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
class DatesEnumerator
|
374
|
-
|
375
|
-
def initialize(dates)
|
376
|
-
@dates = dates
|
377
|
-
@last_index = @dates.length - 1
|
378
|
-
@index = -1
|
379
|
-
end
|
380
|
-
|
381
|
-
attr_reader :index
|
382
|
-
|
383
|
-
def done?
|
384
|
-
@index == @last_index
|
385
|
-
end
|
386
|
-
|
387
|
-
def next
|
388
|
-
@index += 1
|
389
|
-
raise OutOfRangeException if @index > @last_index
|
390
|
-
@dates[@index]
|
391
|
-
end
|
392
|
-
|
393
|
-
def rewind_by(n)
|
394
|
-
@index -= n
|
395
|
-
@index = -1 if @index < -1
|
396
|
-
end
|
397
|
-
|
398
|
-
end
|
399
|
-
|
400
|
-
|
401
|
-
|
402
122
|
end
|
403
123
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Hiccup
|
2
|
+
module Inferable
|
3
|
+
class DatesEnumerator
|
4
|
+
|
5
|
+
def initialize(dates)
|
6
|
+
@dates = dates
|
7
|
+
@last_index = @dates.length - 1
|
8
|
+
@index = -1
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_reader :index
|
12
|
+
|
13
|
+
def done?
|
14
|
+
@index == @last_index
|
15
|
+
end
|
16
|
+
|
17
|
+
def next
|
18
|
+
@index += 1
|
19
|
+
raise OutOfRangeException if @index > @last_index
|
20
|
+
@dates[@index]
|
21
|
+
end
|
22
|
+
|
23
|
+
def rewind_by(n)
|
24
|
+
@index -= n
|
25
|
+
@index = -1 if @index < -1
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'hiccup/inferable/scorer'
|
2
|
+
|
3
|
+
module Hiccup
|
4
|
+
module Inferable
|
5
|
+
class Guesser
|
6
|
+
|
7
|
+
def initialize(klass, options={})
|
8
|
+
@klass = klass
|
9
|
+
@verbose = options.fetch(:verbose, false)
|
10
|
+
@allow_skips = options.fetch(:allow_skips, true)
|
11
|
+
@max_complexity = options.fetch(:max_complexity, 3)
|
12
|
+
@scorer = options.fetch(:scorer, Scorer.new(options))
|
13
|
+
start!
|
14
|
+
end
|
15
|
+
|
16
|
+
attr_reader :confidence, :schedule, :dates, :scorer, :max_complexity
|
17
|
+
|
18
|
+
def allow_skips?
|
19
|
+
@allow_skips
|
20
|
+
end
|
21
|
+
|
22
|
+
def start!
|
23
|
+
@dates = []
|
24
|
+
@schedule = nil
|
25
|
+
@confidence = 0
|
26
|
+
end
|
27
|
+
alias :restart! :start!
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
def <<(date)
|
33
|
+
@dates << date
|
34
|
+
@schedule, @confidence = best_schedule_for(@dates)
|
35
|
+
date
|
36
|
+
end
|
37
|
+
|
38
|
+
def count
|
39
|
+
@dates.length
|
40
|
+
end
|
41
|
+
|
42
|
+
def predicted?(date)
|
43
|
+
@schedule && @schedule.contains?(date)
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
def best_schedule_for(dates)
|
49
|
+
guesses = generate_guesses(dates)
|
50
|
+
scorer.pick_best_guess(guesses, dates)
|
51
|
+
end
|
52
|
+
|
53
|
+
def generate_guesses(dates)
|
54
|
+
@start_date = dates.first
|
55
|
+
@end_date = dates.last
|
56
|
+
[].tap do |guesses|
|
57
|
+
guesses.concat generate_yearly_guesses(dates)
|
58
|
+
guesses.concat generate_monthly_guesses(dates)
|
59
|
+
guesses.concat generate_weekly_guesses(dates)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def generate_yearly_guesses(dates)
|
64
|
+
histogram_of_patterns = dates.to_histogram do |date|
|
65
|
+
[date.month, date.day]
|
66
|
+
end
|
67
|
+
patterns_by_popularity = histogram_of_patterns.flip # => {1 => [...], 2 => [...], 5 => [a, b]}
|
68
|
+
highest_popularity = patterns_by_popularity.keys.max # => 5
|
69
|
+
most_popular = patterns_by_popularity[highest_popularity].first # => a
|
70
|
+
start_date = Date.new(@start_date.year, *most_popular)
|
71
|
+
|
72
|
+
[].tap do |guesses|
|
73
|
+
skip_range.each do |skip|
|
74
|
+
guesses << @klass.new.tap do |schedule|
|
75
|
+
schedule.kind = :annually
|
76
|
+
schedule.start_date = start_date
|
77
|
+
schedule.end_date = @end_date
|
78
|
+
schedule.skip = skip
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def generate_monthly_guesses(dates)
|
85
|
+
histogram_of_patterns = dates.to_histogram do |date|
|
86
|
+
[date.get_nth_wday_of_month, Date::DAYNAMES[date.wday]]
|
87
|
+
end
|
88
|
+
patterns_by_popularity = histogram_of_patterns.flip
|
89
|
+
|
90
|
+
histogram_of_days = dates.to_histogram(&:day)
|
91
|
+
days_by_popularity = histogram_of_days.flip
|
92
|
+
|
93
|
+
if @verbose
|
94
|
+
puts "",
|
95
|
+
" monthly analysis:",
|
96
|
+
" input: #{dates.inspect}",
|
97
|
+
" histogram (weekday): #{histogram_of_patterns.inspect}",
|
98
|
+
" by_popularity (weekday): #{patterns_by_popularity.inspect}",
|
99
|
+
" histogram (day): #{histogram_of_days.inspect}",
|
100
|
+
" by_popularity (day): #{days_by_popularity.inspect}"
|
101
|
+
end
|
102
|
+
|
103
|
+
[].tap do |guesses|
|
104
|
+
skip_range.each do |skip|
|
105
|
+
enumerate_by_popularity(days_by_popularity) do |days|
|
106
|
+
next if days.length > max_complexity
|
107
|
+
guesses << @klass.new.tap do |schedule|
|
108
|
+
schedule.kind = :monthly
|
109
|
+
schedule.start_date = @start_date
|
110
|
+
schedule.end_date = @end_date
|
111
|
+
schedule.skip = skip
|
112
|
+
schedule.monthly_pattern = days
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
enumerate_by_popularity(patterns_by_popularity) do |patterns|
|
117
|
+
next if patterns.length > max_complexity
|
118
|
+
guesses << @klass.new.tap do |schedule|
|
119
|
+
schedule.kind = :monthly
|
120
|
+
schedule.start_date = @start_date
|
121
|
+
schedule.end_date = @end_date
|
122
|
+
schedule.skip = skip
|
123
|
+
schedule.monthly_pattern = patterns
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def generate_weekly_guesses(dates)
|
131
|
+
[].tap do |guesses|
|
132
|
+
histogram_of_wdays = dates.to_histogram do |date|
|
133
|
+
Date::DAYNAMES[date.wday]
|
134
|
+
end
|
135
|
+
wdays_by_popularity = histogram_of_wdays.flip
|
136
|
+
|
137
|
+
if @verbose
|
138
|
+
puts "",
|
139
|
+
" weekly analysis:",
|
140
|
+
" input: #{dates.inspect}",
|
141
|
+
" histogram: #{histogram_of_wdays.inspect}",
|
142
|
+
" by_popularity: #{wdays_by_popularity.inspect}"
|
143
|
+
end
|
144
|
+
|
145
|
+
skip_range.each do |skip|
|
146
|
+
enumerate_by_popularity(wdays_by_popularity) do |wdays|
|
147
|
+
next if wdays.length > max_complexity
|
148
|
+
guesses << @klass.new.tap do |schedule|
|
149
|
+
schedule.kind = :weekly
|
150
|
+
schedule.start_date = @start_date
|
151
|
+
schedule.end_date = @end_date
|
152
|
+
schedule.skip = skip
|
153
|
+
schedule.weekly_pattern = wdays
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def skip_range
|
161
|
+
return 1..1 unless allow_skips?
|
162
|
+
1...5
|
163
|
+
end
|
164
|
+
|
165
|
+
|
166
|
+
|
167
|
+
# Expects a hash of values grouped by popularity
|
168
|
+
# Yields the most popular values first, and then
|
169
|
+
# increasingly less popular values
|
170
|
+
def enumerate_by_popularity(values_by_popularity)
|
171
|
+
popularities = values_by_popularity.keys.sort.reverse
|
172
|
+
popularities.length.times do |i|
|
173
|
+
at_popularities = popularities.take(i + 1)
|
174
|
+
yield values_by_popularity.values_at(*at_popularities).flatten(1)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Hiccup
|
2
|
+
module Inferable
|
3
|
+
|
4
|
+
class Score < Struct.new(:prediction_rate, :brick_rate, :complexity_rate)
|
5
|
+
|
6
|
+
# as brick rate rises, our confidence in this guess drops
|
7
|
+
def brick_penalty
|
8
|
+
brick_penalty = brick_rate * 0.33
|
9
|
+
brick_penalty = 1 if brick_penalty > 1
|
10
|
+
brick_penalty
|
11
|
+
end
|
12
|
+
|
13
|
+
# as the complexity rises, our confidence in this guess drops
|
14
|
+
# this hash table is a stand-in for a proper formala
|
15
|
+
#
|
16
|
+
# A complexity of 1 means that 1 rule is required per input
|
17
|
+
# date. This means we haven't really discovered a pattern.
|
18
|
+
def complexity_penalty
|
19
|
+
complexity_rate
|
20
|
+
end
|
21
|
+
|
22
|
+
# our confidence is weakened by bricks and complexity
|
23
|
+
def confidence
|
24
|
+
confidence = 1.0
|
25
|
+
confidence *= (1 - brick_penalty)
|
26
|
+
confidence *= (1 - complexity_penalty)
|
27
|
+
confidence
|
28
|
+
end
|
29
|
+
|
30
|
+
# a number between 0 and 1
|
31
|
+
def to_f
|
32
|
+
prediction_rate * confidence
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Hiccup
|
2
|
+
module Inferable
|
3
|
+
class Scorer
|
4
|
+
|
5
|
+
def initialize(options={})
|
6
|
+
@verbose = options.fetch(:verbose, false)
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
def pick_best_guess(guesses, dates)
|
12
|
+
scored_guesses = guesses \
|
13
|
+
.map { |guess| [guess, score_guess(guess, dates)] } \
|
14
|
+
.sort_by { |(guess, score)| -score.to_f }
|
15
|
+
|
16
|
+
if @verbose
|
17
|
+
puts "\nGUESSES FOR #{dates}:"
|
18
|
+
scored_guesses.each do |(guess, score)|
|
19
|
+
puts " (%.3f p/%.3f b/%.3f c/%.3f) #{guess.humanize}" % [
|
20
|
+
score.to_f,
|
21
|
+
score.prediction_rate,
|
22
|
+
score.brick_penalty,
|
23
|
+
score.complexity_penalty]
|
24
|
+
end
|
25
|
+
puts ""
|
26
|
+
end
|
27
|
+
|
28
|
+
scored_guesses.first
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
def score_guess(guess, input_dates)
|
34
|
+
predicted_dates = guess.occurrences_between(guess.start_date, guess.end_date)
|
35
|
+
|
36
|
+
# prediction_rate is the percent of input dates predicted
|
37
|
+
predictions = (predicted_dates & input_dates).length
|
38
|
+
prediction_rate = Float(predictions) / Float(input_dates.length)
|
39
|
+
|
40
|
+
# bricks are dates predicted by this guess but not in the input
|
41
|
+
bricks = (predicted_dates - input_dates).length
|
42
|
+
|
43
|
+
# brick_rate is the percent of bricks to predictions
|
44
|
+
# A brick_rate >= 1 means that this guess bricks more than it predicts
|
45
|
+
brick_rate = Float(bricks) / Float(input_dates.length)
|
46
|
+
|
47
|
+
# complexity measures how many rules are necesary
|
48
|
+
# to describe the pattern
|
49
|
+
complexity = complexity_of(guess)
|
50
|
+
|
51
|
+
# complexity_rate is the number of rules per inputs
|
52
|
+
complexity_rate = Float(complexity) / Float(input_dates.length)
|
53
|
+
|
54
|
+
Score.new(prediction_rate, brick_rate, complexity_rate)
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
def complexity_of(schedule)
|
60
|
+
return schedule.weekly_pattern.length if schedule.weekly?
|
61
|
+
return schedule.monthly_pattern.length if schedule.monthly?
|
62
|
+
1
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/hiccup/schedule.rb
CHANGED
data/lib/hiccup/version.rb
CHANGED
data/test/enumerable_test.rb
CHANGED
@@ -3,7 +3,6 @@ require "test_helper"
|
|
3
3
|
|
4
4
|
class EnumerableTest < ActiveSupport::TestCase
|
5
5
|
include Hiccup
|
6
|
-
PERFORMANCE_TEST = false
|
7
6
|
|
8
7
|
|
9
8
|
|
@@ -347,7 +346,7 @@ class EnumerableTest < ActiveSupport::TestCase
|
|
347
346
|
|
348
347
|
|
349
348
|
|
350
|
-
if PERFORMANCE_TEST
|
349
|
+
if ENV['PERFORMANCE_TEST']
|
351
350
|
test "performance test" do
|
352
351
|
n = 100
|
353
352
|
|
data/test/inferrable_test.rb
CHANGED
@@ -248,9 +248,15 @@ class InferableTest < ActiveSupport::TestCase
|
|
248
248
|
|
249
249
|
|
250
250
|
|
251
|
-
test "should diabolically complex schedules" do
|
251
|
+
test "should reject diabolically complex schedules by default" do
|
252
252
|
dates = %w{2012-11-06 2012-11-08 2012-11-15 2012-11-20 2012-11-27 2012-11-29 2013-02-05 2013-02-14 2013-02-21 2013-02-19 2013-02-26 2013-05-07 2013-05-09 2013-05-16 2013-05-28 2013-05-21 2013-05-30}
|
253
253
|
schedules = Schedule.infer(dates)
|
254
|
+
refute_equal ["The first Tuesday, second Thursday, third Thursday, third Tuesday, fourth Tuesday, and fifth Thursday of every third month"], schedules.map(&:humanize)
|
255
|
+
end
|
256
|
+
|
257
|
+
test "should infer diabolically complex schedules on demand" do
|
258
|
+
dates = %w{2012-11-06 2012-11-08 2012-11-15 2012-11-20 2012-11-27 2012-11-29 2013-02-05 2013-02-14 2013-02-21 2013-02-19 2013-02-26 2013-05-07 2013-05-09 2013-05-16 2013-05-28 2013-05-21 2013-05-30}
|
259
|
+
schedules = Schedule.infer(dates, max_complexity: 99)
|
254
260
|
assert_equal ["The first Tuesday, second Thursday, third Thursday, third Tuesday, fourth Tuesday, and fifth Thursday of every third month"], schedules.map(&:humanize)
|
255
261
|
end
|
256
262
|
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
require "benchmark"
|
3
|
+
|
4
|
+
class PerformanceTest < ActiveSupport::TestCase
|
5
|
+
include Hiccup
|
6
|
+
|
7
|
+
|
8
|
+
{ 100 => 50,
|
9
|
+
500 => 50,
|
10
|
+
1000 => 50 }.each do |number, expected_duration|
|
11
|
+
test "should generated guesses from #{number} dates in under #{expected_duration}ms" do
|
12
|
+
guesser = Hiccup::Inferable::Guesser.new(Hiccup::Schedule)
|
13
|
+
dates = (0...number).map { |i| Date.new(2010, 1, 1) + i.week }
|
14
|
+
duration = Benchmark.ms { guesser.generate_guesses(dates) }
|
15
|
+
# puts "\e[33m\e[1m#{number}\e[0m\e[33m dates took \e[1m%.2fms\e[0m" % duration
|
16
|
+
assert duration <= expected_duration, "It took %.2fms" % duration
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
# Inferring 500 dates still takes 10 seconds.
|
22
|
+
# It spends 7.3 of those seconds predicting dates,
|
23
|
+
# 6.9 of those predicting monthly or weekly dates.
|
24
|
+
{ 10 => 0.1.seconds,
|
25
|
+
50 => 0.5.seconds,
|
26
|
+
100 => 1.0.seconds }.each do |number, expected_duration|
|
27
|
+
test "should infer a schedule from #{number} dates in under #{expected_duration} second(s)" do
|
28
|
+
dates = (0...number).map { |i| Date.new(2010, 1, 1) + i.week }
|
29
|
+
duration = Benchmark.ms { Schedule.infer(dates, verbosity: 0) } / 1000
|
30
|
+
assert duration <= expected_duration, "It took %.2f seconds" % duration
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
end
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hiccup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
5
|
-
prerelease:
|
4
|
+
version: 0.4.4
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Bob Lail
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-09-18 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: activesupport
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ~>
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ~>
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,39 +27,34 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: builder
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: ri_cal
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - '>='
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - '>='
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: rails
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
59
|
- - ~>
|
68
60
|
- !ruby/object:Gem::Version
|
@@ -70,7 +62,6 @@ dependencies:
|
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
66
|
- - ~>
|
76
67
|
- !ruby/object:Gem::Version
|
@@ -78,49 +69,43 @@ dependencies:
|
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: turn
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
|
-
- -
|
73
|
+
- - '>='
|
84
74
|
- !ruby/object:Gem::Version
|
85
75
|
version: '0'
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
|
-
- -
|
80
|
+
- - '>='
|
92
81
|
- !ruby/object:Gem::Version
|
93
82
|
version: '0'
|
94
83
|
- !ruby/object:Gem::Dependency
|
95
84
|
name: simplecov
|
96
85
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
86
|
requirements:
|
99
|
-
- -
|
87
|
+
- - '>='
|
100
88
|
- !ruby/object:Gem::Version
|
101
89
|
version: '0'
|
102
90
|
type: :development
|
103
91
|
prerelease: false
|
104
92
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
93
|
requirements:
|
107
|
-
- -
|
94
|
+
- - '>='
|
108
95
|
- !ruby/object:Gem::Version
|
109
96
|
version: '0'
|
110
97
|
- !ruby/object:Gem::Dependency
|
111
98
|
name: pry
|
112
99
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - '>='
|
116
102
|
- !ruby/object:Gem::Version
|
117
103
|
version: '0'
|
118
104
|
type: :development
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
107
|
requirements:
|
123
|
-
- -
|
108
|
+
- - '>='
|
124
109
|
- !ruby/object:Gem::Version
|
125
110
|
version: '0'
|
126
111
|
description: Hiccup mixes a-la-cart recurrence features into your data structure.
|
@@ -152,6 +137,10 @@ files:
|
|
152
137
|
- lib/hiccup/enumerable/weekly_enumerator.rb
|
153
138
|
- lib/hiccup/humanizable.rb
|
154
139
|
- lib/hiccup/inferable.rb
|
140
|
+
- lib/hiccup/inferable/dates_enumerator.rb
|
141
|
+
- lib/hiccup/inferable/guesser.rb
|
142
|
+
- lib/hiccup/inferable/score.rb
|
143
|
+
- lib/hiccup/inferable/scorer.rb
|
155
144
|
- lib/hiccup/schedule.rb
|
156
145
|
- lib/hiccup/serializable/ical.rb
|
157
146
|
- lib/hiccup/serializers/ical.rb
|
@@ -163,37 +152,31 @@ files:
|
|
163
152
|
- test/humanizable_test.rb
|
164
153
|
- test/ical_serializable_test.rb
|
165
154
|
- test/inferrable_test.rb
|
155
|
+
- test/performance_test.rb
|
166
156
|
- test/test_helper.rb
|
167
157
|
- test/validatable_test.rb
|
168
158
|
homepage: http://boblail.github.com/hiccup/
|
169
159
|
licenses: []
|
160
|
+
metadata: {}
|
170
161
|
post_install_message:
|
171
162
|
rdoc_options: []
|
172
163
|
require_paths:
|
173
164
|
- lib
|
174
165
|
required_ruby_version: !ruby/object:Gem::Requirement
|
175
|
-
none: false
|
176
166
|
requirements:
|
177
|
-
- -
|
167
|
+
- - '>='
|
178
168
|
- !ruby/object:Gem::Version
|
179
169
|
version: '0'
|
180
|
-
segments:
|
181
|
-
- 0
|
182
|
-
hash: -936135857702157052
|
183
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
184
|
-
none: false
|
185
171
|
requirements:
|
186
|
-
- -
|
172
|
+
- - '>='
|
187
173
|
- !ruby/object:Gem::Version
|
188
174
|
version: '0'
|
189
|
-
segments:
|
190
|
-
- 0
|
191
|
-
hash: -936135857702157052
|
192
175
|
requirements: []
|
193
176
|
rubyforge_project: hiccup
|
194
|
-
rubygems_version:
|
177
|
+
rubygems_version: 2.0.2
|
195
178
|
signing_key:
|
196
|
-
specification_version:
|
179
|
+
specification_version: 4
|
197
180
|
summary: A library for working with things that recur
|
198
181
|
test_files:
|
199
182
|
- test/core_ext_date_test.rb
|
@@ -202,5 +185,6 @@ test_files:
|
|
202
185
|
- test/humanizable_test.rb
|
203
186
|
- test/ical_serializable_test.rb
|
204
187
|
- test/inferrable_test.rb
|
188
|
+
- test/performance_test.rb
|
205
189
|
- test/test_helper.rb
|
206
190
|
- test/validatable_test.rb
|