feldtruby 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +23 -0
- data/.gemtest +0 -0
- data/History.txt +4 -0
- data/Manifest.txt +44 -0
- data/README.md +63 -0
- data/README.txt +59 -0
- data/Rakefile +19 -0
- data/TODO +6 -0
- data/lib/feldtruby/array/basic_stats.rb +88 -0
- data/lib/feldtruby/array/count_by.rb +7 -0
- data/lib/feldtruby/array.rb +34 -0
- data/lib/feldtruby/file/file_change_watcher.rb +88 -0
- data/lib/feldtruby/file/tempfile.rb +25 -0
- data/lib/feldtruby/float.rb +17 -0
- data/lib/feldtruby/math/rand.rb +5 -0
- data/lib/feldtruby/net/html_doc_getter.rb +31 -0
- data/lib/feldtruby/optimize/differential_evolution.rb +186 -0
- data/lib/feldtruby/optimize/max_steps_termination_criterion.rb +24 -0
- data/lib/feldtruby/optimize/objective.rb +302 -0
- data/lib/feldtruby/optimize/optimizer.rb +145 -0
- data/lib/feldtruby/optimize/random_search.rb +9 -0
- data/lib/feldtruby/optimize/search_space.rb +69 -0
- data/lib/feldtruby/optimize/stdout_logger.rb +138 -0
- data/lib/feldtruby/optimize.rb +28 -0
- data/lib/feldtruby/string/to_iso.rb +7 -0
- data/lib/feldtruby/time.rb +22 -0
- data/lib/feldtruby/vector.rb +14 -0
- data/lib/feldtruby/visualization/circos.rb +25 -0
- data/lib/feldtruby/word_counter.rb +100 -0
- data/lib/feldtruby.rb +6 -0
- data/test/helper.rb +7 -0
- data/test/test_array.rb +71 -0
- data/test/test_array_basic_stats.rb +130 -0
- data/test/test_array_count_by.rb +13 -0
- data/test/test_float.rb +20 -0
- data/test/test_html_doc_getter.rb +16 -0
- data/test/test_optimize.rb +55 -0
- data/test/test_optimize_differential_evolution.rb +42 -0
- data/test/test_optimize_objective.rb +157 -0
- data/test/test_optimize_populationbasedoptimizer.rb +24 -0
- data/test/test_optimize_random_search.rb +46 -0
- data/test/test_optimize_search_space.rb +97 -0
- data/test/test_time.rb +27 -0
- data/test/test_vector.rb +98 -0
- data/test/test_word_counter.rb +57 -0
- metadata +149 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'feldtruby/optimize'
|
2
|
+
|
3
|
+
# A search space is a set of constraints that limits which values
|
4
|
+
# are searched for. The search space can generate valid candidate
|
5
|
+
# solutions that are inside the space. It can also check if a
|
6
|
+
# given candidate is in the space. The default search space has min
|
7
|
+
# and max values for each element of a continuous vector.
|
8
|
+
class FeldtRuby::Optimize::SearchSpace
  attr_reader :min_values, :max_values

  # Create a search space from per-position bounds.
  #
  # minValues:: lower bound for each variable
  # maxValues:: upper bound for each variable (same length as minValues)
  #
  # Raises a RuntimeError if the two lists differ in length, if they are
  # empty, or if any min value is larger than its max value.
  def initialize(minValues, maxValues)
    # Check that we have valid min and max values
    raise "Not same num of min values (#{minValues.length}) as there are max values (#{maxValues.length})" if minValues.length != maxValues.length
    raise "A search space must have >= 1 variable to be searched, here you specified min values: #{minValues.inspect}" if minValues.length < 1
    minValues.zip(maxValues).each do |min, max|
      raise "The min value #{min} is larger than the max value #{max} in min values = #{minValues.inspect} and #{maxValues.inspect}" if min > max
    end
    @min_values, @max_values = minValues, maxValues
    # Width of each interval, precomputed for gen_value_for_position.
    @deltas = @min_values.zip(@max_values).map { |min, max| max - min }
  end

  # Bound candidate using the min and max values. We randomly generate a new value inside the space
  # for each element that is outside. The result is built with the candidate's
  # own class (via its +[]+ constructor), so the input itself is not modified.
  def bound(candidate)
    bounded = candidate.each_with_index.map do |value, index|
      in_range_for_position?(value, index) ? value : gen_value_for_position(index)
    end
    candidate.class.send(:[], *bounded)
  end

  # True if value lies within the inclusive [min, max] interval of the given position.
  def in_range_for_position?(value, index)
    (value >= @min_values[index]) && (value <= @max_values[index])
  end

  # Build a space where every variable ranges over [-distanceFromZero, distanceFromZero].
  def self.new_symmetric(numVariables = 2, distanceFromZero = 1)
    new_from_min_max(numVariables, -distanceFromZero, distanceFromZero)
  end

  # Build a space where every one of the numVariables variables ranges over [min, max].
  def self.new_from_min_max(numVariables = 2, min = -1, max = 1)
    new(Array.new(numVariables) { min }, Array.new(numVariables) { max })
  end

  # Number of variables (positions) in a candidate of this space.
  def num_variables
    @min_values.length
  end

  # Generate a random candidate (an Array) with every position inside its interval.
  def gen_candidate
    (0...num_variables).map { |i| gen_value_for_position(i) }
  end

  # Random value for position i: the min value plus a random fraction of the interval width.
  def gen_value_for_position(i)
    @min_values[i] + @deltas[i] * rand()
  end

  # True if c has the right number of variables and every element is inside
  # its interval.
  def is_candidate?(c)
    return false unless c.length == num_variables
    (0...c.length).all? { |i| in_range_for_position?(c[i], i) }
  end
end
|
68
|
+
|
69
|
+
# Default search space: two variables, each ranging over [-1, 1].
FeldtRuby::Optimize::DefaultSearchSpace = FeldtRuby::Optimize::SearchSpace.new_symmetric(2, 1)
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'feldtruby/time'
|
2
|
+
require 'feldtruby/float'
|
3
|
+
|
4
|
+
class FeldtRuby::Optimize::StdOutLogger
  # Output sink that silently discards everything; used when not verbose.
  class DummyStream
    def puts(str); end
    def print(str); end
    def flush(); end
  end

  # optimizer:: the optimizer whose progress is logged
  # verbose::   when false, all output goes to a DummyStream instead of STDOUT
  def initialize(optimizer, verbose = true)
    @optimizer = optimizer
    @verbose = verbose
    @start_time = Time.now # To ensure we have a value even if optimizer forgot calling note_optimization_starts
    @events = Hash.new(0)
    # Maps event strings to the last time they were reported on, used by anote.
    # The default is the epoch so that the first report of any event is always
    # printed. Time.at(0) is used rather than Time.new("1970-01-01") because
    # the latter passes a date string where a year is expected.
    @last_report_time = Hash.new(Time.at(0))
    @outstream = verbose ? STDOUT : DummyStream.new
  end

  # Log the start message and (re)start the clock used for elapsed time.
  def note_optimization_starts
    log("Optimization with optimizer #{@optimizer.class.inspect} started")
    @start_time = Time.now
  end

  # Count the event msg and, if shouldPrint, print it. A "." message is a
  # progress tick and is printed as-is; other messages are printed together
  # with their event statistics and the given values.
  def internal_note(shouldPrint, msg, values)
    @events[msg] += 1
    return unless shouldPrint
    if msg == "."
      # Just a tick so no event stat etc
      log_print(msg)
    else
      # Strings are listed one per line, anything else is shown via inspect.
      vstr = if values.all? { |e| String === e }
        values.join("\n ")
      else
        values.inspect
      end
      log("#{event_stat_in_relation_to_step(@events[msg])}: #{msg}\n #{vstr}", true)
    end
  end

  # Record and always print a note.
  def note(msg, *values)
    internal_note true, msg, values
  end

  # Adaptive notes are recorded as any (normal) notes but is only reported to the user in a readable
  # manner i.e. the frequency of reporting them is limited.
  # frequency is the minimum number of seconds between two printed reports of msg.
  def adaptive_note(frequency, msg, values = [])
    should_print = elapsed_since_last_reporting_of(msg) > frequency
    @last_report_time[msg] = Time.now if should_print
    internal_note should_print, msg, values
  end

  # Adaptive note that is printed at most once every 2 seconds per message.
  def anote(msg, *values)
    adaptive_note(2.0, msg, values)
  end

  # Seconds since msg was last printed by adaptive_note.
  def elapsed_since_last_reporting_of(msg)
    Time.now - @last_report_time[msg]
  end

  # Print the final summary: best candidate, event counts and timing.
  def note_end_of_optimization(optimizer)
    best_msg = info_about_candidate(optimizer.best, optimizer.best_quality_value,
      optimizer.best_sub_quality_values, "best")
    note("End of optimization", "Optimizer: #{optimizer.class}",
      best_msg,
      event_summary_to_str(),
      "Time used = #{Time.human_readable_timestr(elapsed_time)}, " +
        "Steps performed = #{num_steps}, " +
        "#{Time.human_readable_timestr(time_per_step, true)}/step")
  end

  # One line per recorded event with its count and rate per step.
  def event_summary_to_str()
    "Event counts:\n " + @events.to_a.map { |key, count| "#{key}: #{event_stat_in_relation_to_step(count)}" }.join("\n ")
  end

  # Describe an event count in absolute terms and relative to the number of steps.
  def event_stat_in_relation_to_step(eventCount)
    "#{eventCount} times (%.3f times/step)" % (eventCount.to_f / num_steps)
  end

  # Describe a candidate. Note that qualityValue and subQualityValues are
  # accepted but not used here; the quality shown is read from the candidate
  # itself via _quality_value.
  def info_about_candidate(candidate, qualityValue, subQualityValues, nameString = "new")
    info_str = nameString ? "#{nameString} = #{candidate.inspect}\n " : " "
    info_str + candidate._quality_value.inspect
  end

  # Adaptive note about a candidate that is better than some previous one.
  def note_new_better(betterMsg, newBetter, newQv, newSubQvs)
    new_better_msg = info_about_candidate(newBetter, newQv, newSubQvs, nil)
    anote(betterMsg, new_better_msg)
  end

  # Adaptive note about a new best candidate; when oldBest is given, the old
  # quality value and the improvement over it are included.
  def note_new_best(newBest, newQv, newSubQvs, oldBest = nil, oldQv = nil, oldSubQvs = nil)
    new_best_msg = info_about_candidate(newBest, newQv, newSubQvs, "new")
    if oldBest
      new_best_msg += ",\n supplants old best\n #{oldQv.inspect}"
      new_best_msg += "\n #{newQv.improvement_in_relation_to(oldQv)}\n"
    end
    anote("Found new best", new_best_msg)
  end

  # Count one more optimization step and print a progress tick (at most every 0.1 s).
  def note_another_optimization_step(stepNumber)
    @events['Optimization steps'] += 1 # we note it by hand since we are printing something different than the event name
    adaptive_note(0.1, '.')
  end

  # Format a quality value and its sub-quality values with 4 decimals.
  def quality_values_to_str(qv, subQvs)
    "q = %.4f, subqs = %s" % [qv, subQvs.map { |v| v.round_to_decimals(4) }.inspect]
  end

  # Log the reason the optimization terminated, on a line of its own.
  def note_termination(message)
    log(message, true)
  end

  # Print str prefixed with a timestamp, the step count and the elapsed time.
  def log(str, newlineBefore = false, newlineAfter = true)
    @outstream.puts "" if newlineBefore
    @outstream.print "#{Time.timestamp({:short => true})} #{num_steps}: (#{Time.human_readable_timestr(elapsed_time)}), #{str}"
    @outstream.puts "" if newlineAfter
    @outstream.flush
  end

  # Print str as-is (no prefix, no newline) and flush.
  def log_print(str)
    @outstream.print str
    @outstream.flush
  end

  # Number of optimization steps noted so far.
  def num_steps
    @events['Optimization steps']
  end

  # Average number of seconds per step since the optimization started.
  def time_per_step
    elapsed_time / num_steps
  end

  # Seconds since the optimization started (or since the logger was created).
  def elapsed_time
    Time.now - @start_time
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'feldtruby'
|
2
|
+
|
3
|
+
module FeldtRuby::Optimize; end
|
4
|
+
|
5
|
+
require 'feldtruby/optimize/differential_evolution'
|
6
|
+
module FeldtRuby::Optimize
  # Optimize the variables of _costFunction_ between the _min_ and _max_ values.
  # The number of variables is taken from the arity of the _costFunction_ block,
  # so the block must declare a fixed number of parameters.
  # Default is to minimize.
  #
  # Returns the best candidate found, as an Array.
  # Raises ArgumentError if no block is given or if the block has optional or
  # splat parameters (so that the number of variables cannot be determined).
  def self.optimize(min, max, options = {:verbose => true},
    objectiveFuncClass = FeldtRuby::Optimize::ObjectiveMinimizeBlock, &costFunction)
    raise ArgumentError, "A cost function block must be given" unless costFunction
    num_vars = costFunction.arity
    # A negative arity means optional/splat parameters; fail here with a clear
    # message instead of deep inside the search space construction.
    raise ArgumentError, "The cost function block must take a fixed number of parameters (arity was #{num_vars})" if num_vars < 0
    objective = objectiveFuncClass.new(&costFunction)
    search_space = SearchSpace.new_from_min_max(num_vars, min, max)
    optimizer = DEOptimizer.new(objective, search_space, options)
    optimizer.optimize()
    optimizer.best.to_a
  end

  # Short hand wrapper for function minimization.
  def self.minimize(min, max, options = {}, &costFunction)
    optimize(min, max, options, &costFunction)
  end

  # Short hand wrapper for function maximization.
  def self.maximize(min, max, options = {}, &costFunction)
    optimize(min, max, options, FeldtRuby::Optimize::ObjectiveMaximizeBlock, &costFunction)
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Current local time as a compact timestamp string.
# With :short => true the year is written with two digits, otherwise with four.
def Time.timestamp(options = {:short => false})
  layout = options[:short] ? "%y%m%d %H:%M.%S" : "%Y%m%d %H:%M.%S"
  Time.now.strftime(layout)
end
|
8
|
+
|
9
|
+
# Format a duration given in seconds using a unit that suits its size:
# usec below 0.1 msec, msec below 0.1 sec, mins above 60 sec, hours above
# 60 mins, and sec otherwise. The value is shown with two decimals.
#
# seconds::     the duration; may be negative, in which case the unit is
#               chosen from its magnitude and the sign is kept in the output
# insertSpace:: when true a space is put between the number and the unit
def Time.human_readable_timestr(seconds, insertSpace = false)
  sp = insertSpace ? " " : ""
  # Choose the unit by magnitude so that negative durations are not all
  # reported in usec.
  magnitude = seconds.abs
  if magnitude < 1e-4
    "%.2f#{sp}usec" % (seconds*1e6)
  elsif magnitude < 1e-1
    "%.2f#{sp}msec" % (seconds*1e3)
  elsif magnitude > 60*60.0
    "%.2f#{sp}hours" % (seconds/3600.0)
  elsif magnitude > 60.0
    "%.2f#{sp}mins" % (seconds/60.0)
  else
    "%.2f#{sp}sec" % seconds
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
require 'feldtruby/array/basic_stats'
|
3
|
+
|
4
|
+
class Vector
  # The BasicStatistics methods call length, which Vector does not provide,
  # so answer it with size.
  def length
    size()
  end
  include BasicStatistics

  # Element access extended with slicing: with only an index the element at
  # that position is returned; with an index and a length a new Vector holding
  # that slice is returned.
  def [](index, length = nil)
    if length
      Vector.elements(self.to_a[index, length])
    else
      @elements[index]
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Minimal info to dump circos conf files, for beautiful rendering of
|
2
|
+
# circular data visualizations.
|
3
|
+
class Circos
  # Build one space-separated circos data line: the name (when set) followed
  # by the values of fields in keyOrder (default: the keys in sorted order).
  def self.data_line(name, fields, keyOrder = nil)
    keyOrder ||= fields.keys.sort
    values = keyOrder.map { |key| fields[key] }
    (name.nil? ? values : [name] + values).join(" ")
  end

  # A chromosome is a Hash of attributes plus a name and a Hash of its bands.
  class Chromosome < Hash
    attr_accessor :name
    attr_reader :bands

    def initialize(*args, &block)
      super
      @bands = Hash.new
    end

    # The data line for this chromosome, terminated by a newline.
    def dump_to_circos_data(keyOrder = nil)
      Circos.data_line(name, self, keyOrder) + "\n"
    end

    # The data lines of all bands, one per line. bands is a Hash, so the
    # Band objects are its values.
    def dump_bands_to_circos(bandKeyOrder = nil)
      bands.values.map { |band| band.dump_to_circos_data(bandKeyOrder) }.join("\n")
    end
  end

  # A band is a Hash of attributes plus a name.
  class Band < Hash
    attr_accessor :name

    # The data line for this band (no trailing newline).
    def dump_to_circos_data(keyOrder = nil)
      Circos.data_line(name, self, keyOrder)
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Counts how often words occur, ignoring common English stop words.
class FeldtRuby::WordCounter
  def initialize
    @counts = Hash.new(0)
  end

  # Ensure it has canonical form
  def preprocess_word(word)
    word.strip.downcase
  end

  # Count one occurrence of word (in canonical form) unless it is a stop word.
  def count_word(word)
    w = preprocess_word(word)
    @counts[w] += 1 unless is_stop_word?(w)
  end

  # Split str into lower-case words; anything that is not a word character or
  # a hyphen separates words. Empty tokens, which split produces when the
  # string starts with a separator, are dropped so they are never counted.
  # (The misspelled method name is kept since callers depend on it.)
  def invidual_words_in_string(str)
    str.downcase.split(/[^\w-]+/).reject { |w| w.empty? }
  end

  # Count every word in string.
  def count_words(string)
    invidual_words_in_string(string).map { |w| count_word(w) }
  end

  # All counted words.
  def words
    @counts.keys
  end

  # Number of times word has been counted (0 if never).
  def count(word)
    @counts[preprocess_word(word)]
  end

  # The numberOfWords most frequent words as [word, count] pairs, most
  # frequent first. If fewer words have been counted, all of them are returned.
  def top_words(numberOfWords)
    @counts.to_a.sort_by { |e| e.last }.last(numberOfWords).reverse
  end

  StopWords = ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"]

  # True if word (expected in canonical form) is in the stop word list.
  def is_stop_word?(word)
    StopWords.include?(word)
  end

  # Merge words together that are pluralis or -ing (or -ming) forms of each other.
  # Destructive, so only use this after all words have been added.
  # A merged entry gets the key "base|variant..." and the summed count; the
  # separate entries for the base word and its variants are removed.
  def merge!
    # Base words are the ones that are not themselves an -s or -ing form.
    base_words = @counts.keys.reject { |w| w.end_with?("s", "ing") }
    base_words.each do |base_word|
      # Variants are looked up directly in the hash instead of scanning lists.
      variants = ["s", "ming", "ing"].map { |suffix| base_word + suffix }.select { |v| @counts.key?(v) }
      next if variants.empty?
      total = @counts.delete(base_word)
      variants.each { |variant| total += @counts.delete(variant) }
      @counts[([base_word] + variants).join("|")] = total
    end
  end
end
|
76
|
+
|
77
|
+
# Word counter that counts n-grams (runs of n consecutive words) instead of
# single words.
class FeldtRuby::NgramWordCounter < FeldtRuby::WordCounter
  # n:: the number of words in each n-gram
  def initialize(n = 2)
    super()
    @n = n
  end

  # Count the n-grams in the text words: the text is split into sentences,
  # each sentence into words, and every n-gram without any stop word is
  # counted as one space-joined "word".
  def count_words(words)
    sentences = words.split(/\.\s+(?=[A-Z]{1})/)
    sentences.each do |sentence|
      candidates = all_ngrams(invidual_words_in_string(sentence))
      kept = candidates.reject { |ngram| ngram.any? { |ngw| is_stop_word?(ngw) } }
      kept.each { |ngram| count_word(ngram.join(' ')) }
    end
  end

  # Every run of @n consecutive elements of array, in order of start position.
  def all_ngrams(array)
    last_start = array.length - @n
    (0..last_start).map { |start| array[start, @n] }
  end
end
|
data/lib/feldtruby.rb
ADDED
data/test/helper.rb
ADDED
data/test/test_array.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'minitest/spec'
|
2
|
+
require 'feldtruby/array'
|
3
|
+
|
4
|
+
# Unit tests for the Array extensions distance_between_elements and swap!.
class TestFeldtRubyArray < MiniTest::Unit::TestCase
  def test_distance_between_elements_normal_cases
    assert_equal [1], [1, 2].distance_between_elements
    assert_equal [1, 2], [1, 2, 4].distance_between_elements
    assert_equal [3, 11], [-1, 2, 13].distance_between_elements
  end

  def test_distance_elements_when_one_element
    assert_equal [], [1].distance_between_elements
  end

  def test_distance_elements_empty_array
    # assert_nil rather than assert_equal nil, which Minitest has deprecated.
    assert_nil([].distance_between_elements)
  end

  def test_swap!
    a = (0..9).to_a

    a.swap!(0, 8)
    assert_equal 8, a[0]
    assert_equal 0, a[8]
    assert_equal 1, a[1]
    assert_equal 9, a[9]

    # The second swap works on the already swapped array.
    a.swap!(0, 9)
    assert_equal 9, a[0]
    assert_equal 0, a[8]
    assert_equal 8, a[9]
    assert_equal 2, a[2]
  end
end
|
35
|
+
|
36
|
+
# Specs for Array#ranks and Array#ranks_by; in these examples the largest
# value gets rank 1.
describe "Array extensions" do
  describe "ranks" do
    it "works when elements are already in order" do
      floats = [2.5, 1.5, 0.3]
      ints = [15, 7, 1, 0]
      floats.ranks.must_equal [1, 2, 3]
      ints.ranks.must_equal [1, 2, 3, 4]
    end

    it "works when elements are in reverse order" do
      floats = [0.3, 1.5, 2.5]
      ints = [0, 1, 7, 15]
      floats.ranks.must_equal [3, 2, 1]
      ints.ranks.must_equal [4, 3, 2, 1]
    end

    it "works when elements are out of order" do
      floats = [1.5, 0.5, 2.3]
      ints = [1, 7, 15, 0]
      floats.ranks.must_equal [2, 3, 1]
      ints.ranks.must_equal [3, 2, 1, 4]
    end

    it "works when given an empty array" do
      [].ranks.must_equal []
    end
  end

  describe "ranks_by" do
    it "works when element to rank by is first and we prepend the ranks" do
      rows = [[2.5, :a], [1.5, :b], [0.3, :c]]
      ranked = rows.ranks_by(true) { |row| row.first }
      ranked.must_equal [[1, 2.5, :a], [2, 1.5, :b], [3, 0.3, :c]]
    end

    it "works when element to rank by is second and we append the ranks" do
      rows = [[:a, 2.5], [:c, 0.3], [:b, 1.5]]
      ranked = rows.ranks_by(false) { |row| row[1] }
      ranked.must_equal [[:a, 2.5, 1], [:c, 0.3, 3], [:b, 1.5, 2]]
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'feldtruby/array/basic_stats'
|
3
|
+
|
4
|
+
# Unit tests for the basic statistics methods added to Array.
class TestArrayBasicStats < MiniTest::Unit::TestCase
  def test_sum_normal
    assert_equal 3, [1,2].sum
    assert_equal 6, [1,2,3].sum
  end

  def test_sum_one_element
    assert_equal 1, [1].sum
  end

  def test_sum_empty_array
    assert_equal 0, [].sum
  end

  def test_mean_normal
    assert_equal 1.5, [1,2].mean
    assert_equal 2, [1,2,3].mean
  end

  def test_mean_one_element
    assert_equal 1, [1].mean
  end

  def test_mean_empty_array
    assert_equal 0, [].mean
  end

  # Named after what it checks: the standard deviation of a Wikipedia example data set.
  def test_stdev_from_wikipedia_def_page
    assert_equal 2.0, [2, 4, 4, 4, 5, 5, 7, 9].stdev
  end

  def test_root_mean_square
    assert_equal Math.sqrt((1*1 + 2*2)/2.0), [1, 2].root_mean_square
    assert_equal Math.sqrt((10*10 + 243*243)/2.0), [10, 243].rms
  end

  def test_weighted_sum_one_element
    assert_equal 1, [1].weighted_sum([1])
    assert_equal 2, [1].weighted_sum([2])
  end

  def test_weighted_sum_two_elements
    assert_equal 3, [1, 2].weighted_sum([1, 1])
    assert_equal 22, [1, 4].weighted_sum([2, 5])
  end

  def test_weighted_mean_one_elements
    assert_equal 1, [1].weighted_mean([1])
    assert_equal 4, [4].weighted_mean([2])
  end

  def test_weighted_mean_two_elements
    assert_equal 1.5, [1, 2].weighted_mean([1, 1])
    assert_equal 22.0/7, [1, 4].weighted_mean([2, 5])

    # Called without weights.
    assert_equal 1.5, [1, 2].weighted_mean()
  end
end
|
62
|
+
|
63
|
+
# Specs for the remaining basic statistics methods on Array.
describe "Basic statistics" do
  describe "sum of abs" do
    it "works for simple example" do
      values = [1, 2, 3, -4, 5, -6]
      values.sum_of_abs.must_equal 1+2+3+4+5+6
    end
  end

  describe "sum of absolute deviations from value" do
    it "is same as sum of absolutes if the value is 0.0" do
      values = [1, 2, 3, -4, 5, -6]
      sum_of_absolutes = values.map { |v| v.abs }.sum
      values.sum_of_abs_deviations(0.0).must_equal sum_of_absolutes
    end

    it "works for simple example" do
      values = [1, 2, 3, -4, 5, -6]
      values.sum_of_abs_deviations(1.0).must_equal 0+1+2+5+4+7
    end
  end

  describe "rms_from_scalar" do
    it "is the same as rms if scalar is 0.0" do
      values = [1,2,3,4,5]
      values.rms_from_scalar(0.0).must_be_within_delta values.rms
    end

    it "is correct for concrete example" do
      values = [1,2]
      expected = Math.sqrt( (0.5**2 + 0.5**2)/2 )
      values.rms_from_scalar(1.5).must_equal expected
    end
  end

  describe "squared_error" do
    it "works for simple example" do
      actual = [1, 2, 3]
      other = [2, 4, 7]
      actual.sum_squared_error(other).must_equal(1*1 + 2*2 + 4*4)
    end
  end

  describe "median" do
    it "works when there is a single value" do
      single = [1]
      single.median.must_equal 1
    end

    it "works when there are two integers, median is float" do
      pair = [1, 2]
      pair.median.must_equal 1.5
    end

    it "works when there are two floats, median is float" do
      pair = [1.0, 2.0]
      pair.median.must_equal 1.5
    end

    it "works when there are three inputs" do
      triple = [1.0, 2.0, 3.0]
      triple.median.must_equal 2.0
    end

    it "works when there are four inputs" do
      four = [1, 2, 3, 4]
      four.median.must_equal 2.5
    end
  end

  describe "summary_stats" do
    it "gives a nice string with descriptive statistics" do
      summary = [1,2,3,4].summary_stats
      summary.must_equal "2.500 (min = 1.0, max = 4.0, median = 2.5, stdev = 1.12)"
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'minitest/spec'
|
2
|
+
require 'feldtruby/array/count_by'
|
3
|
+
|
4
|
+
# Spec for Array#count_by: counts elements grouped by the block's value.
describe "Array#count_by" do
  it "counts right" do
    words = ["a", "ab", "b", "abfd", "e", "gf"]
    counts = words.count_by { |word| word.length }
    counts.must_be_instance_of Hash
    counts.keys.sort.must_equal [1, 2, 4]
    # Three words of length 1, two of length 2 and one of length 4.
    counts[1].must_equal 3
    counts[2].must_equal 2
    counts[4].must_equal 1
  end
end
|
data/test/test_float.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'feldtruby/float'
|
2
|
+
|
3
|
+
# Unit test for Float#round_to_decimals.
class TestFloat < MiniTest::Unit::TestCase
  def test_round_to_decimals
    value = 1.204
    # Number of decimals => expected rounded value.
    [[1, 1.2], [2, 1.20], [3, 1.204]].each do |decimals, expected|
      assert_equal expected, value.round_to_decimals(decimals)
    end
  end
end
|
10
|
+
|
11
|
+
# Specs for protected_division_with, a division that is safe for a zero denominator.
describe "protected_division_with" do
  it "works for non-zero values" do
    1.0.protected_division_with(2).must_equal 0.5
    120.4.protected_division_with(4).must_equal 30.1
  end

  # The description used to say "positive infinity", which contradicted the
  # assertion below; it now states the value the assertion expects.
  it "returns 0.0 if numerator is positive and denominator is zero" do
    1.0.protected_division_with(0).must_equal 0.0
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'minitest/spec'
|
2
|
+
require 'feldtruby/net/html_doc_getter'
|
3
|
+
|
4
|
+
# Specs for HtmlDocGetter. Both examples fetch a live web page.
describe "HtmlDocGetter" do
  it "Can get the html page as a string" do
    getter = FeldtRuby::HtmlDocGetter.new
    page = getter.get("http://www.google.com")
    page.must_be_instance_of String
  end

  it "Can get the html page as a Nokogiri doc" do
    getter = FeldtRuby::HtmlDocGetter.new
    doc = getter.get_html_doc("http://www.google.com")
    doc.must_be_instance_of Nokogiri::HTML::Document
  end
end
|