experiment 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,5 +2,5 @@ $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
4
  module Experiment
5
- VERSION = '0.0.1'
5
+ VERSION = '0.2.0'
6
6
  end
@@ -1,25 +1,45 @@
1
1
  require File.dirname(__FILE__) + "/notify"
2
2
  require File.dirname(__FILE__) + "/stats"
3
3
  require File.dirname(__FILE__) + "/config"
4
+ require File.dirname(__FILE__) + "/distributed"
4
5
  require 'benchmark'
6
+ require "drb/drb"
5
7
 
6
8
  module Experiment
7
9
  class Base
8
- attr_reader :dir, :current_cv, :cvs
9
10
 
10
- def initialize(experiment, options, env)
11
+ include Distributed
12
+
13
+ attr_reader :dir, :current_cv, :cvs
14
+
15
# Sets up an experiment instance for the given run mode and loads its
# configuration through Experiment::Config::load.
#
# mode       - :normal, :master or :slave
# experiment - experiment name, stored and passed on to Config::load
# options    - passed on to Config::load unchanged
# env        - passed on to Config::load unchanged
def initialize(mode, experiment, options, env)
  @experiment = experiment

  # :normal and :master start with an empty list of performance totals;
  # nothing mode-specific is prepared for :slave (or any other value).
  @abm = [] if mode == :normal || mode == :master

  if mode == :master
    # The master is handed out by reference over DRb instead of being copied.
    extend DRb::DRbUndumped
    @done = false
  end

  Experiment::Config::load(experiment, options, env)
  @mode = mode
end
31
+
32
# Reports the @done flag. It is initialised to false only for a :master
# instance and set to true by master_done!; otherwise it is unset (nil).
def done?
  @done
end
35
+
36
+
17
37
 
18
38
  # runs the whole experiment
19
- def run!(cv)
39
+ def normal_run!(cv)
20
40
  @cvs = cv || 1
21
41
  @results = {}
22
- Notify.print "Running #{@experiment} "
42
+ Notify.started @experiment
23
43
  split_up_data
24
44
  write_dir!
25
45
  specification!
@@ -28,21 +48,39 @@ module Experiment
28
48
  @bm = []
29
49
  @current_cv = cv_num
30
50
  File.open(@dir + "/raw-#{cv_num}.txt", "w") do |output|
51
+ @ouptut_file = output
31
52
  run_the_experiment(@data[cv_num], output)
32
53
  end
33
54
  array_merge @results, analyze_result!(@dir + "/raw-#{cv_num}.txt", @dir + "/analyzed-#{cv_num}.txt")
34
55
  write_performance!
35
- Notify.print "."
56
+ Notify.cv_done @experiment, cv_num
36
57
  end
37
58
  summarize_performance!
38
59
  summarize_results! @results
39
- Notify.print result_line
60
+ Notify.completed @experiment
40
61
  end
41
-
42
-
62
+
63
+
64
# Use this every time you want to take a measurement.
# The block is benchmarked automatically and its return value is
# appended to the record file.
# The weight parameter is used for calculating Notify::step. It should
# be an integer denoting how many such measurements you wish to do;
# when it is nil no progress step is reported.
def measure(label = "", weight = nil, &block)
  result = ""
  benchmark(label) { result = yield }
  # The instance variable name is misspelled on purpose here: it is the
  # exact name the run loops assign the open record file to.
  @ouptut_file << result
  return if weight.nil?

  Notify::step(@experiment, @current_cv, 1.0/weight)
end
78
+
79
+
43
80
  # Registers and performs a benchmark which is then
44
81
  # calculated to the total and everage times
45
82
  def benchmark(label = "", &block)
83
+ @bm ||= []
46
84
  @bm << Benchmark.measure("CV #{@current_cv} #{label}", &block)
47
85
  end
48
86
 
@@ -70,6 +108,7 @@ module Experiment
70
108
  f << @bm.map {|m| m.format("%19n "+Benchmark::FMTSTR)}.join
71
109
  total = @bm.reduce(0) {|t, m| m + t}
72
110
  f << total.format(" Total: "+Benchmark::FMTSTR)
111
+ @abm ||= []
73
112
  @abm << total
74
113
  end
75
114
  end
@@ -85,7 +124,7 @@ module Experiment
85
124
  # creates a summary of the results and writes to 'all.csv'
86
125
  def summarize_results!(results)
87
126
  File.open(@dir + '/results.yaml', 'w' ) do |out|
88
- YAML.dump(results, out )
127
+ YAML.dump(results, out)
89
128
  end
90
129
 
91
130
  # create an array of arrays
@@ -99,7 +138,6 @@ module Experiment
99
138
 
100
139
  ls = ["Standard Deviation".length] + ls
101
140
  res = [["cv"] + (1..cvs).to_a.map(&:to_s) + ["Mean", "Standard Deviation"]] + res
102
-
103
141
  out = ""
104
142
  res.transpose.each do |col|
105
143
  col.each_with_index do |cell, i|
@@ -9,7 +9,8 @@ module Experiment
9
9
  # the options string (which should be in this format:
10
10
  # "key: value, key2:value2,key3: value3")
11
11
  def load(experiment, options, env = :development)
12
- init env
12
+ #init env
13
+ @config ||= {}
13
14
  expath = File.expand_path("./experiments/#{experiment}/config.yaml")
14
15
  if File.exists? expath
15
16
  exp = YAML::load_file(expath)
@@ -17,20 +18,48 @@ module Experiment
17
18
  end
18
19
  @config.merge! parse(options)
19
20
  end
20
-
21
+
22
# Loads the main config file (./config/config.yaml, relative to the
# working directory) and keeps the section found under
# "environments" -> env as the current configuration.
def init(env = :development)
  environments = YAML::load_file("./config/config.yaml")["environments"]
  @config = environments[env.to_s]
end
25
-
27
+
28
+
29
# Looks up a config option; the key may be given as a String or a Symbol
# because it is converted with to_s before the lookup.
def [](v)
  key = v.to_s
  @config[key]
end
29
33
 
34
# Allows access to any config option by key. Supports interpolations.
#
# v    - option key (String or Symbol)
# opts - optionally a default value, used when the option is not set,
#        optionally followed by a Hash of interpolations. In a String
#        value every word preceded by a colon (":name") that matches a
#        key of that Hash is replaced by the corresponding value.
#
# A lone Hash argument is always taken as the interpolations, never as
# the default. Returns the (interpolated) option value or the default.
def get(v, *opts)
  default = opts.shift if opts.length == 2 || !opts.first.is_a?(Hash)

  settings = @config || {}
  stored = settings[v.to_s]
  # Only a missing (nil) option falls back to the default; an option that
  # is explicitly set to false must not be overridden by it.
  value = stored.nil? ? default : stored

  interpolations = opts.first
  return value unless interpolations && value.is_a?(String)

  interpolations.reduce(value.dup) do |result, (name, replacement)|
    # Escape the key and use the block form so neither the key nor the
    # replacement text is interpreted as regexp/backreference syntax.
    result.gsub(/:#{Regexp.escape(name.to_s)}/) { replacement.to_s }
  end
end
50
+
51
# Merges +opts+ into the current configuration, creating it first when
# none has been loaded yet.
#
# opts - Hash of option names to values
#
# Returns the updated configuration Hash.
def set(opts)
  # merge! instead of merge: the non-destructive form threw the new
  # options away whenever a configuration already existed.
  (@config ||= {}).merge!(opts)
end
55
+
56
# Parses an option string as passed into the CLI -o option
# (format: "key: value, key2:value2,key3: value3") into a Hash.
#
# A nil or empty string yields an empty Hash. Only the first colon of
# each pair separates key from value, so values may themselves contain
# colons; a pair without any colon maps its key to nil.
def parse(options)
  return {} if options.nil? || options.empty?

  pairs = options.split(/, ?/).map do |pair|
    key, value = pair.split(/: ?/, 2)
    [key, value]
  end
  Hash[pairs]
end
33
61
 
62
# Returns the current options as a Hash object. This is the stored
# configuration object itself, not a copy.
def to_h
  @config
end
@@ -0,0 +1,67 @@
1
module Experiment
  # Work distribution between one master experiment instance and any
  # number of slave instances. The master hands out cross-validations
  # (get_work) and collects their results (submit_result); a slave pulls
  # work from the object stored in +master+ until none is left.
  module Distributed
    # The object a slave asks for work and reports results to.
    attr_accessor :master

    # Hands out the next cross-validation that has not been started yet.
    #
    # Returns a Hash with :cv, :input, :dir and :options, or false once
    # every cross-validation has been handed out.
    # NOTE(review): the find-then-mark sequence is not synchronised;
    # confirm whether several workers can call this concurrently.
    def get_work
      cv = @started.index(false)
      return false unless cv

      @started[cv] = true
      { :cv => cv, :input => @data[cv], :dir => @dir, :options => Experiment::Config.to_h }
    end

    # True once every cross-validation has been handed out.
    def distribution_done?
      @started.all?
    end

    # Records the outcome of one cross-validation and wraps the whole
    # experiment up as soon as all of them have reported back.
    #
    # cv          - index of the finished cross-validation
    # result      - Hash of results, merged into @results with array_merge
    # performance - performance total, appended to @abm
    def submit_result(cv, result, performance)
      @completed[cv] = true
      array_merge(@results, result)
      @abm << performance
      Notify.cv_done @experiment, cv
      master_done! if @completed.all?
    end

    # Slave loop: keeps fetching work from the master, runs and analyses
    # it, and submits the result together with its performance total.
    def slave_run!
      while work = @master.get_work
        puts work.inspect
        Experiment::Config.set work[:options]
        @current_cv = work[:cv]
        @dir = work[:dir]
        # Start each work item with a clean benchmark list (as the normal
        # run does per cross-validation); otherwise timings of earlier
        # items would be counted again.
        @bm = []

        raw_file = @dir + "/raw-#{@current_cv}.txt"
        File.open(raw_file, "w") do |output|
          @ouptut_file = output
          run_the_experiment(work[:input], output)
        end
        result = analyze_result!(raw_file, @dir + "/analyzed-#{@current_cv}.txt")
        write_performance!
        # write_performance! appends this item's total to @abm, so the
        # newest entry, not the first one, belongs to this work item.
        @master.submit_result @current_cv, result, @abm.last
      end
    end

    # Prepares the master: splits the data, writes the result directory
    # and specification, and sets up the per-cross-validation bookkeeping.
    #
    # cv - number of cross-validations (defaults to 1 when nil)
    def master_run!(cv)
      @cvs = cv || 1
      @results = {}
      Notify.started @experiment
      split_up_data
      write_dir!
      specification!
      @completed = Array.new(@cvs, false)
      @started = @completed.dup
    end

    # Marks the experiment as done and writes the summaries.
    def master_done!
      @done = true
      summarize_performance!
      summarize_results! @results
      Notify.completed @experiment

      #sleep 1
      #DRb.stop_service
    end
  end
end
@@ -1,6 +1,180 @@
1
+ # This class is responsible for UI goodness in letting you know
2
+ # about the progress of your experiments
3
+ require "drb/drb"
1
4
  class Notify
2
- def self.method_missing(meth, *args, &blk)
3
- $stdout.sync = true
4
- $stdout.send meth, *args, &blk
5
+
6
+ class << self
7
+ include DRb::DRbUndumped
8
+ # initialize display
9
+ def init(total, out = STDERR, growl = true, mode = :normal)
10
+ @curent_experiment = ""
11
+ @current_cv = 0
12
+ @cv_prog = {}
13
+ @total = total
14
+ @out = out
15
+ @terminal_width = 80
16
+ @bar_mark = "o"
17
+ @current = 0
18
+ @previous = 0
19
+ @finished_p = false
20
+ @start_time = Time.now
21
+ @previous_time = @start_time
22
+ @growl = growl
23
+ @mode = mode
24
+ show if @mode == :normal
25
+ end
26
+
27
+ # Called when starting work on a particular experiment
28
+ def started(experiment)
29
+ @curent_experiment = experiment
30
+ @current_cv = 1
31
+ @cv_prog[experiment] = []
32
+ show_if_needed
33
+ end
34
+
35
+ # Called when experiment completed.
36
+ # Shows a Growl notification on OSX.
37
+ # The message can be expanded by overriding the result_line
38
+ # method in the experiment class
39
+ def completed(experiment, msg = "")
40
+ if @growl
41
+ begin
42
+ `G_TITLE="Experiment Complete" #{File.dirname(__FILE__)}/../../bin/growl.sh -nosticky "Experimental condition #{experiment} complete. #{msg}"`
43
+ rescue
44
+ # probably not on OSX
45
+ end
46
+ end
47
+ m = "Condition #{experiment} complete. #{msg}"
48
+ puts m + " " * @terminal_width
49
+ @curent_experiment = nil
50
+ end
51
+
52
+ # called after a crossvalidation has completed
53
+ def cv_done(experiment, num)
54
+ @cv_prog[experiment][num] ||= 0
55
+ inc(1 - @cv_prog[experiment][num])
56
+ #@cv_prog = 0
57
+ end
58
+
59
+ # Wrap up
60
+ def done
61
+ @current = @total
62
+ @finished_p = true
63
+ #show
64
+ end
65
+
66
+ # Use this in experiment after each (potentially time consuming) task
67
+ # The argument should be a fraction (0 < num < 1) which tells
68
+ # how big a portion the task was of the complete run (eg. your
69
+ # calls should sum up to 1).
70
+ def step(experiment, cv, num)
71
+ if @mode == :normal
72
+ if num > 1
73
+ num = num / 100
74
+ end
75
+ inc(num)
76
+ @cv_prog[experiment][cv] ||= 0
77
+ @cv_prog[experiment][cv] += num
78
+ else
79
+ @mode.notify.step(experiment, cv, num)
80
+ end
81
+ end
82
+
5
83
  end
84
+
85
+ # a big part of this module is copied/inspired by Satoru Takabayashi's <satoru@namazu.org> ProgressBar class at http://0xcc.net/ruby-progressbar/index.html.en
86
+ module ProgressBar #:nodoc
87
+ def inc(step = 1)
88
+ @current += step
89
+ @current = @total if @current > @total
90
+ show_if_needed
91
+ @previous = @current
92
+ end
93
+
94
+ def show_if_needed
95
+ if @total.zero?
96
+ cur_percentage = 100
97
+ prev_percentage = 0
98
+ else
99
+ cur_percentage = (@current * 100 / @total).to_i
100
+ prev_percentage = (@previous * 100 / @total).to_i
101
+ end
102
+ @finished_p = cur_percentage == 100
103
+ # Use "!=" instead of ">" to support negative changes
104
+ if cur_percentage != prev_percentage ||
105
+ Time.now - @previous_time >= 1 || @finished_p
106
+ show
107
+ end
108
+ end
109
+
110
+
111
+
112
+ def show
113
+ percent = @current * 100 / @total
114
+ bar_width = percent * @terminal_width / 100
115
+ line = sprintf "%3d%% |%s%s| %s", percent, "=" * bar_width, "-" * (@terminal_width - bar_width), stat
116
+
117
+
118
+ width = get_width
119
+ if line.length == width - 1
120
+ @out.print(line + (@finished_p ? "\n" : "\r"))
121
+ @out.flush
122
+ elsif line.length >= width
123
+ @terminal_width = [@terminal_width - (line.length - width + 1), 0].max
124
+ if @terminal_width == 0 then @out.print(line + eol) else show end
125
+ else # line.length < width - 1
126
+ @terminal_width += width - line.length + 1
127
+ show
128
+ end
129
+ @previous_time = Time.now
130
+ end
131
+
132
+ def stat
133
+ if @finished_p then elapsed else eta end
134
+ end
135
+
136
+ def eta
137
+ if @current == 0
138
+ "ETA: --:--:--"
139
+ else
140
+ elapsed = Time.now - @start_time
141
+ eta = elapsed * @total / @current - elapsed;
142
+ sprintf("ETA: %s", format_time(eta))
143
+ end
144
+ end
145
+
146
+ def elapsed
147
+ elapsed = Time.now - @start_time
148
+ sprintf("Time: %s", format_time(elapsed))
149
+ end
150
+
151
+ def format_time (t)
152
+ t = t.to_i
153
+ sec = t % 60
154
+ min = (t / 60) % 60
155
+ hour = t / 3600
156
+ sprintf("%02d:%02d:%02d", hour, min, sec);
157
+ end
158
+
159
+
160
+ def get_width
161
+ # FIXME: I don't know how portable it is.
162
+ default_width = 80
163
+ begin
164
+ tiocgwinsz = 0x5413
165
+ data = [0, 0, 0, 0].pack("SSSS")
166
+ if @out.ioctl(tiocgwinsz, data) >= 0 then
167
+ rows, cols, xpixels, ypixels = data.unpack("SSSS")
168
+ if cols >= 0 then cols else default_width end
169
+ else
170
+ default_width
171
+ end
172
+ rescue Exception
173
+ default_width
174
+ end
175
+ end
176
+ end
177
+
178
+ extend ProgressBar
179
+
6
180
  end
@@ -0,0 +1,208 @@
1
module Experiment

  # This is the class behind the command line magic
  class Runner

    attr_reader :options

    # arg - Array of command line arguments (experiment / project names)
    # opt - options object; the commands below read description, env, cv,
    #       opts, distributed and master from it
    def initialize(arg, opt)
      @arguments, @options = arg, opt
    end


    # Generates a new experiment condition
    # Usage of the -m flag for writing a hypothesis is recommended
    def generate
      name = @arguments.first
      dir = "./experiments/" + name
      Dir.mkdir(dir)
      File.open(dir + "/" + name + ".rb", "w") do |req_file|
        req_file.puts "# ## #{as_human_name name} ##"
        req_file.puts "# "+@options.description.to_s.split("\n").join("\n# ")
        req_file.puts
        req_file.puts
        req_file.puts "# The first contious block of comment will be included in your report."
        req_file.puts "# This includes the reference implementation."
        req_file.puts "# Override any desired files in this directory."
        Dir["./app/**/*.rb"].each do |f|
          # Only the leading "./" is stripped. Subtracting the working
          # directory's path segments (as done before) also removed any
          # directory below ./app that shared a name with one of them.
          relative = f.sub(%r{\A\./}, "")
          req_file.puts "require File.dirname(__FILE__) + \"/../../#{relative}\""
        end
        req_file.puts "\nclass #{as_class_name name} < MyExperiment\n\t\nend"
      end
      File.open(dir + "/config.yaml", "w") do |f|
        f << "---\nexperiment:\n development:\n compute:\n"
      end
    end

    # generate a new project in the current directory
    def new_project
      require 'fileutils'
      dir = "./" + @arguments.first
      Dir.mkdir(dir)
      %w[app config experiments report results test tmp vendor].each do |d|
        Dir.mkdir(dir + "/" + d)
      end
      basedir = File.dirname(__FILE__)
      File.open(File.join(dir, "config", "config.yaml"), "w") do |f|
        f << "---\nenvironments:\n development:\n compute:\n"
      end
      File.open(File.join(dir, ".gitignore"), "w") do |f|
        f << "tmp/*"
      end
      FileUtils::cp File.join(basedir, "generator/readme_template.txt"), File.join(dir, "README")
      FileUtils::cp File.join(basedir, "generator/Rakefile"), File.join(dir, "Rakefile")
      FileUtils::cp File.join(basedir, "generator/experiment_template.rb"), File.join(dir, "experiments", "experiment.rb")
    end

    # Lists available experiments
    def list
      puts "Available experiments:"
      puts " " + Dir["./experiments/*"].map{|a| File.basename(a) }.join(", ")
    end

    # Generates 2 files in the report directory
    # method.mmd which sums up comments from experimental conditions
    # data.csv which sums all results in a table
    def report
      dir = "./report/"
      File.open(dir + "method.mmd", "w") do |f|
        f.puts "# Methods #"
        Dir["./experiments/*/*.rb"].each do |desc|
          # Only the main file of a condition (named after its directory).
          next unless File.basename(desc) == File.basename(File.dirname(desc)) + ".rb"

          # Copy the leading comment block; stop at the first other line.
          File.read(desc).split("\n").each do |line|
            m = line.match(/^\# (.+)/)
            break unless m
            f.puts m[1]
          end
          f.puts
          f.puts
        end
      end
      require 'csv'
      require "yaml"
      require File.dirname(__FILE__) + "/stats"
      CSV.open(dir + "/data.csv", "w") do |csv|
        data = {}
        Dir["./results/*/results.yaml"].each do |res|
          d = YAML::load_file(res)
          da = {}
          d.each do |k, vals|
            da[k.to_s + " mean"], da[k.to_s + " sd"] = Stats::mean(vals), Stats::standard_deviation(vals)
            vals.each_with_index do |v, i|
              da[k.to_s + " cv:" + i.to_s] = v
            end
          end
          array_merge(data, da)
        end
        # One column per key: header first, then one value per result file.
        data.map { |key, values| [key] + values }.transpose.each do |row|
          csv << row
        end
      end

    end


    # runs experiments passed as arguments
    # use the -o option to override configuration
    def run
      require File.dirname(__FILE__) + "/base"

      require "./experiments/experiment"
      Experiment::Config::init @options.env

      if @options.distributed
        require "drb/drb"
        require File.dirname(__FILE__) + "/work_server"
        puts "Running in distributed mode. Run other machines with:\nexperiment worker --address #{local_ip}\n"
        Notify::init @arguments.length * @options.cv, STDOUT, Experiment::Config::get(:growl_notifications, true)
        WorkServer.new @arguments, @options, local_ip
        Notify::done
        return true
      else
        Notify::init @arguments.length * @options.cv, STDOUT, Experiment::Config::get(:growl_notifications, true)
        @arguments.each do |exp|
          require "./experiments/#{exp}/#{exp}"
          experiment = experiment_class(exp).new :normal, exp, @options.opts, @options.env
          experiment.normal_run! @options.cv
        end
        Notify::done
      end
    end


    # This is a Worker implementation. It requires an --address option
    # of its master server and will receive tasks (experiments and
    # cross-validations) and compute them.
    def worker
      require "drb/drb"
      require File.dirname(__FILE__) + "/base"
      Experiment::Config::init @options.env
      loop do
        @server_uri="druby://#{@options.master}:8787"
        connect
        Notify::init 0, STDOUT, false, @master
        while item = @master.new_item
          exp = @master.experiment item
          require "./experiments/experiment"
          require "./experiments/#{exp}/#{exp}"
          experiment = experiment_class(exp).new :slave, exp, @options.opts, @options.env
          experiment.master = @master.instance item
          experiment.slave_run!
        end
      end
    end

    private

    require 'socket'

    # Connects to the master at @server_uri, trying again every 10 seconds
    # for as long as the attempt fails.
    def connect
      begin
        puts "Connecting..."
        DRb.start_service
        @master = DRbObject.new_with_uri(@server_uri)
        @master.ready?
      rescue
        sleep 10
        # retry re-runs the attempt in place; the earlier self-recursion
        # grew the call stack with every failed attempt.
        retry
      end
    end

    # Address of the local interface used for outgoing traffic, found by
    # connecting a UDP socket to a fixed external address and reading the
    # socket's own address back.
    def local_ip
      orig, Socket.do_not_reverse_lookup = Socket.do_not_reverse_lookup, true
      UDPSocket.open do |s|
        s.connect '64.233.187.99', 1
        s.addr.last
      end
    ensure
      Socket.do_not_reverse_lookup = orig
    end


    # Appends every value of h2 to the Array stored under the same key in
    # h1 (creating that Array when missing).
    def array_merge(h1, h2)
      h2.each do |key, value|
        h1[key] ||= []
        h1[key] << value
      end
    end

    # Resolves the class that belongs to an experiment name by constant
    # lookup, so the name is never evaluated as Ruby code.
    def experiment_class(exp)
      Object.const_get(as_class_name(exp))
    end

    # "my_experiment-name" -> "MyExperimentName"
    def as_class_name(str)
      str.split(/[\_\-]+/).map(&:capitalize).join
    end

    # "my_experiment-name" -> "My Experiment Name"
    def as_human_name(str)
      str.split(/[\_\-]+/).map(&:capitalize).join(" ")
    end
  end

end