experiment 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,5 +2,5 @@ $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
4
  module Experiment
5
- VERSION = '0.0.1'
5
+ VERSION = '0.2.0'
6
6
  end
@@ -1,25 +1,45 @@
1
1
  require File.dirname(__FILE__) + "/notify"
2
2
  require File.dirname(__FILE__) + "/stats"
3
3
  require File.dirname(__FILE__) + "/config"
4
+ require File.dirname(__FILE__) + "/distributed"
4
5
  require 'benchmark'
6
+ require "drb/drb"
5
7
 
6
8
  module Experiment
7
9
  class Base
8
- attr_reader :dir, :current_cv, :cvs
9
10
 
10
- def initialize(experiment, options, env)
11
+ include Distributed
12
+
13
+ attr_reader :dir, :current_cv, :cvs
14
+
15
+ def initialize(mode, experiment, options, env)
11
16
  @experiment = experiment
12
-
17
+ case mode
18
+
19
+ when :normal
20
+ @abm = []
21
+ when :master
22
+ @abm = []
23
+ extend DRb::DRbUndumped
24
+ @done = false
25
+ when :slave
26
+
27
+ end
13
28
  Experiment::Config::load(experiment, options, env)
14
- require "./experiments/#{experiment}/#{experiment}"
15
- @abm = []
29
+ @mode = mode
16
30
  end
31
+
32
+ def done?
33
+ @done
34
+ end
35
+
36
+
17
37
 
18
38
  # runs the whole experiment
19
- def run!(cv)
39
+ def normal_run!(cv)
20
40
  @cvs = cv || 1
21
41
  @results = {}
22
- Notify.print "Running #{@experiment} "
42
+ Notify.started @experiment
23
43
  split_up_data
24
44
  write_dir!
25
45
  specification!
@@ -28,21 +48,39 @@ module Experiment
28
48
  @bm = []
29
49
  @current_cv = cv_num
30
50
  File.open(@dir + "/raw-#{cv_num}.txt", "w") do |output|
51
+ @ouptut_file = output
31
52
  run_the_experiment(@data[cv_num], output)
32
53
  end
33
54
  array_merge @results, analyze_result!(@dir + "/raw-#{cv_num}.txt", @dir + "/analyzed-#{cv_num}.txt")
34
55
  write_performance!
35
- Notify.print "."
56
+ Notify.cv_done @experiment, cv_num
36
57
  end
37
58
  summarize_performance!
38
59
  summarize_results! @results
39
- Notify.print result_line
60
+ Notify.completed @experiment
40
61
  end
41
-
42
-
62
+
63
+
64
+ # Use this every time you want to do a measurement.
65
+ # It will be put on the record file and benchmarked
66
+ # automatically
67
+ # The weight parameter is used for calculating
68
+ # Notify::step. It should be an integer denoting how many
69
+ # such measurements you wish to do.
70
+ def measure(label = "", weight = nil, &block)
71
+ out = ""
72
+ benchmark label do
73
+ out = yield
74
+ end
75
+ @ouptut_file << out
76
+ Notify::step(@experiment, @current_cv, 1.0/weight) unless weight.nil?
77
+ end
78
+
79
+
43
80
  # Registers and performs a benchmark which is then
44
81
  # calculated to the total and everage times
45
82
  def benchmark(label = "", &block)
83
+ @bm ||= []
46
84
  @bm << Benchmark.measure("CV #{@current_cv} #{label}", &block)
47
85
  end
48
86
 
@@ -70,6 +108,7 @@ module Experiment
70
108
  f << @bm.map {|m| m.format("%19n "+Benchmark::FMTSTR)}.join
71
109
  total = @bm.reduce(0) {|t, m| m + t}
72
110
  f << total.format(" Total: "+Benchmark::FMTSTR)
111
+ @abm ||= []
73
112
  @abm << total
74
113
  end
75
114
  end
@@ -85,7 +124,7 @@ module Experiment
85
124
  # creates a summary of the results and writes to 'all.csv'
86
125
  def summarize_results!(results)
87
126
  File.open(@dir + '/results.yaml', 'w' ) do |out|
88
- YAML.dump(results, out )
127
+ YAML.dump(results, out)
89
128
  end
90
129
 
91
130
  # create an array of arrays
@@ -99,7 +138,6 @@ module Experiment
99
138
 
100
139
  ls = ["Standard Deviation".length] + ls
101
140
  res = [["cv"] + (1..cvs).to_a.map(&:to_s) + ["Mean", "Standard Deviation"]] + res
102
-
103
141
  out = ""
104
142
  res.transpose.each do |col|
105
143
  col.each_with_index do |cell, i|
@@ -9,7 +9,8 @@ module Experiment
9
9
  # the options string (which should be in this format:
10
10
  # "key: value, key2:value2,key3: value3")
11
11
  def load(experiment, options, env = :development)
12
- init env
12
+ #init env
13
+ @config ||= {}
13
14
  expath = File.expand_path("./experiments/#{experiment}/config.yaml")
14
15
  if File.exists? expath
15
16
  exp = YAML::load_file(expath)
@@ -17,20 +18,48 @@ module Experiment
17
18
  end
18
19
  @config.merge! parse(options)
19
20
  end
20
-
21
+
22
+ # loads the main config file
21
23
  def init(env = :development)
22
24
  conf = YAML::load_file("./config/config.yaml")
23
25
  @config = conf["environments"][env.to_s]
24
26
  end
25
-
27
+
28
+
29
+ # Allows access to any config option by key (either String or Symbol)
26
30
  def [](v)
27
31
  @config[v.to_s]
28
32
  end
29
33
 
34
# Looks up a config option by key (String or Symbol), with optional default
# and interpolation.
#
# v    - the option key; converted with +to_s+ before lookup
# opts - up to two optional arguments:
#        * a default value, returned when the option is missing or nil
#        * a Hash of interpolations; every ":name" in a String value is
#          replaced by the Hash entry for +name+
#
# An option explicitly set to +false+ is returned as +false+; only a missing
# (nil) option falls back to the default. Non-String values are returned
# untouched even when interpolations are given.
#
#   get(:growl_notifications, true)
#   get(:data_path, :name => "run1")   # "/data/:name" -> "/data/run1"
def get(v, *opts)
  default = opts.shift if opts.length == 2 || !opts.first.is_a?(Hash)
  stored = @config[v.to_s]
  # `stored || default` would turn a configured `false` into the default.
  value = stored.nil? ? default : stored

  interpolations = opts.first
  return value unless interpolations.is_a?(Hash) && value.is_a?(String)

  interpolations.reduce(value.dup) do |result, (name, replacement)|
    # Block form keeps backslashes in the replacement literal.
    result.gsub(/:#{Regexp.escape(name.to_s)}/) { replacement.to_s }
  end
end
50
+
51
# Merges +opts+ into the current configuration, creating it if necessary.
# Keys in +opts+ override existing keys. Returns the updated configuration.
def set(opts)
  @config ||= {}
  # merge! (not merge): the non-destructive form silently dropped the options.
  @config.merge!(opts)
end
55
+
56
# Parses an options string as passed into the CLI -o option
# ("key: value, key2:value2,key3: value3") into a Hash of String keys and
# String values.
#
# Returns an empty Hash for nil or blank input. Only the first colon of each
# pair separates key from value, so values may themselves contain colons
# (e.g. URLs). Values containing commas are not supported.
def parse(options)
  return {} if options.nil? || options.strip.empty?
  pairs = options.split(/, ?/).map do |pair|
    key, value = pair.split(/: ?/, 2)
    [key, value]
  end
  Hash[pairs]
end
33
61
 
62
+ # returns current options as a Hash object
34
63
  def to_h
35
64
  @config
36
65
  end
@@ -0,0 +1,67 @@
1
module Experiment
  # Mixin that splits the cross-validations of one experiment between a
  # master object and any number of slaves.
  #
  # The master hands out work through +get_work+ and collects it through
  # +submit_result+; a slave pulls work from its +master+ in +slave_run!+.
  # The including class is expected to provide +split_up_data+, +write_dir!+,
  # +specification!+, +run_the_experiment+, +analyze_result!+,
  # +write_performance!+, +summarize_performance!+, +summarize_results!+ and
  # +array_merge+, plus the @experiment, @data, @dir and @abm variables.
  #
  # NOTE(review): +get_work+ and +submit_result+ are not synchronised; if the
  # master serves requests concurrently two slaves could be given the same
  # cross-validation. Confirm against the work server.
  module Distributed
    attr_accessor :master

    # Master side: reserves the next unstarted cross-validation.
    # Returns a Hash with :cv, :input, :dir and :options, or false when
    # every cross-validation has already been handed out.
    def get_work()
      cv = @started.index(false)
      return false unless cv

      @started[cv] = true
      { :cv => cv, :input => @data[cv], :dir => @dir, :options => Experiment::Config.to_h }
    end

    # Master side: true once every cross-validation has been handed out.
    def distribution_done?
      @started.all?
    end

    # Master side: records the result and performance of cross-validation
    # +cv+ and finishes the experiment once all of them have reported.
    def submit_result(cv, result, performance)
      @completed[cv] = true
      array_merge(@results, result)
      @abm << performance
      Notify.cv_done @experiment, cv
      master_done! if @completed.all?
    end

    # Slave side: keeps asking the master for work until none is left,
    # running and analysing each cross-validation and submitting the outcome.
    def slave_run!
      while (work = @master.get_work)
        Experiment::Config.set work[:options]
        @current_cv = work[:cv]
        @dir = work[:dir]
        # Start every cross-validation with a clean benchmark list so its
        # timings do not include those of earlier ones.
        @bm = []

        raw_path = @dir + "/raw-#{@current_cv}.txt"
        File.open(raw_path, "w") do |output|
          @ouptut_file = output
          run_the_experiment(work[:input], output)
        end
        result = analyze_result!(raw_path, @dir + "/analyzed-#{@current_cv}.txt")
        write_performance!
        # write_performance! appends this run's total, so the newest entry
        # (not the first) belongs to the cross-validation just finished.
        @master.submit_result @current_cv, result, @abm.last
      end
    end

    # Master side: prepares the data and bookkeeping for +cv+
    # cross-validations (1 when nil). Does not run any work itself.
    def master_run!(cv)
      @cvs = cv || 1
      @results = {}
      Notify.started @experiment
      split_up_data
      write_dir!
      specification!
      @completed = Array.new(@cvs, false)
      @started = @completed.dup
    end

    # Master side: marks the experiment done and writes the summaries.
    def master_done!
      @done = true
      summarize_performance!
      summarize_results! @results
      Notify.completed @experiment

      #sleep 1
      #DRb.stop_service
    end
  end
end
@@ -1,6 +1,180 @@
1
+ # This class is responsible for UI goodness in letting you know
2
+ # about the progress of your experiments
3
+ require "drb/drb"
1
4
  class Notify
2
- def self.method_missing(meth, *args, &blk)
3
- $stdout.sync = true
4
- $stdout.send meth, *args, &blk
5
+
6
+ class << self
7
+ include DRb::DRbUndumped
8
+ # initialize display
9
+ def init(total, out = STDERR, growl = true, mode = :normal)
10
+ @curent_experiment = ""
11
+ @current_cv = 0
12
+ @cv_prog = {}
13
+ @total = total
14
+ @out = out
15
+ @terminal_width = 80
16
+ @bar_mark = "o"
17
+ @current = 0
18
+ @previous = 0
19
+ @finished_p = false
20
+ @start_time = Time.now
21
+ @previous_time = @start_time
22
+ @growl = growl
23
+ @mode = mode
24
+ show if @mode == :normal
25
+ end
26
+
27
+ # Called when starting work on a particular experiment
28
+ def started(experiment)
29
+ @curent_experiment = experiment
30
+ @current_cv = 1
31
+ @cv_prog[experiment] = []
32
+ show_if_needed
33
+ end
34
+
35
+ # Called when experiment completed.
36
+ # Shows a Growl notification on OSX.
37
+ # The message can be expanded by overriding the result_line
38
+ # method in the experiment class
39
+ def completed(experiment, msg = "")
40
+ if @growl
41
+ begin
42
+ `G_TITLE="Experiment Complete" #{File.dirname(__FILE__)}/../../bin/growl.sh -nosticky "Experimental condition #{experiment} complete. #{msg}"`
43
+ rescue
44
+ # probably not on OSX
45
+ end
46
+ end
47
+ m = "Condition #{experiment} complete. #{msg}"
48
+ puts m + " " * @terminal_width
49
+ @curent_experiment = nil
50
+ end
51
+
52
+ # called after a crossvalidation has completed
53
+ def cv_done(experiment, num)
54
+ @cv_prog[experiment][num] ||= 0
55
+ inc(1 - @cv_prog[experiment][num])
56
+ #@cv_prog = 0
57
+ end
58
+
59
+ # Wrap up
60
+ def done
61
+ @current = @total
62
+ @finished_p = true
63
+ #show
64
+ end
65
+
66
+ # Use this in experiment after each (potentially time consuming) task
67
+ # The argument should be a fraction (0 < num < 1) which tells
68
+ # how big a portion the task was of the complete run (eg. your
69
+ # calls should sum up to 1).
70
+ def step(experiment, cv, num)
71
+ if @mode == :normal
72
+ if num > 1
73
+ num = num / 100
74
+ end
75
+ inc(num)
76
+ @cv_prog[experiment][cv] ||= 0
77
+ @cv_prog[experiment][cv] += num
78
+ else
79
+ @mode.notify.step(experiment, cv, num)
80
+ end
81
+ end
82
+
5
83
  end
84
+
85
+ # a big part of this module is copied/inspired by Satoru Takabayashi's <satoru@namazu.org> ProgressBar class at http://0xcc.net/ruby-progressbar/index.html.en
86
+ module ProgressBar #:nodoc
87
+ def inc(step = 1)
88
+ @current += step
89
+ @current = @total if @current > @total
90
+ show_if_needed
91
+ @previous = @current
92
+ end
93
+
94
+ def show_if_needed
95
+ if @total.zero?
96
+ cur_percentage = 100
97
+ prev_percentage = 0
98
+ else
99
+ cur_percentage = (@current * 100 / @total).to_i
100
+ prev_percentage = (@previous * 100 / @total).to_i
101
+ end
102
+ @finished_p = cur_percentage == 100
103
+ # Use "!=" instead of ">" to support negative changes
104
+ if cur_percentage != prev_percentage ||
105
+ Time.now - @previous_time >= 1 || @finished_p
106
+ show
107
+ end
108
+ end
109
+
110
+
111
+
112
+ def show
113
+ percent = @current * 100 / @total
114
+ bar_width = percent * @terminal_width / 100
115
+ line = sprintf "%3d%% |%s%s| %s", percent, "=" * bar_width, "-" * (@terminal_width - bar_width), stat
116
+
117
+
118
+ width = get_width
119
+ if line.length == width - 1
120
+ @out.print(line + (@finished_p ? "\n" : "\r"))
121
+ @out.flush
122
+ elsif line.length >= width
123
+ @terminal_width = [@terminal_width - (line.length - width + 1), 0].max
124
+ if @terminal_width == 0 then @out.print(line + eol) else show end
125
+ else # line.length < width - 1
126
+ @terminal_width += width - line.length + 1
127
+ show
128
+ end
129
+ @previous_time = Time.now
130
+ end
131
+
132
+ def stat
133
+ if @finished_p then elapsed else eta end
134
+ end
135
+
136
+ def eta
137
+ if @current == 0
138
+ "ETA: --:--:--"
139
+ else
140
+ elapsed = Time.now - @start_time
141
+ eta = elapsed * @total / @current - elapsed;
142
+ sprintf("ETA: %s", format_time(eta))
143
+ end
144
+ end
145
+
146
+ def elapsed
147
+ elapsed = Time.now - @start_time
148
+ sprintf("Time: %s", format_time(elapsed))
149
+ end
150
+
151
+ def format_time (t)
152
+ t = t.to_i
153
+ sec = t % 60
154
+ min = (t / 60) % 60
155
+ hour = t / 3600
156
+ sprintf("%02d:%02d:%02d", hour, min, sec);
157
+ end
158
+
159
+
160
+ def get_width
161
+ # FIXME: I don't know how portable it is.
162
+ default_width = 80
163
+ begin
164
+ tiocgwinsz = 0x5413
165
+ data = [0, 0, 0, 0].pack("SSSS")
166
+ if @out.ioctl(tiocgwinsz, data) >= 0 then
167
+ rows, cols, xpixels, ypixels = data.unpack("SSSS")
168
+ if cols >= 0 then cols else default_width end
169
+ else
170
+ default_width
171
+ end
172
+ rescue Exception
173
+ default_width
174
+ end
175
+ end
176
+ end
177
+
178
+ extend ProgressBar
179
+
6
180
  end
@@ -0,0 +1,208 @@
1
module Experiment

  # This is the class behind the command line magic.
  #
  # It is built from the positional command line arguments and an options
  # object; each public method implements one sub-command.
  class Runner

    require 'socket'

    attr_reader :options

    # arg - Array of positional arguments (experiment or project names)
    # opt - options object; the methods below read +description+, +env+,
    #       +cv+, +opts+, +distributed+ and +master+ from it
    def initialize(arg, opt)
      @arguments, @options = arg, opt
    end


    # Generates a new experiment condition under ./experiments/<name>.
    # Usage of the -m flag for writing a hypothesis is recommended
    def generate
      name = @arguments.first
      dir = "./experiments/" + name
      Dir.mkdir(dir)
      File.open(dir + "/" + name + ".rb", "w") do |req_file|
        req_file.puts "# ## #{as_human_name name} ##"
        # to_s: no description given must not abort the generator
        req_file.puts "# " + @options.description.to_s.split("\n").join("\n# ")
        req_file.puts
        req_file.puts
        req_file.puts "# The first contious block of comment will be included in your report."
        req_file.puts "# This includes the reference implementation."
        req_file.puts "# Override any desired files in this directory."
        Dir["./app/**/*.rb"].each do |f|
          # Only strip the leading "./"; subtracting the components of the
          # working directory also removed equally named parts of the path.
          relative = f.sub(%r{\A\./}, "")
          req_file.puts "require File.dirname(__FILE__) + \"/../../#{relative}\""
        end
        req_file.puts "\nclass #{as_class_name name} < MyExperiment\n\t\nend"
      end
      File.open(dir + "/config.yaml", "w") do |f|
        f << "---\nexperiment:\n development:\n compute:\n"
      end
    end

    # generate a new project in the current directory
    def new_project
      require 'fileutils'
      dir = "./" + @arguments.first
      Dir.mkdir(dir)
      %w[app config experiments report results test tmp vendor].each do |d|
        Dir.mkdir(dir + "/" + d)
      end
      basedir = File.dirname(__FILE__)
      File.open(File.join(dir, "config", "config.yaml"), "w") do |f|
        f << "---\nenvironments:\n development:\n compute:\n"
      end
      File.open(File.join(dir, ".gitignore"), "w") do |f|
        f << "tmp/*"
      end
      FileUtils::cp File.join(basedir, "generator/readme_template.txt"), File.join(dir, "README")
      FileUtils::cp File.join(basedir, "generator/Rakefile"), File.join(dir, "Rakefile")
      FileUtils::cp File.join(basedir, "generator/experiment_template.rb"), File.join(dir, "experiments", "experiment.rb")
    end

    # Lists available experiments
    def list
      puts "Available experiments:"
      puts " " + Dir["./experiments/*"].map{|a| File.basename(a) }.join(", ")
    end

    # Generates 2 files in the report directory
    # method.mmd which sums up comments from experimental conditions
    # data.csv which sums all results in a table
    def report
      dir = "./report/"
      File.open(dir + "method.mmd", "w") do |f|
        f.puts "# Methods #"
        Dir["./experiments/*/*.rb"].each do |desc|
          # only the main file of a condition (<name>/<name>.rb) is described
          next unless File.basename(desc) == File.basename(File.dirname(desc)) + ".rb"

          # copy the leading comment block, stopping at the first other line
          File.read(desc).split("\n").each do |line|
            m = line.match(/^\# (.+)/)
            break unless m
            f.puts m[1]
          end
          f.puts
          f.puts
        end
      end
      require 'csv'
      require "yaml"
      require File.dirname(__FILE__) + "/stats"
      CSV.open(dir + "/data.csv", "w") do |csv|
        data = {}
        Dir["./results/*/results.yaml"].each do |res|
          d = YAML::load_file(res)
          da = {}
          d.each do |k, vals|
            da[k.to_s + " mean"], da[k.to_s + " sd"] = Stats::mean(vals), Stats::standard_deviation(vals)
            vals.each_with_index do |v, i|
              da[k.to_s + " cv:" + i.to_s] = v
            end
          end
          array_merge(data, da)
        end
        # one column per key: header first, then one value per results file
        data.keys.map do |key|
          [key] + data[key]
        end.transpose.each do |row|
          csv << row
        end
      end

    end


    # runs experiments passed as arguments
    # use the -o option to override configuration
    def run
      require File.dirname(__FILE__) + "/base"

      require "./experiments/experiment"
      Experiment::Config::init @options.env

      if @options.distributed
        require "drb/drb"
        require File.dirname(__FILE__) + "/work_server"
        puts "Running in distributed mode. Run other machines with:\nexperiment worker --address #{local_ip}\n"
        Notify::init @arguments.length * @options.cv, STDOUT, Experiment::Config::get(:growl_notifications, true)
        WorkServer.new @arguments, @options, local_ip
        Notify::done
        return true
      else
        Notify::init @arguments.length * @options.cv, STDOUT, Experiment::Config::get(:growl_notifications, true)
        @arguments.each do |exp|
          require "./experiments/#{exp}/#{exp}"
          experiment = experiment_class(exp).new :normal, exp, @options.opts, @options.env
          experiment.normal_run! @options.cv
        end
        Notify::done
      end
    end


    # This is a Worker implementation. It requires an --address option
    # of its master server and will receive tasks (experiments and
    # cross-validations) and compute them.
    def worker
      require "drb/drb"
      require File.dirname(__FILE__) + "/base"
      Experiment::Config::init @options.env
      loop do
        @server_uri="druby://#{@options.master}:8787"
        connect
        Notify::init 0, STDOUT, false, @master
        while (item = @master.new_item)
          exp = @master.experiment item
          require "./experiments/experiment"
          require "./experiments/#{exp}/#{exp}"
          experiment = experiment_class(exp).new :slave, exp, @options.opts, @options.env
          experiment.master = @master.instance item
          experiment.slave_run!
        end
      end
    end

    private

    # Connects to the master at @server_uri, retrying every 10 seconds until
    # it answers +ready?+. Returns the master's answer.
    def connect
      loop do
        begin
          puts "Connecting..."
          # one local DRb service is enough, however often we have to retry
          DRb.start_service unless DRb.primary_server
          @master = DRbObject.new_with_uri(@server_uri)
          return @master.ready?
        rescue StandardError
          sleep 10
        end
      end
    end

    # Address of the local interface used for outgoing traffic. Connecting a
    # UDP socket sends no packet; it only makes the system pick a route.
    def local_ip
      orig, Socket.do_not_reverse_lookup = Socket.do_not_reverse_lookup, true
      UDPSocket.open do |s|
        s.connect '64.233.187.99', 1
        s.addr.last
      end
    ensure
      Socket.do_not_reverse_lookup = orig
    end


    # Appends every value of +h2+ to the Array stored under the same key in
    # +h1+, creating the Array when the key is new.
    def array_merge(h1, h2)
      h2.each do |key, value|
        h1[key] ||= []
        h1[key] << value
      end
    end

    # Resolves the class implementing experiment +exp+ by constant lookup
    # rather than eval, so a name can never be executed as code.
    # Raises NameError when no such class is loaded.
    def experiment_class(exp)
      Experiment.const_get(as_class_name(exp))
    end

    # "my_cool-exp" -> "MyCoolExp"
    def as_class_name(str)
      str.split(/[\_\-]+/).map(&:capitalize).join
    end

    # "my_cool-exp" -> "My Cool Exp"
    def as_human_name(str)
      str.split(/[\_\-]+/).map(&:capitalize).join(" ")
    end
  end

end