experiment 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,52 @@
1
1
  require "yaml"
2
2
 
3
3
  module Experiment
4
+ # You have a config directory containing a config.yaml file. This file contains
5
+ # several environments. The idea is that you might want to tweak your options
6
+ # differently when running on your laptop than when running on a university
7
+ # supercomputer.
8
+ #
9
+ # development is the default environment, you can set any other with the --env option.
10
+ #
11
+ # Experimental conditions also get their own config.yaml file. This
12
+ # file overrides the main config file so you can introduce condition
13
+ # specific options.
14
+ #
15
+ # And finally when running an experiment you can use the -o or --options
16
+ # option to override any config you want.
17
+ #
18
+ # @example With the yamls like this:
19
+ # # config/config.yaml
20
+ # environments:
21
+ # development:
22
+ # ref_dir: /Users/kubowo/Desktop/points-vals
23
+ # master_dir: /Users/kubowo/Desktop/points-vals/s:writer
24
+ # alpha: 0.4
25
+ # compute:
26
+ # ref_dir: /afs/group/DB/points
27
+ # master_dir: /afs/group/DB/points/s:writer
28
+ # alpha: 0.4
29
+ #
30
+ # # experiments/my_condition/config.yaml
31
+ # experiment:
32
+ # development:
33
+ # alpha: 0.5
34
+ # compute:
35
+ # alpha: 0.6
36
+ #
37
+ # # And you run the experiment with
38
+ # $ experiment console my_condition --env compute -o "master_dir: /Users/kubowo/Desktop/points-vals/aaa/s:writer"
39
+ #
40
+ # # Then your final config will look like this:
41
+ # > Experiment::Config.to_hash
42
+ # => { :ref_dir => "/afs/group/DB/points",
43
+ # :master_dir => "/Users/kubowo/Desktop/points-vals/aaa/s:writer",
44
+ # :alpha => 0.6 }
45
+ # > Experiment::Config[:master_dir]
46
+ # => "/Users/kubowo/Desktop/points-vals/aaa/s:writer"
47
+ # > Experiment::Config::get :master_dir, :writer => 145
48
+ # => "/Users/kubowo/Desktop/points-vals/aaa/s145"
49
+ # @see https://github.com/gampleman/Experiment/wiki/Configuration
4
50
  class Config
5
51
  class << self
6
52
 
@@ -19,24 +65,44 @@ module Experiment
19
65
  @config.merge! parse(options)
20
66
  end
21
67
 
22
- # loads the main config file
68
+ # loads the main config file based on the environment
23
69
  def init(env = :development)
24
70
  conf = YAML::load_file("./config/config.yaml")
25
71
  @config = conf["environments"][env.to_s]
26
72
  end
27
73
 
74
+ # @group Accessing configuration
28
75
 
29
- # Allows access to any config option by key (either String or Symbol)
30
- def [](v)
31
- @config[v.to_s]
76
+ # Allows access to any config option by key
77
+ # @example
78
+ # Config[:decay] # looks up decay in hierarchy of config files
79
+ # @param [#to_s] key to look up in config
80
+ def [](key)
81
+ @used ||= []
82
+ @used << key.to_s
83
+ @config[key.to_s]
32
84
  end
33
85
 
34
86
  # Allows access to any config option by key. Supports Interpolations.
35
87
  # Interpolations are supported as opts argument
36
- # words preceded with a colon (:) are interpolated
37
- # Otionaly second argument may be a default value to use if option
38
- # not present.
88
+ #
89
+ # Words preceded with a colon (:) are interpolated
90
+ # @overload def get(key)
91
+ # Same as {[]}.
92
+ # @overload def get(key, default)
93
+ # Returns default if key not found in configuration.
94
+ # @overload def get(key, default=nil, interpolations)
95
+ # Interpolates values preceded by a colon.
96
+ # Optionally the second argument may be a default value to use if option
97
+ # not present.
98
+ # @param [Hash] interpolations key will be replaced by value.
99
+ # @example
100
+ # Config.get :existing #=> "hello :what"
101
+ # Config.get :non_existent, "hello" #=> "hello"
102
+ # Config.get :existing, :what => "world" #=> "hello world"
39
103
  def get(v, *opts)
104
+ @used ||= []
105
+ @used << v.to_s
40
106
  default = opts.shift if opts.length == 2 || !opts.first.is_a?(Hash)
41
107
  out = @config[v.to_s] || default
42
108
  if opts = opts.first
@@ -48,21 +114,84 @@ module Experiment
48
114
  end
49
115
  end
50
116
 
117
+ # @endgroup
118
+
119
+ # Mainly for use on the console for development.
120
+ #
121
+ # Usage in experiments may result in a warning, since it may
122
+ # invalidate results.
51
123
  def set(opts)
124
+ @used ||= []
125
+ opts.keys.each {|key| puts "Warning: Overwriting '#{key}' that was already used in an experiment" if @used.include? key }
52
126
  @config ||= opts
53
- @config.merge opts
127
+ @config.merge! opts
54
128
  end
55
129
 
56
130
  # parses a string as passed into the CLI -o option
131
+ # @param [String] options should be in the form of key:value separated by
132
+ # commas
57
133
  def parse(options)
58
134
  return {} if options == ""
59
- Hash[options.split(/\, ?/).map{|a| a.split /\: ?/ }]
135
+ Hash[options.split(/\, ?/).map do |a|
136
+ a = a.split /\: ?/
137
+ case a.last
138
+ when /^\d+$/
139
+ a[1] = a[1].to_i
140
+ when /^\d+\.\d+$/
141
+ a[1] = a[1].to_f
142
+ end
143
+ a
144
+ end]
60
145
  end
61
146
 
62
- # returns current options as a Hash object
147
+ # returns current options that were already accessed
148
+ # @return [Hash]
63
149
  def to_h
64
- @config
150
+ @used ||= []
151
+ Hash[*@config.select{|k,v| @used.include? k }.flatten]
152
+ end
153
+
154
+ # returns all Config values currently loaded
155
+ # @return [Hash]
156
+ def to_hash
157
+ @used = @config.keys
158
+ @config
159
+ end
160
+
161
+ # Reads all the keys in config/config.yaml and provides
162
+ # optparse blocks for them.
163
+ # @private
164
+ # @param [OptParse] o Optparse instance to define options on.
165
+ # @param [OStruct] options The Options instance where to save parsed
166
+ # config and get reserved names from.
167
+ # @return [Boolean] Returns true if some parses were set.
168
+ def parsing_for_options(o, options)
169
+ return unless File.exists? "./config/config.yaml"
170
+ conf = YAML::load_file("./config/config.yaml")
171
+ num = 0
172
+ conf["environments"].each do |env, keys|
173
+ (keys || []).each do |key, value|
174
+ next if options.marshal_dump.keys.include? key.to_sym
175
+ #puts env.inspect, key.inspect, value.inspect
176
+ num += 1
177
+ cl = value.class == Fixnum ? Integer : value.class;
178
+ o.on("--#{key} VALUE", cl, "Default value #{value.inspect}") do |v|
179
+ if options.opts == ""
180
+ options.opts = "#{key}: #{v}"
181
+ else
182
+ options.opts += ", #{key}: #{v}"
183
+ end
184
+ end
185
+ end
186
+ end
187
+ num > 0
188
+ end
189
+
190
+ # @return [String]
191
+ def inspect
192
+ "Experiment::Config \"" + @config.to_a.map {|k,v| "#{k}: #{v}"}.join(", ") + '"'
65
193
  end
194
+
66
195
  end
67
196
 
68
197
  end
@@ -1,19 +1,62 @@
1
1
  module Experiment
2
+ # this module is included in Experiment::Base
3
+ # It incorporates most of the logic required for distributed
4
+ # computing support.
5
+ # @see https://github.com/gampleman/Experiment/wiki/Distributed-Mode
6
+ # @private
2
7
  module Distributed
8
+
9
+
10
+ # @group Called on slave
11
+
12
+ # master server DRb object
3
13
  attr_accessor :master
14
+
15
+ # Main function. Will continuously request work from the server,
16
+ # execute it and send back results, then loops to the beginning.
17
+ def slave_run!
18
+ while work = @master.get_work
19
+ puts work.inspect
20
+ Experiment::Config.set work[:options]
21
+ @current_cv = work[:cv]
22
+
23
+ @dir = work[:dir]
24
+ #@data = work[:input]
25
+ File.open(@dir + "/raw-#{@current_cv}.txt", "w") do |output|
26
+ @ouptut_file = output
27
+ run_the_experiment
28
+ end
29
+ result = analyze_result!(@dir + "/raw-#{@current_cv}.txt", @dir + "/analyzed-#{@current_cv}.txt")
30
+ write_performance!
31
+ @master.submit_result @current_cv, result, @abm.first
32
+ end
33
+
34
+ end
35
+
36
+
37
+ # @endgroup
38
+
39
+ # @group Called on master
40
+
41
+ # Send work from the master server
42
+ # @return [Hash, false] either a spec what work to carry out or false
43
+ # when no work available
4
44
  def get_work()
5
45
  if cv = @started.index(false)
6
46
  @started[cv] = true
7
- {:cv => cv, :input => @data[cv], :dir => @dir, :options => Experiment::Config.to_h }
47
+ {:cv => cv, :input => @data[cv], :dir => @dir, :options => Experiment::Config.to_hash }
8
48
  else
9
49
  false
10
50
  end
11
51
  end
12
52
 
53
+ # returns true if all work has been disseminated
13
54
  def distribution_done?
14
55
  @started.all?
15
56
  end
16
57
 
58
+ # Sends the result of the computation back to the master server.
59
+ # Called on the master server object.
17
60
  def submit_result(cv, result, performance)
18
61
  @completed[cv] = true
19
62
  array_merge(@results, result)
@@ -23,25 +66,8 @@ module Distributed
23
66
  end
24
67
 
25
68
 
26
- def slave_run!
27
- while work = @master.get_work
28
- puts work.inspect
29
- Experiment::Config.set work[:options]
30
- @current_cv = work[:cv]
31
-
32
- @dir = work[:dir]
33
- File.open(@dir + "/raw-#{@current_cv}.txt", "w") do |output|
34
- @ouptut_file = output
35
- run_the_experiment(work[:input], output)
36
- end
37
- result = analyze_result!(@dir + "/raw-#{@current_cv}.txt", @dir + "/analyzed-#{@current_cv}.txt")
38
- write_performance!
39
- @master.submit_result @current_cv, result, @abm.first
40
- end
41
-
42
- end
43
-
44
69
 
70
+ # Starts up the master server
45
71
  def master_run!(cv)
46
72
 
47
73
  @cvs = cv || 1
@@ -49,19 +75,24 @@ module Distributed
49
75
  Notify.started @experiment
50
76
  split_up_data
51
77
  write_dir!
52
- specification!
53
78
  @completed = (1..@cvs).map {|a| false }
54
79
  @started = @completed.dup
55
80
  end
56
81
 
82
+ # Cleans up the master server after all work is done
57
83
  def master_done!
58
84
  @done = true
85
+ specification! true
59
86
  summarize_performance!
60
87
  summarize_results! @results
88
+ cleanup!
61
89
  Notify.completed @experiment
62
90
 
63
91
  #sleep 1
64
92
  #DRb.stop_service
65
93
  end
94
+
95
+ # @endgroup
96
+
66
97
  end
67
98
  end
@@ -0,0 +1,227 @@
1
+ require "CSV"
2
+ require File.dirname(__FILE__) + "/params"
3
+
4
+ module Experiment
5
+ class Factorial < Base
6
+
7
+
8
+ class << self # Class Methods
9
+ # Specify a parameter that will be used as a factor in the experiment
10
+ # @example
11
+ # param :decay_rate, [0.1, 0.3, 0.7]
12
+ # param :photons, [5, 10]
13
+ # # runs these 6 experiments:
14
+ # # | decay_rate | photons
15
+ # # | 0.1 | 5
16
+ # # | 0.1 | 10
17
+ # # | 0.3 | 5
18
+ # # | 0.3 | 10
19
+ # # | 0.7 | 5
20
+ # # | 0.7 | 10
21
+ # @example Contrived example of block usage
22
+ # param :user_iq do
23
+ # mean = gets "How much is 1 + 1?"
24
+ # if mean == '2'
25
+ # (100..160).to_a
26
+ # else
27
+ # (20..30).to_a
28
+ # end
29
+ # end
30
+ # @see Params
31
+ def param(name, value = nil, &block)
32
+ @@params ||= {}
33
+ if block_given?
34
+ @@params[name] = block.call
35
+ else
36
+ @@params[name] = value
37
+ end
38
+ end
39
+
40
+ alias_method :independent_variable, :param
41
+ end
42
+
43
+ attr_accessor :parent_dir
44
+
45
+ def initialize(*args)
46
+ super(*args)
47
+ @params ||= {}
48
+ end
49
+
50
+
51
+ # runs the whole experiment
52
+ def normal_run!(cv)
53
+ @cvs = cv || 1
54
+ @results = {}
55
+ puts "Running #{@experiment} with #{param_grid.length} experiments at #{cv} cross validations each..."
56
+ #experiments = Notify.total / cv
57
+ #Notify.total = (experiments - 1) * cv + cv * param_grid.length
58
+ #
59
+ Notify::init param_grid.length * @options.cv, STDOUT, Experiment::Config::get(:growl_notifications, false)
60
+ split_up_data
61
+ write_dir!
62
+ param_grid.each do |paramset|
63
+ Params.set paramset
64
+ results = {}
65
+ Notify.started @experiment + ' ' + param_string(paramset, ", ")
66
+ @cvs.times do |cv_num|
67
+ @bm = []
68
+ @current_cv = cv_num
69
+ File.open(@dir + "/raw-#{param_string(paramset)}-#{cv_num}.txt", "w") do |output|
70
+ @ouptut_file = output
71
+ run_the_experiment(@data[cv_num], output)
72
+ end
73
+ array_merge results, analyze_result!(@dir + "/raw-#{param_string(paramset)}-#{cv_num}.txt", @dir + "/analyzed-#{param_string(paramset)}-#{cv_num}.txt")
74
+ write_performance!
75
+
76
+ Notify.cv_done @experiment + ' ' + param_string(paramset, ", "), cv_num
77
+ #Notify.inc step
78
+
79
+ end
80
+ #print '.'
81
+ Notify.completed @experiment + ' ' + param_string(paramset, ", ")
82
+
83
+ @results[paramset] = results
84
+ end
85
+ Notify::done
86
+ specification!
87
+ summarize_performance!
88
+ summarize_results! @results
89
+ cleanup!
90
+
91
+ puts File.read(@dir + "/summary.mmd") if @options.summary
92
+ end
93
+
94
+
95
+
96
+
97
+ protected
98
+
99
+ def param_grid
100
+ keys, vals = @@params.keys, @@params.values
101
+ start = vals.shift
102
+ @@params = {}
103
+ @grid ||= start.product(*vals).map do |ar|
104
+ Hash[*keys.zip(ar).flatten]
105
+ end
106
+ end
107
+
108
+
109
+ # creates a summary of the results and writes to 'all.csv'
110
+ def summarize_results!(all_results)
111
+ summaries = {}
112
+ all_results.each do |paramset, results|
113
+ File.open(@dir + "/results-#{param_string(paramset)}.yaml", 'w' ) do |out|
114
+ YAML.dump(results, out)
115
+ end
116
+ summaries[paramset] = {}
117
+ # create an array of arrays
118
+ res = results.keys.map do |key|
119
+ # calculate stats
120
+ a = results[key]
121
+ if a.all? {|el| el.is_a? Numeric }
122
+ summaries[paramset]["#{key} mean"] = Stats::mean(a)
123
+ summaries[paramset]["#{key} SD"] = Stats::standard_deviation(a)
124
+ [key] + a + [Stats::mean(a), Stats::standard_deviation(a)]
125
+ else
126
+ [key] + a + ["--", "--"]
127
+ end
128
+ end
129
+
130
+ ls = results.keys.map{|v| [7, v.to_s.length].max }
131
+
132
+ ls = ["Std Deviation".length] + ls
133
+ res = header_column + res
134
+ res = res.transpose
135
+ out = build_table res, ls
136
+ File.open(@dir + "/#{paramset}-summary.mmd", 'w') do |f|
137
+ f << "## Results for #{@experiment} with parametres #{param_string(paramset, ", ")} ##\n\n"
138
+ f << out
139
+ end
140
+ end
141
+
142
+ # Build CSV file with all of the results
143
+ #puts summaries.inspect
144
+
145
+ summaries = summaries.to_a
146
+ #puts summaries.inspect
147
+ keys1 = summaries.first.first.keys
148
+ keys2 = summaries.first.last.keys
149
+ #puts keys1.inspect, keys2.inspect, "====="
150
+ CSV.open(@dir + "/results.csv", "w") do |csv|
151
+ csv << keys1 + keys2
152
+ summaries.each do |summary|
153
+ #puts summary.first.inspect
154
+ #puts summary.first.values_at(*keys1).inspect + summary.last.values_at(*keys2).inspect
155
+ csv << summary.first.values_at(*keys1) + summary.last.values_at(*keys2)
156
+
157
+ end
158
+ end
159
+
160
+ end
161
+
162
+ # Writes a yaml specification of all the options used to run the experiment
163
+ def specification!
164
+ File.open(@dir + '/specification.yaml', 'w' ) do |out|
165
+ YAML.dump({:name => @experiment, :date => Time.now, :configuration => Experiment::Config.to_h, :cross_validations => @cvs, :params => @@params}, out )
166
+ end
167
+ end
168
+
169
+
170
+ def param_string(par, split = ",")
171
+ out = []
172
+ par.each do |k,v|
173
+ out << "#{k}=#{v}"
174
+ end
175
+ out.join split
176
+ end
177
+
178
+ # This module is a basis for the distributed implementation
179
+ # it is a WiP
180
+ module DistributedFactorial
181
+ def master_sub_experiments(cv)
182
+ write_dir!
183
+ param_grid.map do |paramset|
184
+ if @options.opts == ""
185
+ @options.opts = paramset.map {|k,v| "#{k}:#{v}"}.join(",")
186
+ else
187
+ @options.opts += "," + paramset.map {|k,v| "#{k}:#{v}"}.join(",")
188
+ end
189
+ child = self.class.new :master, @experiment, @options
190
+ child.parent_dir = @dir
191
+ child.master_run! cv
192
+ child
193
+ end
194
+ end
195
+
196
+
197
+
198
+ # Starts up the master server
199
+ def master_run!(cv)
200
+ @dir = "#{parent_dir}/#{@options.opts}"
201
+ Dir.mkdir @dir
202
+ @cvs = cv || 1
203
+ @results = {}
204
+ #Notify.started @experiment
205
+ split_up_data
206
+ #write_dir!
207
+ exps = param_grid.product((1..@cvs).to_a)
208
+ @completed = Hash[*exps.map {|a| [a, false] }.flatten]
209
+ @started = @completed.dup
210
+ end
211
+
212
+ # Cleans up the master server after all work is done
213
+ def master_done!
214
+ @done = true
215
+ specification! true
216
+ summarize_performance!
217
+ summarize_results! @results
218
+ cleanup!
219
+ #Notify.completed @experiment
220
+
221
+ #sleep 1
222
+ #DRb.stop_service
223
+ end
224
+ end
225
+
226
+ end
227
+ end
@@ -1,11 +1,14 @@
1
1
  class MyExperiment < Experiment::Base
2
2
 
3
- def test_data
3
+ # uncomment to get rid of "raw" files:
4
+ # after_completion :delete_raw_files
5
+
6
+ def data_set
4
7
  # TODO: Specify an array of all the test data.
5
8
  # It will be split up automatically for you across Cross-validations
6
9
  end
7
10
 
8
- def run_the_experiment(data, output)
11
+ def run_the_experiment
9
12
  # TODO: Define how you will run the experiment
10
13
  # Remember, each separate experiment inherits from this base class and includes
11
14
  # its own files, so this should be a rather generic implementation
@@ -13,23 +16,20 @@ class MyExperiment < Experiment::Base
13
16
  # 1. prepare any necessary setup like I/O lists, etc...
14
17
 
15
18
  # 2. do the experiment
16
- benchmark do
17
- output << # run your code here
19
+ measure "(optional) label" do
20
+ # run your code here
18
21
  end
19
22
 
20
23
  # 3. clean up
21
24
 
22
25
  end
23
26
 
24
- def analyze_result!(input, output)
27
+ # You might want to process your data
28
+ # def analyze_result!(input, output)
25
29
  # TODO perform an analysis of what your program did
26
30
 
27
31
  # remember to return a hash of meaningful data, best of all a summary
28
- end
29
-
30
- # you might want to override this method as well:
31
- # def summarize_results!(results)
32
- # super(results)
33
32
  # end
33
+
34
34
 
35
35
  end
@@ -10,12 +10,12 @@ doc
10
10
  experiments
11
11
  - This directory includes all experiments that were coded. They generally `require`
12
12
  files from the reference implementation and add modifications of their own.
13
- Each is explained in its `about.md` file.
13
+ Each is explained in its RDoc header.
14
14
  report
15
15
  - Source files used to create the report (multi-markdown format, see http://fletcherpenney.net/multimarkdown).
16
16
  results
17
17
  - Has all the measurements from individual experiments. Naming convention:
18
- {name}-{classes}-cv{number of cross validations}-{shortened timestamp}.
18
+ {name}-cv{number of cross validations}-{shortened timestamp}.
19
19
  test
20
20
  - Unit tests.
21
21
  tmp