RubyGems - experiment - Versions diffs - 0.2.0 → 0.3.0 - Mend

experiment 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

data/Manifest.txt +5 -3
data/{README.rdoc → README.md} +26 -57
data/Rakefile +3 -1
data/bin/experiment +28 -9
data/lib/experiment/base.rb +165 -65
data/lib/experiment/config.rb +140 -11
data/lib/experiment/distributed.rb +51 -20
data/lib/experiment/factorial.rb +227 -0
data/lib/experiment/generator/{experiment_template.rb → experiment_template.rb.txt} +10 -10
data/lib/experiment/generator/readme_template.txt +2 -2
data/lib/experiment/notify.rb +150 -146
data/lib/experiment/params.rb +18 -0
data/lib/experiment/runner.rb +50 -8
data/lib/experiment/stats/descriptive.rb +58 -0
data/lib/experiment/work_server.rb +20 -5
data/lib/experiment.rb +1 -1
data/test/test_stats.rb +9 -3
metadata +12 -9
data/lib/experiment/stats.rb +0 -43

data/lib/experiment/config.rb CHANGED Viewed

@@ -1,6 +1,52 @@
 require "yaml"
 module Experiment
+  # You have a config directory containing a config.yaml file. This file contains
+  # several environments. The idea is that you might want to tweak your options
+  # differently when running on your laptop than when running on a university
+  # supercomputer.
+  #
+  # development is the default environment, you can set any other with the --env option.
+  #
+  # Experimental conditions also get their own config.yaml file. This
+  # file overrides the main config file so you can introduce
+  # condition-specific options.
+  #
+  # And finally when running an experiment you can use the -o or --options
+  # option to override any config you want.
+  #
+  # @example With the yamls like this:
+  #   # config/config.yaml
+  #   environments:
+  #     development:
+  #       ref_dir: /Users/kubowo/Desktop/points-vals
+  #       master_dir: /Users/kubowo/Desktop/points-vals/s:writer
+  #       alpha: 0.4
+  #     compute:
+  #       ref_dir: /afs/group/DB/points
+  #       master_dir: /afs/group/DB/points/s:writer
+  #       alpha: 0.4
+  #
+  #   # experiments/my_condition/config.yaml
+  #   experiment:
+  #     development:
+  #       alpha: 0.5
+  #     compute:
+  #       alpha: 0.6
+  #
+  #   # And you run the experiment with
+  #   $ experiment console my_condition --env compute -o "master_dir: /Users/kubowo/Desktop/points-vals/aaa/s:writer"
+  #
+  #   # Then your final config will look like this:
+  #   > Experiment::Config.to_hash
+  #   => { :ref_dir => "/afs/group/DB/points",
+  #          :master_dir => "/Users/kubowo/Desktop/points-vals/aaa/s:writer",
+  #          :alpha => 0.6 }
+  #   > Experiment::Config[:master_dir]
+  #   => "/Users/kubowo/Desktop/points-vals/aaa/s:writer"
+  #   > Experiment::Config::get :master_dir, :writer => 145
+  #   => "/Users/kubowo/Desktop/points-vals/aaa/s145"
+  # @see https://github.com/gampleman/Experiment/wiki/Configuration
 	class Config
 	  class << self
@@ -19,24 +65,44 @@ module Experiment
   			@config.merge! parse(options)
   		end
-      # loads the main config file
+      # loads the main config file based on the environment
   		def init(env = :development)
   		  conf = YAML::load_file("./config/config.yaml")
   		  @config = conf["environments"][env.to_s]
   		end
+      # @group Accessing configuration
-      # Allows access to any config option by key (either String or Symbol)
-  		def [](v)
-  			@config[v.to_s]
+      # Allows access to any config option by key
+      # @example
+      #   Config[:decay] # looks up decay in hierarchy of config files
+      # @param [#to_s] key to look up in config
+  		def [](key)
+        @used ||= []
+        @used << key.to_s
+  			@config[key.to_s]
   		end
   		# Allows access to any config option by key. Supports interpolations.
   		# Interpolations are supported as opts argument
-  		# words preceded with a colon (:) are interpolated
-  		# Otionaly second argument may be a default value to use if option
-  		# not present.
+  		#
+  		# Words preceded with a colon (:) are interpolated
+  		# @overload def get(key)
+  		#   Same as {[]}.
+  		# @overload def get(key, default)
+  		#   Returns default if key not found in configuration.
+  		# @overload def get(key, default=nil, interpolations)
+  		#   Interpolates values preceded by a colon (:).
+  		#   Optionally, the second argument may be a default value to use if
+  		#   the option is not present.
+  		#   @param [Hash] interpolations key will be replaced by value.
+  		# @example
+  		#   Config.get :existing                   #=> "hello :what"
+  		#   Config.get :non_existent, "hello"      #=> "hello"
+  		#   Config.get :existing, :what => "world" #=> "hello world"
   		def get(v, *opts)
+  		  @used ||= []
+        @used << v.to_s
   		  default = opts.shift if opts.length == 2 || !opts.first.is_a?(Hash)
   		  out = @config[v.to_s] || default
   		  if opts = opts.first
@@ -48,21 +114,84 @@ module Experiment
 			  end
 		  end
+		  # @endgroup
+		  # Mainly for use on the console for development.
+		  #
+		  # Usage in experiments may result in a warning, since it may
+		  # invalidate results.
 		  def set(opts)
+		    @used ||= []
+		    opts.keys.each {|key| puts "Warning: Overwriting '#{key}' that was already used in an experiment" if @used.include? key }
 		    @config ||= opts
-		    @config.merge opts
+		    @config.merge! opts
 	    end
 		  # parses a string as passed into the CLI -o option
+		  # @param [String] options should be in the form of key:value separated by
+		  #   commas
   		def parse(options)
   		  return {} if options == ""
-  		  Hash[options.split(/\, ?/).map{|a| a.split /\: ?/ }]
+  		  Hash[options.split(/\, ?/).map do |a|
+  		    a = a.split /\: ?/
+  		    case a.last
+  		    when /^\d+$/
+  		      a[1] = a[1].to_i
+		      when /^\d+\.\d+$/
+		        a[1] = a[1].to_f
+  		    end
+  		    a
+  		  end]
 		  end
-		  # returns current options as a Hash object
+		  # returns current options that were already accessed
+		  # @return [Hash]
 		  def to_h
-		    @config
+		    @used ||= []
+		    Hash[*@config.select{|k,v| @used.include? k }.flatten]
+	    end
+	    # returns all Config values currently loaded
+	    # @return [Hash]
+	    def to_hash
+	      @used = @config.keys
+	      @config
+      end
+	    # Reads all the keys in config/config.yaml and provides
+	    # optparse blocks for them.
+	    # @private
+	    # @param [OptParse] o Optparse instance to define options on.
+	    # @param [OStruct] options The Options instance where to save parsed
+	    #   config and get reserved names from.
+	    # @return [Boolean] Returns true if some parses were set.
+	    def parsing_for_options(o, options)
+	      return unless File.exists? "./config/config.yaml"
+	      conf = YAML::load_file("./config/config.yaml")
+        num = 0
+	      conf["environments"].each do |env, keys|
+	        (keys || []).each do |key, value|
+	          next if options.marshal_dump.keys.include? key.to_sym
+	          #puts env.inspect, key.inspect, value.inspect
+	          num += 1
+	          cl = value.class == Fixnum ? Integer : value.class;
+	          o.on("--#{key} VALUE", cl, "Default value #{value.inspect}") do |v|
+	            if options.opts == ""
+	              options.opts = "#{key}: #{v}"
+	            else
+	              options.opts += ", #{key}: #{v}"
+              end
+            end
+          end
+        end
+        num > 0
+	    end
+	    # @return [String]
+	    def inspect
+	      "Experiment::Config \"" + @config.to_a.map {|k,v| "#{k}: #{v}"}.join(", ") + '"'
 	    end
 	  end
 	end

data/lib/experiment/distributed.rb CHANGED Viewed

@@ -1,19 +1,62 @@
 module Experiment
+# this module is included in Experiment::Base
+# It incorporates most of the logic required for distributed
+# computing support.
+# @see https://github.com/gampleman/Experiment/wiki/Distributed-Mode
+# @private
 module Distributed
+  # @group Called on slave
+  # master server DRb object
   attr_accessor :master
+  # Main function. Will continuously request work from the server,
+  # execute it and send back results, then loop back to the beginning.
+  def slave_run!
+    while work = @master.get_work
+      puts work.inspect
+      Experiment::Config.set work[:options]
+      @current_cv = work[:cv]
+      @dir = work[:dir]
+      #@data = work[:input]
+      File.open(@dir + "/raw-#{@current_cv}.txt", "w") do |output|
+			  @ouptut_file = output
+			  run_the_experiment
+			end
+			result = analyze_result!(@dir + "/raw-#{@current_cv}.txt", @dir + "/analyzed-#{@current_cv}.txt")
+			write_performance!
+			@master.submit_result @current_cv, result, @abm.first
+    end
+  end
+  # @endgroup
+  # @group Called on master
+  # Send work from the master server
+  # @return [Hash, false] either a spec what work to carry out or false
+  #   when no work available
   def get_work()
 	  if cv = @started.index(false)
 	    @started[cv] = true
-	    {:cv => cv, :input => @data[cv], :dir => @dir, :options => Experiment::Config.to_h }
+	    {:cv => cv, :input => @data[cv], :dir => @dir, :options => Experiment::Config.to_hash }
 	  else
 	    false
     end
   end
+  # returns true if all work has been disseminated
   def distribution_done?
     @started.all?
   end
+  # Sends the result of the computation back to the master server.
+  # Called on the master server object.
   def submit_result(cv, result, performance)
     @completed[cv] = true
     array_merge(@results, result)
@@ -23,25 +66,8 @@ module Distributed
   end
-  def slave_run!
-    while work = @master.get_work
-      puts work.inspect
-      Experiment::Config.set work[:options]
-      @current_cv = work[:cv]
-      @dir = work[:dir]
-      File.open(@dir + "/raw-#{@current_cv}.txt", "w") do |output|
-			  @ouptut_file = output
-			  run_the_experiment(work[:input], output)
-			end
-			result = analyze_result!(@dir + "/raw-#{@current_cv}.txt", @dir + "/analyzed-#{@current_cv}.txt")
-			write_performance!
-			@master.submit_result @current_cv, result, @abm.first
-    end
-  end
+  # Starts up the master server
   def master_run!(cv)
     @cvs = cv || 1
@@ -49,19 +75,24 @@ module Distributed
 		Notify.started @experiment
     split_up_data
 		write_dir!
-		specification!
 		@completed = (1..@cvs).map {|a| false }
 		@started = @completed.dup
   end
+  # Cleans up the master server after all work is done
   def master_done!
     @done = true
+    specification! true
     summarize_performance!
 		summarize_results! @results
+		cleanup!
 		Notify.completed @experiment
 		#sleep 1
     #DRb.stop_service
   end
+  # @endgroup
 end
 end

data/lib/experiment/factorial.rb ADDED Viewed

@@ -0,0 +1,227 @@
+require "CSV"
+require File.dirname(__FILE__) + "/params"
+module Experiment
+  class Factorial < Base
+    class << self # Class Methods
+  	  # Specify a parameter that will be used as a factor in the experiment
+    	# @example
+    	#   param :decay_rate, [0.1, 0.3, 0.7]
+    	#   param :photons, [5, 10]
+    	#   # runs these 6 experiments:
+    	#   # | decay_rate | photons
+    	#   # |        0.1 |   5
+    	#   # |        0.1 |  10
+    	#   # |        0.3 |   5
+    	#   # |        0.3 |  10
+    	#   # |        0.7 |   5
+    	#   # |        0.7 |  10
+    	# @example Contrived example of block usage
+    	#   param :user_iq do
+    	#     mean = gets "How much is 1 + 1?"
+    	#     if mean == '2'
+    	#       (100..160).to_a
+    	#     else
+    	#       (20..30).to_a
+    	#     end
+    	#   end
+    	# @see Params
+    	def param(name, value = nil, &block)
+    	  @@params ||= {}
+    	  if block_given?
+    	    @@params[name] = block.call
+  	    else
+  	      @@params[name] = value
+        end
+  	  end
+  	  alias_method :independent_variable, :param
+  	end
+    attr_accessor :parent_dir
+    def initialize(*args)
+      super(*args)
+      @params ||= {}
+    end
+    # runs the whole experiment
+  	def normal_run!(cv)
+  		@cvs = cv || 1
+      @results = {}
+      puts "Running #{@experiment} with #{param_grid.length} experiments at #{cv} cross validations each..."
+  		#experiments = Notify.total / cv
+  		#Notify.total = (experiments - 1) * cv + cv * param_grid.length
+  		#
+      Notify::init param_grid.length * @options.cv, STDOUT, Experiment::Config::get(:growl_notifications, false)
+      split_up_data
+  		write_dir!
+      param_grid.each do |paramset|
+        Params.set paramset
+        results = {}
+        Notify.started @experiment + ' ' + param_string(paramset, ", ")
+        @cvs.times do |cv_num|
+    			@bm = []
+    			@current_cv = cv_num
+    			File.open(@dir + "/raw-#{param_string(paramset)}-#{cv_num}.txt", "w") do |output|
+    			  @ouptut_file = output
+    			    run_the_experiment(@data[cv_num], output)
+    			end
+    			array_merge results, analyze_result!(@dir + "/raw-#{param_string(paramset)}-#{cv_num}.txt", @dir + "/analyzed-#{param_string(paramset)}-#{cv_num}.txt")
+    			write_performance!
+    			Notify.cv_done @experiment + ' ' + param_string(paramset, ", "), cv_num
+    			#Notify.inc step
+    		end
+    		#print '.'
+    		Notify.completed @experiment + ' ' + param_string(paramset, ", ")
+    		@results[paramset] = results
+      end
+  		Notify::done
+  		specification!
+  		summarize_performance!
+  		summarize_results! @results
+  		cleanup!
+  		puts File.read(@dir + "/summary.mmd") if @options.summary
+  	end
+	  protected
+	  def param_grid
+	    keys, vals = @@params.keys, @@params.values
+	    start = vals.shift
+	    @@params = {}
+	    @grid ||= start.product(*vals).map do |ar|
+	      Hash[*keys.zip(ar).flatten]
+      end
+	  end
+	  # creates a summary of the results and writes it to 'results.csv'
+  	def summarize_results!(all_results)
+  	  summaries = {}
+  	  all_results.each do |paramset, results|
+  	    File.open(@dir + "/results-#{param_string(paramset)}.yaml", 'w' ) do |out|
+    			YAML.dump(results, out)
+    		end
+    		summaries[paramset] = {}
+    		# create an array of arrays
+    		res = results.keys.map do |key|
+    		  # calculate stats
+    		  a = results[key]
+    		  if a.all? {|el| el.is_a? Numeric }
+    		    summaries[paramset]["#{key} mean"] = Stats::mean(a)
+    		    summaries[paramset]["#{key} SD"] = Stats::standard_deviation(a)
+    		    [key] + a + [Stats::mean(a), Stats::standard_deviation(a)]
+  		    else
+  		      [key] + a + ["--", "--"]
+  	      end
+  		  end
+  		  ls = results.keys.map{|v| [7, v.to_s.length].max }
+    		ls = ["Std Deviation".length] + ls
+    		res = header_column + res
+    		res = res.transpose
+    		out = build_table res, ls
+    		File.open(@dir + "/#{paramset}-summary.mmd", 'w') do |f|
+    		  f << "## Results for #{@experiment} with parametres #{param_string(paramset, ", ")} ##\n\n"
+    		  f << out
+  		  end
+		  end
+		  # Build CSV file with all of the results
+		  #puts summaries.inspect
+		  summaries = summaries.to_a
+      #puts summaries.inspect
+		  keys1 = summaries.first.first.keys
+		  keys2 = summaries.first.last.keys
+		  #puts keys1.inspect, keys2.inspect, "====="
+      CSV.open(@dir + "/results.csv", "w") do |csv|
+  	    csv << keys1 + keys2
+  	    summaries.each do |summary|
+  	      #puts summary.first.inspect
+  	      #puts summary.first.values_at(*keys1).inspect + summary.last.values_at(*keys2).inspect
+  	      csv << summary.first.values_at(*keys1) + summary.last.values_at(*keys2)
+  	    end
+  	  end
+	  end
+	  # Writes a yaml specification of all the options used to run the experiment
+  	def specification!
+  		File.open(@dir + '/specification.yaml', 'w' ) do |out|
+  			YAML.dump({:name => @experiment, :date => Time.now, :configuration => Experiment::Config.to_h, :cross_validations => @cvs, :params => @@params}, out )
+  		end
+  	end
+    def param_string(par, split = ",")
+      out = []
+      par.each do |k,v|
+        out << "#{k}=#{v}"
+      end
+      out.join split
+    end
+    # This module is a basis for the distributed implementation
+    # it is a WiP
+    module DistributedFactorial
+      def master_sub_experiments(cv)
+        write_dir!
+        param_grid.map do |paramset|
+          if @options.opts == ""
+            @options.opts = paramset.map {|k,v| "#{k}:#{v}"}.join(",")
+          else
+            @options.opts += "," + paramset.map {|k,v| "#{k}:#{v}"}.join(",")
+          end
+          child = self.class.new :master, @experiment, @options
+          child.parent_dir = @dir
+          child.master_run! cv
+          child
+        end
+      end
+      # Starts up the master server
+      def master_run!(cv)
+        @dir = "#{parent_dir}/#{@options.opts}"
+    		Dir.mkdir @dir
+        @cvs = cv || 1
+        @results = {}
+    		#Notify.started @experiment
+        split_up_data
+    		#write_dir!
+    		exps = param_grid.product((1..@cvs).to_a)
+    		@completed = Hash[*exps.map {|a| [a, false] }.flatten]
+    		@started = @completed.dup
+      end
+      # Cleans up the master server after all work is done
+      def master_done!
+        @done = true
+        specification! true
+        summarize_performance!
+    		summarize_results! @results
+    		cleanup!
+    		#Notify.completed @experiment
+    		#sleep 1
+        #DRb.stop_service
+      end
+    end
+  end
+end

data/lib/experiment/generator/{experiment_template.rb → experiment_template.rb.txt} RENAMED Viewed

@@ -1,11 +1,14 @@
 class MyExperiment < Experiment::Base
-  def test_data
+  # uncomment to get rid of "raw" files:
+  # after_completion :delete_raw_files
+  def data_set
     # TODO: Specify an array of all the test data.
     # It will be split up automatically for you across cross-validations
   end
-  def run_the_experiment(data, output)
+  def run_the_experiment
     # TODO: Define how you will run the experiment
     # Remember, each separate experiment inherits from this base class and includes
     # its own files, so this should be a rather generic implementation
@@ -13,23 +16,20 @@ class MyExperiment < Experiment::Base
     # 1. prepare any necessary setup like I/O lists, etc...
     # 2. do the experiment
-    benchmark do
-      output << # run your code here
+    measure "(optional) label" do
+      # run your code here
     end
     # 3. clean up
   end
-  def analyze_result!(input, output)
+  # You might want to process your data
+  # def analyze_result!(input, output)
     # TODO perform an analysis of what your program did
     # remember to return a hash of meaningful data, best of all a summary
-  end
-  # you might want to override this method as well:
-  # def summarize_results!(results)
-  #   super(results)
   # end
 end

data/lib/experiment/generator/readme_template.txt CHANGED Viewed

@@ -10,12 +10,12 @@ doc
 experiments
  - This directory includes all experiments that were coded. They generally `require`
    files from the reference implementation and add modifications of their own.
-   Each is explained in its `about.md` file.
+   Each is explained in its RDoc header.
 report
  - Source files used to create the report (multi-markdown format, see http://fletcherpenney.net/multimarkdown).
 results
  - Has all the measurements from individual experiments. Naming convention:
-   {name}-{classes}-cv{number of cross validations}-{shortened timestamp}.
+   {name}-cv{number of cross validations}-{shortened timestamp}.
 test
  - Unit tests.
 tmp