RubyGems - finite_mdp - Versions diffs - 0.0.1 → 0.1.1 - Mend

finite_mdp 0.0.1 → 0.1.1

Files changed (6)

data/README.rdoc +4 -1
data/lib/finite_mdp/model.rb +48 -5
data/lib/finite_mdp/solver.rb +52 -5
data/lib/finite_mdp/version.rb +4 -1
data/test/{finite_mdp_test.rb → finite_mdp/finite_mdp_test.rb} +37 -2
metadata +59 -58

data/README.rdoc CHANGED

@@ -2,6 +2,8 @@
 * https://github.com/jdleesmiller/finite_mdp
+{<img src="https://secure.travis-ci.org/jdleesmiller/finite_mdp.png"/>}[http://travis-ci.org/jdleesmiller/finite_mdp]
 == SYNOPSIS
 Solve small, finite Markov Decision Process (MDP) models.
@@ -159,7 +161,7 @@ absorbing state with zero reward, called :stop.
      [-0.04, -0.04, -0.04, -0.04]],
      [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
-  # sanity check: probabilities in a row must sum to 1
+  # sanity check: successor state probabilities must sum to 1
   model.check_transition_probabilities_sum
   solver = FiniteMDP::Solver.new(model, 1) # discount factor 1
@@ -197,6 +199,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 Tested on
 * ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
 * ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
+* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
 == INSTALLATION

data/lib/finite_mdp/model.rb CHANGED

@@ -172,6 +172,24 @@ module FiniteMDP::Model
     raise NotImplementedError
   end
+  #
+  # Sum of the transition probabilities for each (state, action) pair; the sums
+  # should be one in a valid model.
+  #
+  # @return [Hash<[State, Action], Float>]
+  #
+  def transition_probability_sums
+    prs = []
+    states.each do |state|
+      actions(state).each do |action|
+        pr = next_states(state, action).map{|next_state|
+          transition_probability(state, action, next_state)}.inject(:+)
+        prs << [[state, action], pr]
+      end
+    end
+    Hash[prs]
+  end
   #
   # Raise an error if the sum of the transition probabilities for any (state,
   # action) pair is not sufficiently close to 1.
@@ -181,15 +199,40 @@ module FiniteMDP::Model
   # @return [nil]
   #
   def check_transition_probabilities_sum tol=1e-6
+    transition_probability_sums.each do |(state, action), pr|
+      raise "transition probabilities for state #{state.inspect} and
+          action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+    end
+    nil
+  end
+  #
+  # Set of states that have no transitions out.
+  #
+  # At present, this library can't solve a model with terminal states. However,
+  # you can add a dummy state (e.g. <tt>:stop</tt>) with zero reward that
+  # transitions back to itself with probability one.
+  #
+  # Note that if a state has transitions out, but all of them have probability
+  # zero, this method does not detect it as a terminal state. You can check for
+  # these using {#transition_probability_sums} instead.
+  #
+  # @return [Set]
+  #
+  def terminal_states
+    all_states = Set[]
+    out_states = Set[]
     states.each do |state|
+      all_states << state
+      any_out_transitions = false
       actions(state).each do |action|
-        pr = next_states(state, action).map{|next_state|
-          transition_probability(state, action, next_state)}.inject(:+)
-        raise "transition probabilities for state #{state.inspect} and
-          action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+        ns = next_states(state, action)
+        all_states.merge ns
+        any_out_transitions ||= !ns.empty?
       end
+      out_states << state if any_out_transitions
     end
-    nil
+    all_states - out_states
   end
 end

data/lib/finite_mdp/solver.rb CHANGED

@@ -73,7 +73,7 @@ class FiniteMDP::Solver
   #
   # @return [Model] the model being solved; read only; do not change the model
-  # while it is being solved
+  #         while it is being solved
   #
   attr_reader :model
@@ -90,6 +90,27 @@ class FiniteMDP::Solver
     Hash[model.states.zip(@array_value)]
   end
+  #
+  # Current state-action value estimates; whereas {#value} returns $V(s)$, this
+  # returns $Q(s,a)$, in the usual notation.
+  #
+  # @return [Hash<[state, action], Float>]
+  #
+  def state_action_value
+    q = {}
+    states = model.states
+    @array_model.each_with_index do |actions, state_n|
+      state = states[state_n]
+      state_actions = model.actions(state)
+      actions.each_with_index do |next_state_ns, action_n|
+        q_sa = next_state_ns.map {|next_state_n, pr, r|
+          pr * (r + @discount * @array_value[next_state_n])}.inject(:+)
+        q[[state, state_actions[action_n]]] = q_sa
+      end
+    end
+    q
+  end
   #
   # Current estimate of the optimal action for each state.
   #
@@ -232,6 +253,13 @@ class FiniteMDP::Solver
   #
   # @return [Boolean] true iff iteration converged to within tolerance
   #
+  # @yield [num_iters, delta] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters iterations done so far
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #             iteration
+  #
   def value_iteration tolerance, max_iters=nil
     delta = Float::MAX
     num_iters = 0
@@ -240,7 +268,8 @@ class FiniteMDP::Solver
       num_iters += 1
       break if delta < tolerance
-      break if max_iters && num_iters > max_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters, delta if block_given?
     end
     delta < tolerance
   end
@@ -263,6 +292,18 @@ class FiniteMDP::Solver
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_policy_iters, num_value_iters, delta] at the end of each
+  #        policy evaluation iteration
+  #
+  # @yieldparam [Integer] num_policy_iters policy improvement iterations done so
+  #             far
+  #
+  # @yieldparam [Integer] num_value_iters policy evaluation iterations done so
+  #             far for the current policy improvement iteration
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #             policy evaluation iteration
+  #
   def policy_iteration value_tolerance, max_value_iters=nil,
     max_policy_iters=nil
@@ -276,14 +317,15 @@ class FiniteMDP::Solver
         num_value_iters += 1
         break if value_delta < value_tolerance
-        break if max_value_iters && num_value_iters > max_value_iters
+        break if max_value_iters && num_value_iters >= max_value_iters
+        yield num_policy_iters, num_value_iters, value_delta if block_given?
       end
       # policy improvement
       stable = improve_policy
       num_policy_iters += 1
       break if stable
-      break if max_policy_iters && num_policy_iters > max_policy_iters
+      break if max_policy_iters && num_policy_iters >= max_policy_iters
     end
     stable
   end
@@ -297,6 +339,10 @@ class FiniteMDP::Solver
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_iters] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters policy improvement iterations done so far
+  #
   def policy_iteration_exact max_iters=nil
     stable = false
     num_iters = 0
@@ -305,7 +351,8 @@ class FiniteMDP::Solver
       stable = improve_policy
       num_iters += 1
       break if stable
-      break if max_iters && num_iters > max_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters if block_given?
     end
     stable
   end

data/lib/finite_mdp/version.rb CHANGED

@@ -1,3 +1,6 @@
 module FiniteMDP
-  VERSION = '0.0.1'
+  VERSION_MAJOR = 0
+  VERSION_MINOR = 1
+  VERSION_PATCH = 1
+  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
 end

data/test/{finite_mdp_test.rb → finite_mdp/finite_mdp_test.rb} RENAMED

@@ -9,10 +9,16 @@ require 'set'
 class TestFiniteMDP < Test::Unit::TestCase
   include FiniteMDP
+  def assert_close expected, actual, tol=1e-6
+    assert (expected - actual).abs < tol,
+      "expected #{actual} to be within #{tol} of #{expected}"
+  end
   # check that we get the same model back; model parameters must be set before
   # calling; see test_recycling_robot
   def check_recycling_robot_model model, sparse
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
     assert_equal Set[:high, :low],    Set[*model.states]
     assert_equal Set[:search, :wait], Set[*model.actions(:high)]
@@ -113,13 +119,31 @@ class TestFiniteMDP < Test::Unit::TestCase
     # try solving with policy iteration using iterative policy evaluation
     solver = Solver.new(table_model, 0.95, Hash.new {:wait})
-    assert solver.policy_iteration(1e-4, 2, 20), "did not find stable policy"
+    assert solver.policy_iteration(1e-4, 2, 50), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
     # try solving with policy iteration using exact policy evaluation
-    solver = Solver.new(table_model, 0.95, Hash.new {:wait})
+    gamma = 0.95
+    solver = Solver.new(table_model, gamma, Hash.new {:wait})
     assert solver.policy_iteration_exact(20), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
+    # check the corresponding state-action values (Q(s,a) values)
+    v = solver.value
+    q_high_search  =    @alpha  * (@r_search + gamma * v[:high]) +
+                     (1-@alpha) * (@r_search + gamma * v[:low])
+    q_high_wait    = @r_wait + gamma * v[:high]
+    q_low_search   = (1-@beta) * (@r_rescue + gamma * v[:high]) +
+                        @beta  * (@r_search + gamma * v[:low])
+    q_low_wait     = @r_wait + gamma * v[:low]
+    q_low_recharge = 0 + gamma * v[:high]
+    q = solver.state_action_value
+    assert_close q[[:high, :search]],  q_high_search
+    assert_close q[[:high, :wait]],    q_high_wait
+    assert_close q[[:low, :search]],   q_low_search
+    assert_close q[[:low, :wait]],     q_low_wait
+    assert_close q[[:low, :recharge]], q_low_recharge
   end
   #
@@ -239,6 +263,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [-0.04, -0.04, -0.04, -0.04]],
        [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
     assert_equal Set[
       [0, 0], [0, 1], [0, 2], [0, 3],
@@ -278,6 +303,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [   r,   r,    r,   r]],
        [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
     check_grid_solutions model,
       ["> > >  ",
@@ -294,6 +320,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [   r,   r,    r,   r]],
        [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
     check_grid_solutions model,
       ["> > >  ",
@@ -310,6 +337,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [   r,   r,    r,   r]],
        [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
     check_grid_solutions model,
       ["> > >  ",
@@ -343,5 +371,12 @@ class TestFiniteMDP < Test::Unit::TestCase
     assert  p1.eql?(p3)
     assert_equal p1.hash, p3.hash
   end
+  def test_incomplete_model
+    # model with a transition from a to b but no transitions from b
+    table_model = TableModel.new [
+      [:a, :a_a, :b, 1, 0]]
+    assert_equal Set[:b], table_model.terminal_states
+  end
 end

metadata CHANGED

@@ -1,94 +1,95 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: finite_mdp
-version: !ruby/object:Gem::Version
+version: !ruby/object:Gem::Version
+  version: 0.1.1
   prerelease:
-  version: 0.0.1
 platform: ruby
-authors:
+authors:
 - John Lees-Miller
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-04-17 00:00:00 Z
-dependencies:
-- !ruby/object:Gem::Dependency
+date: 2012-02-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: narray
-  prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
+  requirement: &85251730 !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.5.9
+    requirements:
     - - ~>
-      - !ruby/object:Gem::Version
-        version: "0"
+      - !ruby/object:Gem::Version
+        version: 0.5.9
   type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
-  name: gemma
   prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
+  version_requirements: *85251730
+- !ruby/object:Gem::Dependency
+  name: gemma
+  requirement: &85251480 !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 1.0.1
+    requirements:
     - - ~>
-      - !ruby/object:Gem::Version
-        version: "1.0"
+      - !ruby/object:Gem::Version
+        version: 2.1.0
   type: :development
-  version_requirements: *id002
-description: Solve small finite Markov Decision Process models.
-email:
+  prerelease: false
+  version_requirements: *85251480
+description: ! 'This library provides several ways of describing a
+  finite Markov Decision Process (MDP) model (see FiniteMDP::Model) and some
+  reasonably efficient implementations of policy iteration and value iteration to
+  solve it (see FiniteMDP::Solver).'
+email:
 - jdleesmiller@gmail.com
 executables: []
 extensions: []
-extra_rdoc_files:
+extra_rdoc_files:
 - README.rdoc
-files:
-- lib/finite_mdp/hash_model.rb
-- lib/finite_mdp/vector_valued.rb
-- lib/finite_mdp/model.rb
-- lib/finite_mdp/version.rb
+files:
+- lib/finite_mdp.rb
 - lib/finite_mdp/solver.rb
+- lib/finite_mdp/version.rb
 - lib/finite_mdp/table_model.rb
-- lib/finite_mdp.rb
+- lib/finite_mdp/hash_model.rb
+- lib/finite_mdp/model.rb
+- lib/finite_mdp/vector_valued.rb
 - README.rdoc
-- test/finite_mdp_test.rb
+- test/finite_mdp/finite_mdp_test.rb
 homepage: http://github.com/jdleesmiller/finite_mdp
 licenses: []
 post_install_message:
-rdoc_options:
+rdoc_options:
 - --main
 - README.rdoc
 - --title
-- finite_mdp-0.0.1 Documentation
-require_paths:
+- finite_mdp-0.1.1 Documentation
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+      segments:
+      - 0
+      hash: -310962355
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+      segments:
+      - 0
+      hash: -310962355
 requirements: []
 rubyforge_project: finite_mdp
-rubygems_version: 1.7.2
+rubygems_version: 1.8.10
 signing_key:
 specification_version: 3
-summary: Solve small finite Markov Decision Process models.
-test_files:
-- test/finite_mdp_test.rb
+summary: Solve small, finite Markov Decision Process models.
+test_files:
+- test/finite_mdp/finite_mdp_test.rb
 has_rdoc: