finite_mdp 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +8 -12
- data/lib/finite_mdp/array_model.rb +226 -0
- data/lib/finite_mdp/hash_model.rb +10 -9
- data/lib/finite_mdp/model.rb +19 -18
- data/lib/finite_mdp/solver.rb +96 -83
- data/lib/finite_mdp/table_model.rb +28 -19
- data/lib/finite_mdp/vector_valued.rb +5 -5
- data/lib/finite_mdp/version.rb +2 -1
- data/lib/finite_mdp.rb +3 -2
- data/test/finite_mdp/finite_mdp_test.rb +151 -98
- metadata +33 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: abf81db7b691f5238c134d835f819f75609fa17c
+  data.tar.gz: 4d897e26e7cc8e8aaffd5c5ce80855d3de55fbb2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8711791575db42460dc233ab92a787697731e833c3f121a351ddb1a3dc690c411ec00ff99a0dd996b33fd1be6cae4ea1a423354c013e709f4cb12968b1e3d0c8
+  data.tar.gz: aae51101f51e9d60f9b648b58a3ea6443f8228b8ddee4b00c1d55861d8fd89f8c48d4b35174c4fbda2dbdff037d7631d64ecc3f45957e529e5720087e43fe2b3
data/README.rdoc
CHANGED
@@ -94,10 +94,10 @@ absorbing state with zero reward, called :stop.
 
 # can move north, east, south or west on the grid
 MOVES = {
-'^' => [-1, 0],
-'>' => [ 0, 1],
-'v' => [ 1, 0],
-'<' => [ 0, -1]}
+'^' => [-1, 0],
+'>' => [ 0, 1],
+'v' => [ 1, 0],
+'<' => [ 0, -1]}
 
 # agent can move north, south, east or west (unless it's in the :stop
 # state); if it tries to move off the grid or into an obstacle, it stays
@@ -169,8 +169,8 @@ absorbing state with zero reward, called :stop.
 
 puts model.pretty_policy(solver.policy)
 # output: (matches Figure 17.2(a))
-# > > >
-# ^ ^
+# > > >
+# ^ ^
 # ^ < < <
 
 puts model.pretty_value(solver.value)
@@ -196,10 +196,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 
 == REQUIREMENTS
 
-
-* ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
-* ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
-* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
+This gem requires ruby 2.2 or higher.
 
 == INSTALLATION
 
@@ -209,7 +206,7 @@ Tested on
 
 (The MIT License)
 
-Copyright (c)
+Copyright (c) 2016 John Lees-Miller
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -229,4 +226,3 @@ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
data/lib/finite_mdp/array_model.rb
ADDED
@@ -0,0 +1,226 @@
+# frozen_string_literal: true
+#
+# A finite markov decision process model for which the states, transition
+# probabilities and rewards are stored in a sparse nested array format:
+# model[state_num][action_num] = [[next_state_num, probability, reward], ...]
+#
+# Note: The action_num is not consistent between states --- each state's action
+# array contains only the actions that apply in that state.
+#
+# This class also maintains a {StateActionMap} to map between the state and
+# action numbers and the original states and actions.
+#
+class FiniteMDP::ArrayModel
+  include FiniteMDP::Model
+
+  #
+  # Map between states and actions and their corresponding indexes. This is used
+  # with an {ArrayModel}, which works only with the indexes internally.
+  #
+  class StateActionMap
+    def initialize(map = [])
+      @map = map
+    end
+
+    attr_reader :map
+
+    def add(state, actions)
+      @map << [state, actions]
+    end
+
+    def states
+      @map.map { |state, _actions| state }
+    end
+
+    def actions(state)
+      _state, actions = @map[state_index(state)]
+      actions
+    end
+
+    def state_action_index(state, action)
+      index = state_index(state)
+      [index, @map[index][1].index(action)]
+    end
+
+    def state(index)
+      @map[index][0]
+    end
+
+    def state_index(state)
+      @map.index { |test_state, _actions| test_state == state }
+    end
+
+    #
+    # Build from a model.
+    #
+    # @param [Model] model
+    #
+    # @param [Boolean] ordered assume states are orderable; default is to
+    #   inspect the first state
+    #
+    def self.from_model(model, ordered = nil)
+      model_states = model.states
+
+      ordered = model_states.first.respond_to?(:>=) if ordered.nil?
+      map = ordered ? OrderedStateActionMap.new : StateActionMap.new
+      model_states.each do |state|
+        map.add(state, model.actions(state))
+      end
+      map
+    end
+  end
+
+  #
+  # A {StateActionMap} for states that support ordering. Lookups are more
+  # efficient than for an ordinary {StateActionMap}, which does not assume that
+  # states can be ordered.
+  #
+  class OrderedStateActionMap < StateActionMap
+    def add(state, actions)
+      index = state_index(state)
+      @map.insert(index || @map.size, [state, actions])
+    end
+
+    def state_index(state)
+      (0...@map.size).bsearch { |i| @map[i][0] >= state }
+    end
+  end
+
+  #
+  # @param [Array<Array<Array>>] array see notes for {ArrayModel}
+  # @param [StateActionMap] state_action_map
+  #
+  def initialize(array, state_action_map)
+    @array = array
+    @state_action_map = state_action_map
+  end
+
+  #
+  # @return [Array<Array<Array>>>] array see notes for {ArrayModel}
+  #
+  attr_reader :array
+
+  #
+  # @return [StateActionMap]
+  #
+  attr_reader :state_action_map
+
+  #
+  # States in this model; see {Model#states}.
+  #
+  # @return [Array<state>] not empty; no duplicate states
+  #
+  def states
+    @state_action_map.states
+  end
+
+  #
+  # Number of states in the model.
+  #
+  # @return [Fixnum] positive
+  #
+  def num_states
+    @state_action_map.map.size
+  end
+
+  #
+  # Actions that are valid for the given state; see {Model#actions}.
+  #
+  # @param [state] state
+  #
+  # @return [Array<state>] not empty; no duplicate actions
+  #
+  def actions(state)
+    @state_action_map.actions(state)
+  end
+
+  #
+  # Possible successor states after taking the given action in the given state;
+  # see {Model#next_states}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @return [Array<state>] not empty; no duplicates
+  #
+  def next_states(state, action)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    @array[state_index][action_index].map do |next_state_index, _pr, _reward|
+      @state_action_map.state(next_state_index)
+    end
+  end
+
+  #
+  # Probability of the given transition; see {Model#transition_probability}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @param [state] next_state
+  #
+  # @return [Float] in [0, 1]; zero if the transition is not in the model
+  #
+  def transition_probability(state, action, next_state)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    next_state_index = @state_action_map.state_index(next_state)
+    @array[state_index][action_index].each do |index, probability, _reward|
+      return probability if index == next_state_index
+    end
+    0
+  end
+
+  #
+  # Reward for a given transition; see {Model#reward}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @param [state] next_state
+  #
+  # @return [Float, nil] nil if the transition is not in the model
+  #
+  def reward(state, action, next_state)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    next_state_index = @state_action_map.state_index(next_state)
+    @array[state_index][action_index].each do |index, _probability, reward|
+      return reward if index == next_state_index
+    end
+    nil
+  end
+
+  #
+  # Convert a generic model into a hash model.
+  #
+  # @param [Model] model
+  #
+  # @param [Boolean] sparse do not store entries for transitions with zero
+  #   probability
+  #
+  # @param [Boolean] ordered assume states are orderable; default is to inspect
+  #   the first state
+  #
+  # @return [ArrayModel]
+  #
+  def self.from_model(model, sparse = true, ordered = nil)
+    state_action_map = StateActionMap.from_model(model, ordered)
+
+    array = state_action_map.states.map do |state|
+      state_action_map.actions(state).map do |action|
+        model.next_states(state, action).map do |next_state|
+          pr = model.transition_probability(state, action, next_state)
+          next unless pr > 0 || !sparse
+          reward = model.reward(state, action, next_state)
+          [state_action_map.state_index(next_state), pr, reward]
+        end.compact
+      end
+    end
+
+    FiniteMDP::ArrayModel.new(array, state_action_map)
+  end
+end
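The class comment above only describes the array layout abstractly. As a rough, hypothetical sketch (not taken from the gem or its tests), a small two-state model can be written directly in that layout and queried through the model methods added in this file; the state and action names and the expected results in the #=> comments are assumptions based only on the code shown in this diff:

  require 'finite_mdp'

  # Hypothetical two-state example (state 0 = :low, state 1 = :high).
  map = FiniteMDP::ArrayModel::StateActionMap.new
  map.add(:low,  [:wait, :charge])
  map.add(:high, [:wait])

  array = [
    [                                  # state 0 (:low)
      [[0, 0.9, 0.0], [1, 0.1, 0.0]],  #   action 0 (:wait)
      [[1, 1.0, -1.0]]                 #   action 1 (:charge)
    ],
    [                                  # state 1 (:high)
      [[1, 0.8, 1.0], [0, 0.2, 1.0]]   #   action 0 (:wait)
    ]
  ]

  model = FiniteMDP::ArrayModel.new(array, map)
  model.num_states                                  #=> 2
  model.actions(:low)                               #=> [:wait, :charge]
  model.next_states(:low, :charge)                  #=> [:high]
  model.transition_probability(:high, :wait, :low)  #=> 0.2
  model.reward(:low, :charge, :high)                #=> -1.0
  model.check_transition_probabilities_sum          # no error: each row sums to 1

Storing only the [next_state_num, probability, reward] triples that actually occur for each state and action is what makes the representation sparse.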
data/lib/finite_mdp/hash_model.rb
CHANGED
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # A finite markov decision process model for which the transition
 # probabilities and rewards are specified using nested hash tables.
@@ -18,7 +19,7 @@ class FiniteMDP::HashModel
   # @param [Hash<state, Hash<action, Hash<state, [Float, Float]>>>] hash see
   #   notes for {HashModel} for an explanation of this structure
   #
-  def initialize
+  def initialize(hash)
     @hash = hash
   end
 
@@ -44,7 +45,7 @@ class FiniteMDP::HashModel
   #
   # @return [Array<action>] not empty; no duplicate actions
   #
-  def actions
+  def actions(state)
     hash[state].keys
   end
 
@@ -58,7 +59,7 @@ class FiniteMDP::HashModel
   #
   # @return [Array<state>] not empty; no duplicate states
   #
-  def next_states
+  def next_states(state, action)
     hash[state][action].keys
   end
 
@@ -73,7 +74,7 @@ class FiniteMDP::HashModel
   #
   # @return [Float] in [0, 1]; zero if the transition is not in the hash
   #
-  def transition_probability
+  def transition_probability(state, action, next_state)
     probability, _reward = hash[state][action][next_state]
     probability || 0
   end
@@ -89,7 +90,7 @@ class FiniteMDP::HashModel
   #
   # @return [Float, nil] nil if the transition is not in the hash
   #
-  def reward
+  def reward(state, action, next_state)
     _probability, reward = hash[state][action][next_state]
     reward
   end
@@ -104,7 +105,7 @@ class FiniteMDP::HashModel
   #
   # @return [HashModel] not nil
   #
-  def self.from_model
+  def self.from_model(model, sparse = true)
     hash = {}
     model.states.each do |state|
       hash[state] ||= {}
@@ -112,12 +113,12 @@ class FiniteMDP::HashModel
         hash[state][action] ||= {}
        model.next_states(state, action).each do |next_state|
          pr = model.transition_probability(state, action, next_state)
-
-
+          next unless pr > 0 || !sparse
+          hash[state][action][next_state] =
+            [pr, model.reward(state, action, next_state)]
        end
      end
    end
    FiniteMDP::HashModel.new(hash)
  end
 end
-
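For comparison, the same hypothetical two-state model used above can be written in the nested hash layout documented here (hash[state][action][next_state] = [probability, reward]) and converted back and forth with the from_model class methods. This is an illustrative sketch under the same assumptions, not code from the gem:

  require 'finite_mdp'

  # hash[state][action][next_state] = [probability, reward]
  hash = {
    low:  { wait:   { low: [0.9, 0.0], high: [0.1, 0.0] },
            charge: { high: [1.0, -1.0] } },
    high: { wait:   { high: [0.8, 1.0], low: [0.2, 1.0] } }
  }

  hash_model = FiniteMDP::HashModel.new(hash)
  hash_model.actions(:low)                              #=> [:wait, :charge]
  hash_model.transition_probability(:low, :wait, :high) #=> 0.1
  hash_model.reward(:low, :charge, :high)               #=> -1.0

  # Conversions go through the generic Model interface in both directions;
  # with the default sparse flag, zero-probability entries are dropped.
  array_model = FiniteMDP::ArrayModel.from_model(hash_model)
  round_trip  = FiniteMDP::HashModel.from_model(array_model)
  round_trip.transition_probability(:high, :wait, :low) #=> 0.2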
data/lib/finite_mdp/model.rb
CHANGED
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # Interface that defines a finite markov decision process model.
 #
@@ -93,13 +94,13 @@ module FiniteMDP::Model
   # All states must have at least one valid action; see notes for {Model}
   # regarding how to encode a terminal state.
   #
-  # @param [state]
+  # @param [state] _state
   #
   # @return [Array<action>] not empty; no duplicate actions
   #
   # @abstract
   #
-  def actions
+  def actions(_state)
     raise NotImplementedError
   end
 
@@ -115,13 +116,13 @@ module FiniteMDP::Model
   # ignores them in its internal representation, so you can usually forget about
   # this method.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
   # @return [Array<state>] not empty; no duplicate states
   #
-  def next_states
+  def next_states(_state, _action)
     states
   end
 
@@ -134,18 +135,18 @@ module FiniteMDP::Model
   # {TableModel#transition_probability} return zero in this case, but this is
   # not part of the contract.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
-  # @param [state]
+  # @param [state] _next_state
   #
   # @return [Float] in [0, 1]; undefined if the transition is not in the model
   #   (see notes above)
   #
   # @abstract
   #
-  def transition_probability
+  def transition_probability(_state, _action, _next_state)
     raise NotImplementedError
   end
 
@@ -157,18 +158,18 @@ module FiniteMDP::Model
   # undefined. Note that {HashModel#reward} and {TableModel#reward} return
   # <tt>nil</tt> in this case, but this is not part of the contract.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
-  # @param [state]
+  # @param [state] _next_state
   #
   # @return [Float, nil] nil only if the transition is not in the model (but the
   #   result is undefined in this case -- it need not be nil; see notes above)
   #
   # @abstract
   #
-  def reward
+  def reward(_state, _action, _next_state)
     raise NotImplementedError
   end
 
@@ -182,8 +183,9 @@ module FiniteMDP::Model
     prs = []
     states.each do |state|
       actions(state).each do |action|
-        pr = next_states(state, action).map
-          transition_probability(state, action, next_state)
+        pr = next_states(state, action).map do |next_state|
+          transition_probability(state, action, next_state)
+        end.inject(:+)
         prs << [[state, action], pr]
       end
     end
@@ -198,7 +200,7 @@ module FiniteMDP::Model
   #
   # @return [nil]
   #
-  def check_transition_probabilities_sum
+  def check_transition_probabilities_sum(tol = 1e-6)
     transition_probability_sums.each do |(state, action), pr|
       raise "transition probabilities for state #{state.inspect} and
         action #{action.inspect} sum to #{pr}" if pr < 1 - tol
@@ -230,9 +232,8 @@ module FiniteMDP::Model
         all_states.merge ns
         any_out_transitions ||= !ns.empty?
       end
-      out_states << state if any_out_transitions
+      out_states << state if any_out_transitions
     end
     all_states - out_states
   end
 end
-
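Since the abstract methods now take explicit state and action arguments, a custom model written against 0.3.0 defines them with the new signatures. The following is a minimal hypothetical sketch (the CoinModel class is not part of the gem) of an implementation of the interface, validated with check_transition_probabilities_sum and compacted with ArrayModel.from_model:

  require 'finite_mdp'

  # Hypothetical model: a coin that can be flipped (fair) or kept as it is.
  class CoinModel
    include FiniteMDP::Model

    def states
      [:heads, :tails]
    end

    def actions(_state)
      [:flip, :keep]
    end

    def next_states(_state, _action)
      states
    end

    def transition_probability(state, action, next_state)
      action == :flip ? 0.5 : (state == next_state ? 1.0 : 0.0)
    end

    def reward(_state, _action, next_state)
      next_state == :heads ? 1.0 : 0.0
    end
  end

  model = CoinModel.new
  model.check_transition_probabilities_sum  # passes: every (state, action) sums to 1
  compact = FiniteMDP::ArrayModel.from_model(model)
  compact.num_states                        #=> 2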