finite_mdp 0.2.0 → 0.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 4dea8cd1de91ae04618cf3d04df9f84f9c3e1818
-   data.tar.gz: 09309d55f5d88bebf5d1a2c989f4ff403a7c5b75
+   metadata.gz: abf81db7b691f5238c134d835f819f75609fa17c
+   data.tar.gz: 4d897e26e7cc8e8aaffd5c5ce80855d3de55fbb2
  SHA512:
-   metadata.gz: f4bbba688034130bc37f7192c4a4073b246886b9e43398fa2a5f3bd85407d271b67579babd9573fbe97fcd452912765405bce2fc3a7d26760d7b3e69154dace3
-   data.tar.gz: 2745aa18e046181cd8f45f1cb956007be71a6dc02f3aee7a977d3607d9ae977cc7d75cbb5177b0c2e1bb330edd519c6fd59f69278cbcb6f1a300c9ebfb7a4332
+   metadata.gz: 8711791575db42460dc233ab92a787697731e833c3f121a351ddb1a3dc690c411ec00ff99a0dd996b33fd1be6cae4ea1a423354c013e709f4cb12968b1e3d0c8
+   data.tar.gz: aae51101f51e9d60f9b648b58a3ea6443f8228b8ddee4b00c1d55861d8fd89f8c48d4b35174c4fbda2dbdff037d7631d64ecc3f45957e529e5720087e43fe2b3
data/README.rdoc CHANGED
@@ -94,10 +94,10 @@ absorbing state with zero reward, called :stop.
 
  # can move north, east, south or west on the grid
  MOVES = {
- '^' => [-1, 0],
- '>' => [ 0, 1],
- 'v' => [ 1, 0],
- '<' => [ 0, -1]}
+ '^' => [-1, 0],
+ '>' => [ 0, 1],
+ 'v' => [ 1, 0],
+ '<' => [ 0, -1]}
 
  # agent can move north, south, east or west (unless it's in the :stop
  # state); if it tries to move off the grid or into an obstacle, it stays
@@ -169,8 +169,8 @@ absorbing state with zero reward, called :stop.
 
  puts model.pretty_policy(solver.policy)
  # output: (matches Figure 17.2(a))
- # > > >
- # ^ ^
+ # > > >
+ # ^ ^
  # ^ < < <
 
  puts model.pretty_value(solver.value)
@@ -196,10 +196,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 
  == REQUIREMENTS
 
- Tested on
- * ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
- * ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
- * ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
+ This gem requires ruby 2.2 or higher.
 
  == INSTALLATION
 
@@ -209,7 +206,7 @@ Tested on
 
  (The MIT License)
 
- Copyright (c) 2011 John Lees-Miller
+ Copyright (c) 2016 John Lees-Miller
 
  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
@@ -229,4 +226,3 @@ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
@@ -0,0 +1,226 @@
+ # frozen_string_literal: true
+ #
+ # A finite markov decision process model for which the states, transition
+ # probabilities and rewards are stored in a sparse nested array format:
+ # model[state_num][action_num] = [[next_state_num, probability, reward], ...]
+ #
+ # Note: The action_num is not consistent between states --- each state's action
+ # array contains only the actions that apply in that state.
+ #
+ # This class also maintains a {StateActionMap} to map between the state and
+ # action numbers and the original states and actions.
+ #
+ class FiniteMDP::ArrayModel
+   include FiniteMDP::Model
+
+   #
+   # Map between states and actions and their corresponding indexes. This is used
+   # with an {ArrayModel}, which works only with the indexes internally.
+   #
+   class StateActionMap
+     def initialize(map = [])
+       @map = map
+     end
+
+     attr_reader :map
+
+     def add(state, actions)
+       @map << [state, actions]
+     end
+
+     def states
+       @map.map { |state, _actions| state }
+     end
+
+     def actions(state)
+       _state, actions = @map[state_index(state)]
+       actions
+     end
+
+     def state_action_index(state, action)
+       index = state_index(state)
+       [index, @map[index][1].index(action)]
+     end
+
+     def state(index)
+       @map[index][0]
+     end
+
+     def state_index(state)
+       @map.index { |test_state, _actions| test_state == state }
+     end
+
+     #
+     # Build from a model.
+     #
+     # @param [Model] model
+     #
+     # @param [Boolean] ordered assume states are orderable; default is to
+     #   inspect the first state
+     #
+     def self.from_model(model, ordered = nil)
+       model_states = model.states
+
+       ordered = model_states.first.respond_to?(:>=) if ordered.nil?
+       map = ordered ? OrderedStateActionMap.new : StateActionMap.new
+       model_states.each do |state|
+         map.add(state, model.actions(state))
+       end
+       map
+     end
+   end
+
+   #
+   # A {StateActionMap} for states that support ordering. Lookups are more
+   # efficient than for an ordinary {StateActionMap}, which does not assume that
+   # states can be ordered.
+   #
+   class OrderedStateActionMap < StateActionMap
+     def add(state, actions)
+       index = state_index(state)
+       @map.insert(index || @map.size, [state, actions])
+     end
+
+     def state_index(state)
+       (0...@map.size).bsearch { |i| @map[i][0] >= state }
+     end
+   end
+
+   #
+   # @param [Array<Array<Array>>] array see notes for {ArrayModel}
+   # @param [StateActionMap] state_action_map
+   #
+   def initialize(array, state_action_map)
+     @array = array
+     @state_action_map = state_action_map
+   end
+
+   #
+   # @return [Array<Array<Array>>>] array see notes for {ArrayModel}
+   #
+   attr_reader :array
+
+   #
+   # @return [StateActionMap]
+   #
+   attr_reader :state_action_map
+
+   #
+   # States in this model; see {Model#states}.
+   #
+   # @return [Array<state>] not empty; no duplicate states
+   #
+   def states
+     @state_action_map.states
+   end
+
+   #
+   # Number of states in the model.
+   #
+   # @return [Fixnum] positive
+   #
+   def num_states
+     @state_action_map.map.size
+   end
+
+   #
+   # Actions that are valid for the given state; see {Model#actions}.
+   #
+   # @param [state] state
+   #
+   # @return [Array<state>] not empty; no duplicate actions
+   #
+   def actions(state)
+     @state_action_map.actions(state)
+   end
+
+   #
+   # Possible successor states after taking the given action in the given state;
+   # see {Model#next_states}.
+   #
+   # @param [state] state
+   #
+   # @param [action] action
+   #
+   # @return [Array<state>] not empty; no duplicates
+   #
+   def next_states(state, action)
+     state_index, action_index =
+       @state_action_map.state_action_index(state, action)
+     @array[state_index][action_index].map do |next_state_index, _pr, _reward|
+       @state_action_map.state(next_state_index)
+     end
+   end
+
+   #
+   # Probability of the given transition; see {Model#transition_probability}.
+   #
+   # @param [state] state
+   #
+   # @param [action] action
+   #
+   # @param [state] next_state
+   #
+   # @return [Float] in [0, 1]; zero if the transition is not in the model
+   #
+   def transition_probability(state, action, next_state)
+     state_index, action_index =
+       @state_action_map.state_action_index(state, action)
+     next_state_index = @state_action_map.state_index(next_state)
+     @array[state_index][action_index].each do |index, probability, _reward|
+       return probability if index == next_state_index
+     end
+     0
+   end
+
+   #
+   # Reward for a given transition; see {Model#reward}.
+   #
+   # @param [state] state
+   #
+   # @param [action] action
+   #
+   # @param [state] next_state
+   #
+   # @return [Float, nil] nil if the transition is not in the model
+   #
+   def reward(state, action, next_state)
+     state_index, action_index =
+       @state_action_map.state_action_index(state, action)
+     next_state_index = @state_action_map.state_index(next_state)
+     @array[state_index][action_index].each do |index, _probability, reward|
+       return reward if index == next_state_index
+     end
+     nil
+   end
+
+   #
+   # Convert a generic model into a hash model.
+   #
+   # @param [Model] model
+   #
+   # @param [Boolean] sparse do not store entries for transitions with zero
+   #   probability
+   #
+   # @param [Boolean] ordered assume states are orderable; default is to inspect
+   #   the first state
+   #
+   # @return [ArrayModel]
+   #
+   def self.from_model(model, sparse = true, ordered = nil)
+     state_action_map = StateActionMap.from_model(model, ordered)
+
+     array = state_action_map.states.map do |state|
+       state_action_map.actions(state).map do |action|
+         model.next_states(state, action).map do |next_state|
+           pr = model.transition_probability(state, action, next_state)
+           next unless pr > 0 || !sparse
+           reward = model.reward(state, action, next_state)
+           [state_action_map.state_index(next_state), pr, reward]
+         end.compact
+       end
+     end
+
+     FiniteMDP::ArrayModel.new(array, state_action_map)
+   end
+ end
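
For orientation, here is a minimal sketch of how the new ArrayModel might be used, assuming the gem is required as finite_mdp and reusing the HashModel class whose diff appears below; the two-state model, its probabilities and rewards are purely illustrative.

  require 'finite_mdp'

  # Hypothetical two-state model in the nested hash format:
  # hash[state][action][next_state] = [probability, reward]
  hash = {
    a: { go: { b: [0.9, 1], a: [0.1, 0] } },
    b: { stay: { b: [1.0, 0] } }
  }
  hash_model = FiniteMDP::HashModel.new(hash)

  # Convert to the index-based sparse array representation.
  array_model = FiniteMDP::ArrayModel.from_model(hash_model)
  array_model.num_states                           # => 2
  array_model.transition_probability(:a, :go, :b)  # => 0.9
  array_model.reward(:a, :go, :b)                  # => 1

Because the states here are symbols, which respond to >=, from_model selects an OrderedStateActionMap and can look states up by binary search.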
@@ -1,3 +1,4 @@
+ # frozen_string_literal: true
  #
  # A finite markov decision process model for which the transition
  # probabilities and rewards are specified using nested hash tables.
@@ -18,7 +19,7 @@ class FiniteMDP::HashModel
    # @param [Hash<state, Hash<action, Hash<state, [Float, Float]>>>] hash see
    # notes for {HashModel} for an explanation of this structure
    #
-   def initialize hash
+   def initialize(hash)
      @hash = hash
    end
 
@@ -44,7 +45,7 @@ class FiniteMDP::HashModel
    #
    # @return [Array<action>] not empty; no duplicate actions
    #
-   def actions state
+   def actions(state)
      hash[state].keys
    end
 
@@ -58,7 +59,7 @@ class FiniteMDP::HashModel
    #
    # @return [Array<state>] not empty; no duplicate states
    #
-   def next_states state, action
+   def next_states(state, action)
      hash[state][action].keys
    end
 
@@ -73,7 +74,7 @@ class FiniteMDP::HashModel
    #
    # @return [Float] in [0, 1]; zero if the transition is not in the hash
    #
-   def transition_probability state, action, next_state
+   def transition_probability(state, action, next_state)
      probability, _reward = hash[state][action][next_state]
      probability || 0
    end
@@ -89,7 +90,7 @@ class FiniteMDP::HashModel
    #
    # @return [Float, nil] nil if the transition is not in the hash
    #
-   def reward state, action, next_state
+   def reward(state, action, next_state)
      _probability, reward = hash[state][action][next_state]
      reward
    end
@@ -104,7 +105,7 @@ class FiniteMDP::HashModel
    #
    # @return [HashModel] not nil
    #
-   def self.from_model model, sparse=true
+   def self.from_model(model, sparse = true)
      hash = {}
      model.states.each do |state|
        hash[state] ||= {}
@@ -112,12 +113,12 @@ class FiniteMDP::HashModel
          hash[state][action] ||= {}
          model.next_states(state, action).each do |next_state|
            pr = model.transition_probability(state, action, next_state)
-           hash[state][action][next_state] = [pr,
-             model.reward(state, action, next_state)] if pr > 0 || !sparse
+           next unless pr > 0 || !sparse
+           hash[state][action][next_state] =
+             [pr, model.reward(state, action, next_state)]
          end
        end
      end
      FiniteMDP::HashModel.new(hash)
    end
  end
-
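
As a quick illustration of the nested hash layout this class documents, here is a hypothetical sketch; the states, probabilities and rewards are invented for the example, and check_transition_probabilities_sum comes from the Model module shown below.

  require 'finite_mdp'

  # state => action => next state => [probability, reward]
  weather = FiniteMDP::HashModel.new(
    sunny: { walk:  { sunny: [0.8,  2], rainy: [0.2, -1] },
             drive: { sunny: [0.8,  1], rainy: [0.2,  1] } },
    rainy: { walk:  { sunny: [0.3, -3], rainy: [0.7, -3] },
             drive: { sunny: [0.3,  1], rainy: [0.7,  1] } }
  )
  weather.actions(:sunny)                                  # => [:walk, :drive]
  weather.next_states(:rainy, :drive)                      # => [:sunny, :rainy]
  weather.transition_probability(:rainy, :drive, :sunny)   # => 0.3
  weather.check_transition_probabilities_sum               # raises if an action's probabilities do not sum to 1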
@@ -1,3 +1,4 @@
+ # frozen_string_literal: true
  #
  # Interface that defines a finite markov decision process model.
  #
@@ -93,13 +94,13 @@ module FiniteMDP::Model
    # All states must have at least one valid action; see notes for {Model}
    # regarding how to encode a terminal state.
    #
-   # @param [state] state
+   # @param [state] _state
    #
    # @return [Array<action>] not empty; no duplicate actions
    #
    # @abstract
    #
-   def actions state
+   def actions(_state)
      raise NotImplementedError
    end
 
@@ -115,13 +116,13 @@ module FiniteMDP::Model
    # ignores them in its internal representation, so you can usually forget about
    # this method.
    #
-   # @param [state] state
+   # @param [state] _state
    #
-   # @param [action] action
+   # @param [action] _action
    #
    # @return [Array<state>] not empty; no duplicate states
    #
-   def next_states state, action
+   def next_states(_state, _action)
      states
    end
 
@@ -134,18 +135,18 @@ module FiniteMDP::Model
    # {TableModel#transition_probability} return zero in this case, but this is
    # not part of the contract.
    #
-   # @param [state] state
+   # @param [state] _state
    #
-   # @param [action] action
+   # @param [action] _action
    #
-   # @param [state] next_state
+   # @param [state] _next_state
    #
    # @return [Float] in [0, 1]; undefined if the transition is not in the model
    # (see notes above)
    #
    # @abstract
    #
-   def transition_probability state, action, next_state
+   def transition_probability(_state, _action, _next_state)
      raise NotImplementedError
    end
 
@@ -157,18 +158,18 @@ module FiniteMDP::Model
    # undefined. Note that {HashModel#reward} and {TableModel#reward} return
    # <tt>nil</tt> in this case, but this is not part of the contract.
    #
-   # @param [state] state
+   # @param [state] _state
    #
-   # @param [action] action
+   # @param [action] _action
    #
-   # @param [state] next_state
+   # @param [state] _next_state
    #
    # @return [Float, nil] nil only if the transition is not in the model (but the
    # result is undefined in this case -- it need not be nil; see notes above)
    #
    # @abstract
    #
-   def reward state, action, next_state
+   def reward(_state, _action, _next_state)
      raise NotImplementedError
    end
 
@@ -182,8 +183,9 @@ module FiniteMDP::Model
      prs = []
      states.each do |state|
        actions(state).each do |action|
-         pr = next_states(state, action).map{|next_state|
-           transition_probability(state, action, next_state)}.inject(:+)
+         pr = next_states(state, action).map do |next_state|
+           transition_probability(state, action, next_state)
+         end.inject(:+)
          prs << [[state, action], pr]
        end
      end
@@ -198,7 +200,7 @@ module FiniteMDP::Model
    #
    # @return [nil]
    #
-   def check_transition_probabilities_sum tol=1e-6
+   def check_transition_probabilities_sum(tol = 1e-6)
      transition_probability_sums.each do |(state, action), pr|
        raise "transition probabilities for state #{state.inspect} and
          action #{action.inspect} sum to #{pr}" if pr < 1 - tol
@@ -230,9 +232,8 @@ module FiniteMDP::Model
          all_states.merge ns
          any_out_transitions ||= !ns.empty?
        end
-       out_states << state if any_out_transitions
+       out_states << state if any_out_transitions
      end
      all_states - out_states
    end
  end
-
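
To make the abstract interface above concrete, here is a hypothetical minimal model that mixes in FiniteMDP::Model and relies on the default next_states (all states); the coin-flip dynamics and the CoinModel name are invented for illustration, and the gem is assumed to be required as finite_mdp.

  require 'finite_mdp'

  # A fair coin: any flip lands on :heads or :tails with probability 0.5;
  # reward 1 for :heads, 0 for :tails.
  class CoinModel
    include FiniteMDP::Model

    def states
      %i[heads tails]
    end

    def actions(_state)
      [:flip]
    end

    def transition_probability(_state, _action, _next_state)
      0.5
    end

    def reward(_state, _action, next_state)
      next_state == :heads ? 1 : 0
    end
  end

  model = CoinModel.new
  model.check_transition_probabilities_sum  # 0.5 + 0.5 == 1, so no error
  FiniteMDP::ArrayModel.from_model(model).transition_probability(:tails, :flip, :heads)  # => 0.5

Because the sparse flag defaults to true, converting such a model with from_model stores only the transitions whose probability is positive, even though the default next_states enumerates every state.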