finite_mdp 0.2.0 → 0.3.0
- checksums.yaml +4 -4
- data/README.rdoc +8 -12
- data/lib/finite_mdp/array_model.rb +226 -0
- data/lib/finite_mdp/hash_model.rb +10 -9
- data/lib/finite_mdp/model.rb +19 -18
- data/lib/finite_mdp/solver.rb +96 -83
- data/lib/finite_mdp/table_model.rb +28 -19
- data/lib/finite_mdp/vector_valued.rb +5 -5
- data/lib/finite_mdp/version.rb +2 -1
- data/lib/finite_mdp.rb +3 -2
- data/test/finite_mdp/finite_mdp_test.rb +151 -98
- metadata +33 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: abf81db7b691f5238c134d835f819f75609fa17c
+  data.tar.gz: 4d897e26e7cc8e8aaffd5c5ce80855d3de55fbb2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8711791575db42460dc233ab92a787697731e833c3f121a351ddb1a3dc690c411ec00ff99a0dd996b33fd1be6cae4ea1a423354c013e709f4cb12968b1e3d0c8
+  data.tar.gz: aae51101f51e9d60f9b648b58a3ea6443f8228b8ddee4b00c1d55861d8fd89f8c48d4b35174c4fbda2dbdff037d7631d64ecc3f45957e529e5720087e43fe2b3
data/README.rdoc
CHANGED
@@ -94,10 +94,10 @@ absorbing state with zero reward, called :stop.
 
   # can move north, east, south or west on the grid
   MOVES = {
-    '^' => [-1, 0],
-    '>' => [ 0, 1],
-    'v' => [ 1, 0],
-    '<' => [ 0, -1]}
+    '^' => [-1, 0],
+    '>' => [ 0, 1],
+    'v' => [ 1, 0],
+    '<' => [ 0, -1]}
 
   # agent can move north, south, east or west (unless it's in the :stop
   # state); if it tries to move off the grid or into an obstacle, it stays
@@ -169,8 +169,8 @@ absorbing state with zero reward, called :stop.
 
   puts model.pretty_policy(solver.policy)
   # output: (matches Figure 17.2(a))
-  # > > >
-  # ^ ^
+  # > > >
+  # ^ ^
   # ^ < < <
 
   puts model.pretty_value(solver.value)
@@ -196,10 +196,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 
 == REQUIREMENTS
 
-
-* ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
-* ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
-* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
+This gem requires ruby 2.2 or higher.
 
 == INSTALLATION
 
@@ -209,7 +206,7 @@ Tested on
 
 (The MIT License)
 
-Copyright (c)
+Copyright (c) 2016 John Lees-Miller
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -229,4 +226,3 @@ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
data/lib/finite_mdp/array_model.rb
ADDED
@@ -0,0 +1,226 @@
+# frozen_string_literal: true
+#
+# A finite markov decision process model for which the states, transition
+# probabilities and rewards are stored in a sparse nested array format:
+# model[state_num][action_num] = [[next_state_num, probability, reward], ...]
+#
+# Note: The action_num is not consistent between states --- each state's action
+# array contains only the actions that apply in that state.
+#
+# This class also maintains a {StateActionMap} to map between the state and
+# action numbers and the original states and actions.
+#
+class FiniteMDP::ArrayModel
+  include FiniteMDP::Model
+
+  #
+  # Map between states and actions and their corresponding indexes. This is used
+  # with an {ArrayModel}, which works only with the indexes internally.
+  #
+  class StateActionMap
+    def initialize(map = [])
+      @map = map
+    end
+
+    attr_reader :map
+
+    def add(state, actions)
+      @map << [state, actions]
+    end
+
+    def states
+      @map.map { |state, _actions| state }
+    end
+
+    def actions(state)
+      _state, actions = @map[state_index(state)]
+      actions
+    end
+
+    def state_action_index(state, action)
+      index = state_index(state)
+      [index, @map[index][1].index(action)]
+    end
+
+    def state(index)
+      @map[index][0]
+    end
+
+    def state_index(state)
+      @map.index { |test_state, _actions| test_state == state }
+    end
+
+    #
+    # Build from a model.
+    #
+    # @param [Model] model
+    #
+    # @param [Boolean] ordered assume states are orderable; default is to
+    #   inspect the first state
+    #
+    def self.from_model(model, ordered = nil)
+      model_states = model.states
+
+      ordered = model_states.first.respond_to?(:>=) if ordered.nil?
+      map = ordered ? OrderedStateActionMap.new : StateActionMap.new
+      model_states.each do |state|
+        map.add(state, model.actions(state))
+      end
+      map
+    end
+  end
+
+  #
+  # A {StateActionMap} for states that support ordering. Lookups are more
+  # efficient than for an ordinary {StateActionMap}, which does not assume that
+  # states can be ordered.
+  #
+  class OrderedStateActionMap < StateActionMap
+    def add(state, actions)
+      index = state_index(state)
+      @map.insert(index || @map.size, [state, actions])
+    end
+
+    def state_index(state)
+      (0...@map.size).bsearch { |i| @map[i][0] >= state }
+    end
+  end
+
+  #
+  # @param [Array<Array<Array>>] array see notes for {ArrayModel}
+  #
+  # @param [StateActionMap] state_action_map
+  #
+  def initialize(array, state_action_map)
+    @array = array
+    @state_action_map = state_action_map
+  end
+
+  #
+  # @return [Array<Array<Array>>] array see notes for {ArrayModel}
+  #
+  attr_reader :array
+
+  #
+  # @return [StateActionMap]
+  #
+  attr_reader :state_action_map
+
+  #
+  # States in this model; see {Model#states}.
+  #
+  # @return [Array<state>] not empty; no duplicate states
+  #
+  def states
+    @state_action_map.states
+  end
+
+  #
+  # Number of states in the model.
+  #
+  # @return [Fixnum] positive
+  #
+  def num_states
+    @state_action_map.map.size
+  end
+
+  #
+  # Actions that are valid for the given state; see {Model#actions}.
+  #
+  # @param [state] state
+  #
+  # @return [Array<action>] not empty; no duplicate actions
+  #
+  def actions(state)
+    @state_action_map.actions(state)
+  end
+
+  #
+  # Possible successor states after taking the given action in the given state;
+  # see {Model#next_states}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @return [Array<state>] not empty; no duplicates
+  #
+  def next_states(state, action)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    @array[state_index][action_index].map do |next_state_index, _pr, _reward|
+      @state_action_map.state(next_state_index)
+    end
+  end
+
+  #
+  # Probability of the given transition; see {Model#transition_probability}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @param [state] next_state
+  #
+  # @return [Float] in [0, 1]; zero if the transition is not in the model
+  #
+  def transition_probability(state, action, next_state)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    next_state_index = @state_action_map.state_index(next_state)
+    @array[state_index][action_index].each do |index, probability, _reward|
+      return probability if index == next_state_index
+    end
+    0
+  end
+
+  #
+  # Reward for a given transition; see {Model#reward}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @param [state] next_state
+  #
+  # @return [Float, nil] nil if the transition is not in the model
+  #
+  def reward(state, action, next_state)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    next_state_index = @state_action_map.state_index(next_state)
+    @array[state_index][action_index].each do |index, _probability, reward|
+      return reward if index == next_state_index
+    end
+    nil
+  end
+
+  #
+  # Convert a generic model into an array model.
+  #
+  # @param [Model] model
+  #
+  # @param [Boolean] sparse do not store entries for transitions with zero
+  #   probability
+  #
+  # @param [Boolean] ordered assume states are orderable; default is to inspect
+  #   the first state
+  #
+  # @return [ArrayModel]
+  #
+  def self.from_model(model, sparse = true, ordered = nil)
+    state_action_map = StateActionMap.from_model(model, ordered)
+
+    array = state_action_map.states.map do |state|
+      state_action_map.actions(state).map do |action|
+        model.next_states(state, action).map do |next_state|
+          pr = model.transition_probability(state, action, next_state)
+          next unless pr > 0 || !sparse
+          reward = model.reward(state, action, next_state)
+          [state_action_map.state_index(next_state), pr, reward]
+        end.compact
+      end
+    end
+
+    FiniteMDP::ArrayModel.new(array, state_action_map)
+  end
+end
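
For orientation, here is a small usage sketch of the new class (not taken from the gem's own docs; the two-state model and its numbers are invented for illustration). It builds an ArrayModel from a HashModel and queries it through the common Model interface:

  require 'finite_mdp'

  # hash[state][action][next_state] = [probability, reward]
  hash = {
    low:  { wait: { low: [0.9, 0.0], high: [0.1, 1.0] } },
    high: { wait: { high: [1.0, 1.0] } }
  }
  model = FiniteMDP::HashModel.new(hash)

  # convert to the sparse nested array representation; the states are Symbols,
  # which are orderable, so an OrderedStateActionMap is used internally
  array_model = FiniteMDP::ArrayModel.from_model(model)

  array_model.num_states                                  #=> 2
  array_model.actions(:low)                               #=> [:wait]
  array_model.transition_probability(:low, :wait, :high)  #=> 0.1
  array_model.reward(:low, :wait, :high)                  #=> 1.0
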
data/lib/finite_mdp/hash_model.rb
CHANGED
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # A finite markov decision process model for which the transition
 # probabilities and rewards are specified using nested hash tables.
@@ -18,7 +19,7 @@ class FiniteMDP::HashModel
   # @param [Hash<state, Hash<action, Hash<state, [Float, Float]>>>] hash see
   #   notes for {HashModel} for an explanation of this structure
   #
-  def initialize
+  def initialize(hash)
     @hash = hash
   end
 
@@ -44,7 +45,7 @@ class FiniteMDP::HashModel
   #
   # @return [Array<action>] not empty; no duplicate actions
   #
-  def actions
+  def actions(state)
     hash[state].keys
   end
 
@@ -58,7 +59,7 @@ class FiniteMDP::HashModel
   #
   # @return [Array<state>] not empty; no duplicate states
   #
-  def next_states
+  def next_states(state, action)
     hash[state][action].keys
   end
 
@@ -73,7 +74,7 @@ class FiniteMDP::HashModel
   #
   # @return [Float] in [0, 1]; zero if the transition is not in the hash
   #
-  def transition_probability
+  def transition_probability(state, action, next_state)
     probability, _reward = hash[state][action][next_state]
     probability || 0
   end
@@ -89,7 +90,7 @@ class FiniteMDP::HashModel
   #
   # @return [Float, nil] nil if the transition is not in the hash
   #
-  def reward
+  def reward(state, action, next_state)
     _probability, reward = hash[state][action][next_state]
     reward
   end
@@ -104,7 +105,7 @@ class FiniteMDP::HashModel
   #
   # @return [HashModel] not nil
   #
-  def self.from_model
+  def self.from_model(model, sparse = true)
     hash = {}
     model.states.each do |state|
       hash[state] ||= {}
@@ -112,12 +113,12 @@ class FiniteMDP::HashModel
         hash[state][action] ||= {}
         model.next_states(state, action).each do |next_state|
           pr = model.transition_probability(state, action, next_state)
-
-
+          next unless pr > 0 || !sparse
+          hash[state][action][next_state] =
+            [pr, model.reward(state, action, next_state)]
         end
       end
     end
     FiniteMDP::HashModel.new(hash)
   end
 end
-
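
The HashModel changes keep the nested hash layout; the methods simply gain explicit arguments, and from_model now skips zero-probability transitions unless sparse is false. A rough sketch of converting back to a hash model, continuing the hypothetical array_model from the sketch above:

  hash_model = FiniteMDP::HashModel.from_model(array_model)
  hash_model.next_states(:low, :wait)                    #=> [:low, :high]
  hash_model.transition_probability(:low, :wait, :high)  #=> 0.1

  # transitions that were never stored (here because the source model is
  # already sparse) report zero probability and a nil reward
  hash_model.transition_probability(:high, :wait, :low)  #=> 0
  hash_model.reward(:high, :wait, :low)                  #=> nil
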
data/lib/finite_mdp/model.rb
CHANGED
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # Interface that defines a finite markov decision process model.
 #
@@ -93,13 +94,13 @@ module FiniteMDP::Model
   # All states must have at least one valid action; see notes for {Model}
   # regarding how to encode a terminal state.
   #
-  # @param [state]
+  # @param [state] _state
   #
   # @return [Array<action>] not empty; no duplicate actions
   #
   # @abstract
   #
-  def actions
+  def actions(_state)
     raise NotImplementedError
   end
 
@@ -115,13 +116,13 @@ module FiniteMDP::Model
   # ignores them in its internal representation, so you can usually forget about
   # this method.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
   # @return [Array<state>] not empty; no duplicate states
   #
-  def next_states
+  def next_states(_state, _action)
     states
   end
 
@@ -134,18 +135,18 @@ module FiniteMDP::Model
   # {TableModel#transition_probability} return zero in this case, but this is
   # not part of the contract.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
-  # @param [state]
+  # @param [state] _next_state
   #
   # @return [Float] in [0, 1]; undefined if the transition is not in the model
   #   (see notes above)
   #
   # @abstract
   #
-  def transition_probability
+  def transition_probability(_state, _action, _next_state)
     raise NotImplementedError
   end
 
@@ -157,18 +158,18 @@ module FiniteMDP::Model
   # undefined. Note that {HashModel#reward} and {TableModel#reward} return
   # <tt>nil</tt> in this case, but this is not part of the contract.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
-  # @param [state]
+  # @param [state] _next_state
   #
   # @return [Float, nil] nil only if the transition is not in the model (but the
   #   result is undefined in this case -- it need not be nil; see notes above)
   #
   # @abstract
   #
-  def reward
+  def reward(_state, _action, _next_state)
     raise NotImplementedError
   end
 
@@ -182,8 +183,9 @@ module FiniteMDP::Model
     prs = []
     states.each do |state|
       actions(state).each do |action|
-        pr = next_states(state, action).map
-          transition_probability(state, action, next_state)
+        pr = next_states(state, action).map do |next_state|
+          transition_probability(state, action, next_state)
+        end.inject(:+)
         prs << [[state, action], pr]
       end
     end
@@ -198,7 +200,7 @@ module FiniteMDP::Model
   #
   # @return [nil]
   #
-  def check_transition_probabilities_sum
+  def check_transition_probabilities_sum(tol = 1e-6)
     transition_probability_sums.each do |(state, action), pr|
       raise "transition probabilities for state #{state.inspect} and
         action #{action.inspect} sum to #{pr}" if pr < 1 - tol
@@ -230,9 +232,8 @@ module FiniteMDP::Model
         all_states.merge ns
         any_out_transitions ||= !ns.empty?
       end
-      out_states << state if any_out_transitions
+      out_states << state if any_out_transitions
     end
     all_states - out_states
   end
 end
-
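
The Model module remains the abstract interface: a custom model implements states, actions, next_states, transition_probability and reward, and inherits the consistency checks, including check_transition_probabilities_sum with its now-explicit tol argument. A minimal hypothetical implementation (not part of the gem) to show the shape of the contract:

  require 'finite_mdp'

  # Flip a fair coin until it comes up heads; heads pays 1 and then the
  # process absorbs in :done with zero reward.
  class CoinModel
    include FiniteMDP::Model

    def states
      [:start, :done]
    end

    def actions(_state)
      [:flip]
    end

    def next_states(state, _action)
      state == :start ? [:start, :done] : [:done]
    end

    def transition_probability(state, _action, next_state)
      return 0.5 if state == :start
      next_state == :done ? 1.0 : 0.0
    end

    def reward(state, _action, next_state)
      state == :start && next_state == :done ? 1.0 : 0.0
    end
  end

  model = CoinModel.new
  model.check_transition_probabilities_sum # raises if any sum is below 1 - tol
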