finite_mdp 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +8 -12
- data/lib/finite_mdp/array_model.rb +226 -0
- data/lib/finite_mdp/hash_model.rb +10 -9
- data/lib/finite_mdp/model.rb +19 -18
- data/lib/finite_mdp/solver.rb +96 -83
- data/lib/finite_mdp/table_model.rb +28 -19
- data/lib/finite_mdp/vector_valued.rb +5 -5
- data/lib/finite_mdp/version.rb +2 -1
- data/lib/finite_mdp.rb +3 -2
- data/test/finite_mdp/finite_mdp_test.rb +151 -98
- metadata +33 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: abf81db7b691f5238c134d835f819f75609fa17c
+  data.tar.gz: 4d897e26e7cc8e8aaffd5c5ce80855d3de55fbb2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8711791575db42460dc233ab92a787697731e833c3f121a351ddb1a3dc690c411ec00ff99a0dd996b33fd1be6cae4ea1a423354c013e709f4cb12968b1e3d0c8
+  data.tar.gz: aae51101f51e9d60f9b648b58a3ea6443f8228b8ddee4b00c1d55861d8fd89f8c48d4b35174c4fbda2dbdff037d7631d64ecc3f45957e529e5720087e43fe2b3
data/README.rdoc
CHANGED
@@ -94,10 +94,10 @@ absorbing state with zero reward, called :stop.
 
 # can move north, east, south or west on the grid
 MOVES = {
-'^' => [-1, 0],
-'>' => [ 0, 1],
-'v' => [ 1, 0],
-'<' => [ 0, -1]}
+'^' => [-1, 0],
+'>' => [ 0, 1],
+'v' => [ 1, 0],
+'<' => [ 0, -1]}
 
 # agent can move north, south, east or west (unless it's in the :stop
 # state); if it tries to move off the grid or into an obstacle, it stays
@@ -169,8 +169,8 @@ absorbing state with zero reward, called :stop.
 
 puts model.pretty_policy(solver.policy)
 # output: (matches Figure 17.2(a))
-# > > >
-# ^ ^
+# > > >
+# ^ ^
 # ^ < < <
 
 puts model.pretty_value(solver.value)
@@ -196,10 +196,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 
 == REQUIREMENTS
 
-
-* ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
-* ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
-* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
+This gem requires ruby 2.2 or higher.
 
 == INSTALLATION
 
@@ -209,7 +206,7 @@ Tested on
 
 (The MIT License)
 
-Copyright (c)
+Copyright (c) 2016 John Lees-Miller
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -229,4 +226,3 @@ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
data/lib/finite_mdp/array_model.rb
ADDED
@@ -0,0 +1,226 @@
+# frozen_string_literal: true
+#
+# A finite markov decision process model for which the states, transition
+# probabilities and rewards are stored in a sparse nested array format:
+# model[state_num][action_num] = [[next_state_num, probability, reward], ...]
+#
+# Note: The action_num is not consistent between states --- each state's action
+# array contains only the actions that apply in that state.
+#
+# This class also maintains a {StateActionMap} to map between the state and
+# action numbers and the original states and actions.
+#
+class FiniteMDP::ArrayModel
+  include FiniteMDP::Model
+
+  #
+  # Map between states and actions and their corresponding indexes. This is used
+  # with an {ArrayModel}, which works only with the indexes internally.
+  #
+  class StateActionMap
+    def initialize(map = [])
+      @map = map
+    end
+
+    attr_reader :map
+
+    def add(state, actions)
+      @map << [state, actions]
+    end
+
+    def states
+      @map.map { |state, _actions| state }
+    end
+
+    def actions(state)
+      _state, actions = @map[state_index(state)]
+      actions
+    end
+
+    def state_action_index(state, action)
+      index = state_index(state)
+      [index, @map[index][1].index(action)]
+    end
+
+    def state(index)
+      @map[index][0]
+    end
+
+    def state_index(state)
+      @map.index { |test_state, _actions| test_state == state }
+    end
+
+    #
+    # Build from a model.
+    #
+    # @param [Model] model
+    #
+    # @param [Boolean] ordered assume states are orderable; default is to
+    #   inspect the first state
+    #
+    def self.from_model(model, ordered = nil)
+      model_states = model.states
+
+      ordered = model_states.first.respond_to?(:>=) if ordered.nil?
+      map = ordered ? OrderedStateActionMap.new : StateActionMap.new
+      model_states.each do |state|
+        map.add(state, model.actions(state))
+      end
+      map
+    end
+  end
+
+  #
+  # A {StateActionMap} for states that support ordering. Lookups are more
+  # efficient than for an ordinary {StateActionMap}, which does not assume that
+  # states can be ordered.
+  #
+  class OrderedStateActionMap < StateActionMap
+    def add(state, actions)
+      index = state_index(state)
+      @map.insert(index || @map.size, [state, actions])
+    end
+
+    def state_index(state)
+      (0...@map.size).bsearch { |i| @map[i][0] >= state }
+    end
+  end
+
+  #
+  # @param [Array<Array<Array>>] array see notes for {ArrayModel}
+  # @param [StateActionMap] state_action_map
+  #
+  def initialize(array, state_action_map)
+    @array = array
+    @state_action_map = state_action_map
+  end
+
+  #
+  # @return [Array<Array<Array>>>] array see notes for {ArrayModel}
+  #
+  attr_reader :array
+
+  #
+  # @return [StateActionMap]
+  #
+  attr_reader :state_action_map
+
+  #
+  # States in this model; see {Model#states}.
+  #
+  # @return [Array<state>] not empty; no duplicate states
+  #
+  def states
+    @state_action_map.states
+  end
+
+  #
+  # Number of states in the model.
+  #
+  # @return [Fixnum] positive
+  #
+  def num_states
+    @state_action_map.map.size
+  end
+
+  #
+  # Actions that are valid for the given state; see {Model#actions}.
+  #
+  # @param [state] state
+  #
+  # @return [Array<state>] not empty; no duplicate actions
+  #
+  def actions(state)
+    @state_action_map.actions(state)
+  end
+
+  #
+  # Possible successor states after taking the given action in the given state;
+  # see {Model#next_states}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @return [Array<state>] not empty; no duplicates
+  #
+  def next_states(state, action)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    @array[state_index][action_index].map do |next_state_index, _pr, _reward|
+      @state_action_map.state(next_state_index)
+    end
+  end
+
+  #
+  # Probability of the given transition; see {Model#transition_probability}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @param [state] next_state
+  #
+  # @return [Float] in [0, 1]; zero if the transition is not in the model
+  #
+  def transition_probability(state, action, next_state)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    next_state_index = @state_action_map.state_index(next_state)
+    @array[state_index][action_index].each do |index, probability, _reward|
+      return probability if index == next_state_index
+    end
+    0
+  end
+
+  #
+  # Reward for a given transition; see {Model#reward}.
+  #
+  # @param [state] state
+  #
+  # @param [action] action
+  #
+  # @param [state] next_state
+  #
+  # @return [Float, nil] nil if the transition is not in the model
+  #
+  def reward(state, action, next_state)
+    state_index, action_index =
+      @state_action_map.state_action_index(state, action)
+    next_state_index = @state_action_map.state_index(next_state)
+    @array[state_index][action_index].each do |index, _probability, reward|
+      return reward if index == next_state_index
+    end
+    nil
+  end
+
+  #
+  # Convert a generic model into a hash model.
+  #
+  # @param [Model] model
+  #
+  # @param [Boolean] sparse do not store entries for transitions with zero
+  #   probability
+  #
+  # @param [Boolean] ordered assume states are orderable; default is to inspect
+  #   the first state
+  #
+  # @return [ArrayModel]
+  #
+  def self.from_model(model, sparse = true, ordered = nil)
+    state_action_map = StateActionMap.from_model(model, ordered)
+
+    array = state_action_map.states.map do |state|
+      state_action_map.actions(state).map do |action|
+        model.next_states(state, action).map do |next_state|
+          pr = model.transition_probability(state, action, next_state)
+          next unless pr > 0 || !sparse
+          reward = model.reward(state, action, next_state)
+          [state_action_map.state_index(next_state), pr, reward]
+        end.compact
+      end
+    end
+
+    FiniteMDP::ArrayModel.new(array, state_action_map)
+  end
+end
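The class comment above only describes the array layout abstractly. As a rough, hypothetical sketch (not taken from the gem or its tests), a small two-state model can be written directly in that layout and queried through the model methods added in this file; the state and action names and the expected results in the #=> comments are assumptions based only on the code shown in this diff:

  require 'finite_mdp'

  # Hypothetical two-state example (state 0 = :low, state 1 = :high).
  map = FiniteMDP::ArrayModel::StateActionMap.new
  map.add(:low,  [:wait, :charge])
  map.add(:high, [:wait])

  array = [
    [                                  # state 0 (:low)
      [[0, 0.9, 0.0], [1, 0.1, 0.0]],  #   action 0 (:wait)
      [[1, 1.0, -1.0]]                 #   action 1 (:charge)
    ],
    [                                  # state 1 (:high)
      [[1, 0.8, 1.0], [0, 0.2, 1.0]]   #   action 0 (:wait)
    ]
  ]

  model = FiniteMDP::ArrayModel.new(array, map)
  model.num_states                                  #=> 2
  model.actions(:low)                               #=> [:wait, :charge]
  model.next_states(:low, :charge)                  #=> [:high]
  model.transition_probability(:high, :wait, :low)  #=> 0.2
  model.reward(:low, :charge, :high)                #=> -1.0
  model.check_transition_probabilities_sum          # no error: each row sums to 1

Storing only the [next_state_num, probability, reward] triples that actually occur for each state and action is what makes the representation sparse.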
data/lib/finite_mdp/hash_model.rb
CHANGED
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # A finite markov decision process model for which the transition
 # probabilities and rewards are specified using nested hash tables.
@@ -18,7 +19,7 @@ class FiniteMDP::HashModel
   # @param [Hash<state, Hash<action, Hash<state, [Float, Float]>>>] hash see
   #   notes for {HashModel} for an explanation of this structure
   #
-  def initialize
+  def initialize(hash)
     @hash = hash
   end
 
@@ -44,7 +45,7 @@ class FiniteMDP::HashModel
   #
   # @return [Array<action>] not empty; no duplicate actions
   #
-  def actions
+  def actions(state)
     hash[state].keys
   end
 
@@ -58,7 +59,7 @@ class FiniteMDP::HashModel
   #
   # @return [Array<state>] not empty; no duplicate states
   #
-  def next_states
+  def next_states(state, action)
     hash[state][action].keys
   end
 
@@ -73,7 +74,7 @@ class FiniteMDP::HashModel
   #
   # @return [Float] in [0, 1]; zero if the transition is not in the hash
   #
-  def transition_probability
+  def transition_probability(state, action, next_state)
     probability, _reward = hash[state][action][next_state]
     probability || 0
   end
@@ -89,7 +90,7 @@ class FiniteMDP::HashModel
   #
   # @return [Float, nil] nil if the transition is not in the hash
   #
-  def reward
+  def reward(state, action, next_state)
     _probability, reward = hash[state][action][next_state]
     reward
   end
@@ -104,7 +105,7 @@ class FiniteMDP::HashModel
   #
   # @return [HashModel] not nil
   #
-  def self.from_model
+  def self.from_model(model, sparse = true)
     hash = {}
     model.states.each do |state|
       hash[state] ||= {}
@@ -112,12 +113,12 @@ class FiniteMDP::HashModel
         hash[state][action] ||= {}
        model.next_states(state, action).each do |next_state|
          pr = model.transition_probability(state, action, next_state)
-
-
+          next unless pr > 0 || !sparse
+          hash[state][action][next_state] =
+            [pr, model.reward(state, action, next_state)]
        end
      end
    end
    FiniteMDP::HashModel.new(hash)
  end
 end
-
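For comparison, the same hypothetical two-state model used above can be written in the nested hash layout documented here (hash[state][action][next_state] = [probability, reward]) and converted back and forth with the from_model class methods. This is an illustrative sketch under the same assumptions, not code from the gem:

  require 'finite_mdp'

  # hash[state][action][next_state] = [probability, reward]
  hash = {
    low:  { wait:   { low: [0.9, 0.0], high: [0.1, 0.0] },
            charge: { high: [1.0, -1.0] } },
    high: { wait:   { high: [0.8, 1.0], low: [0.2, 1.0] } }
  }

  hash_model = FiniteMDP::HashModel.new(hash)
  hash_model.actions(:low)                              #=> [:wait, :charge]
  hash_model.transition_probability(:low, :wait, :high) #=> 0.1
  hash_model.reward(:low, :charge, :high)               #=> -1.0

  # Conversions go through the generic Model interface in both directions;
  # with the default sparse flag, zero-probability entries are dropped.
  array_model = FiniteMDP::ArrayModel.from_model(hash_model)
  round_trip  = FiniteMDP::HashModel.from_model(array_model)
  round_trip.transition_probability(:high, :wait, :low) #=> 0.2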
data/lib/finite_mdp/model.rb
CHANGED
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # Interface that defines a finite markov decision process model.
 #
@@ -93,13 +94,13 @@ module FiniteMDP::Model
   # All states must have at least one valid action; see notes for {Model}
   # regarding how to encode a terminal state.
   #
-  # @param [state]
+  # @param [state] _state
   #
   # @return [Array<action>] not empty; no duplicate actions
   #
   # @abstract
   #
-  def actions
+  def actions(_state)
     raise NotImplementedError
   end
 
@@ -115,13 +116,13 @@ module FiniteMDP::Model
   # ignores them in its internal representation, so you can usually forget about
   # this method.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
   # @return [Array<state>] not empty; no duplicate states
   #
-  def next_states
+  def next_states(_state, _action)
     states
   end
 
@@ -134,18 +135,18 @@ module FiniteMDP::Model
   # {TableModel#transition_probability} return zero in this case, but this is
   # not part of the contract.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
-  # @param [state]
+  # @param [state] _next_state
   #
   # @return [Float] in [0, 1]; undefined if the transition is not in the model
   #   (see notes above)
   #
   # @abstract
   #
-  def transition_probability
+  def transition_probability(_state, _action, _next_state)
     raise NotImplementedError
   end
 
@@ -157,18 +158,18 @@ module FiniteMDP::Model
   # undefined. Note that {HashModel#reward} and {TableModel#reward} return
   # <tt>nil</tt> in this case, but this is not part of the contract.
   #
-  # @param [state]
+  # @param [state] _state
   #
-  # @param [action]
+  # @param [action] _action
   #
-  # @param [state]
+  # @param [state] _next_state
   #
   # @return [Float, nil] nil only if the transition is not in the model (but the
   #   result is undefined in this case -- it need not be nil; see notes above)
   #
   # @abstract
   #
-  def reward
+  def reward(_state, _action, _next_state)
     raise NotImplementedError
   end
 
@@ -182,8 +183,9 @@ module FiniteMDP::Model
     prs = []
     states.each do |state|
       actions(state).each do |action|
-        pr = next_states(state, action).map
-          transition_probability(state, action, next_state)
+        pr = next_states(state, action).map do |next_state|
+          transition_probability(state, action, next_state)
+        end.inject(:+)
         prs << [[state, action], pr]
       end
     end
@@ -198,7 +200,7 @@ module FiniteMDP::Model
   #
   # @return [nil]
   #
-  def check_transition_probabilities_sum
+  def check_transition_probabilities_sum(tol = 1e-6)
     transition_probability_sums.each do |(state, action), pr|
       raise "transition probabilities for state #{state.inspect} and
         action #{action.inspect} sum to #{pr}" if pr < 1 - tol
@@ -230,9 +232,8 @@ module FiniteMDP::Model
         all_states.merge ns
         any_out_transitions ||= !ns.empty?
       end
-      out_states << state if any_out_transitions
+      out_states << state if any_out_transitions
     end
     all_states - out_states
   end
 end
-
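Since the abstract methods now take explicit state and action arguments, a custom model written against 0.3.0 defines them with the new signatures. The following is a minimal hypothetical sketch (the CoinModel class is not part of the gem) of an implementation of the interface, validated with check_transition_probabilities_sum and compacted with ArrayModel.from_model:

  require 'finite_mdp'

  # Hypothetical model: a coin that can be flipped (fair) or kept as it is.
  class CoinModel
    include FiniteMDP::Model

    def states
      [:heads, :tails]
    end

    def actions(_state)
      [:flip, :keep]
    end

    def next_states(_state, _action)
      states
    end

    def transition_probability(state, action, next_state)
      action == :flip ? 0.5 : (state == next_state ? 1.0 : 0.0)
    end

    def reward(_state, _action, next_state)
      next_state == :heads ? 1.0 : 0.0
    end
  end

  model = CoinModel.new
  model.check_transition_probabilities_sum  # passes: every (state, action) sums to 1
  compact = FiniteMDP::ArrayModel.from_model(model)
  compact.num_states                        #=> 2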