finite_mdp 0.0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +4 -1
- data/lib/finite_mdp/model.rb +48 -5
- data/lib/finite_mdp/solver.rb +52 -5
- data/lib/finite_mdp/version.rb +4 -1
- data/test/{finite_mdp_test.rb → finite_mdp/finite_mdp_test.rb} +37 -2
- metadata +59 -58
data/README.rdoc
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
* https://github.com/jdleesmiller/finite_mdp
|
4
4
|
|
5
|
+
{<img src="https://secure.travis-ci.org/jdleesmiller/finite_mdp.png"/>}[http://travis-ci.org/jdleesmiller/finite_mdp]
|
6
|
+
|
5
7
|
== SYNOPSIS
|
6
8
|
|
7
9
|
Solve small, finite Markov Decision Process (MDP) models.
|
@@ -159,7 +161,7 @@ absorbing state with zero reward, called :stop.
|
|
159
161
|
[-0.04, -0.04, -0.04, -0.04]],
|
160
162
|
[[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
|
161
163
|
|
162
|
-
# sanity check:
|
164
|
+
# sanity check: successor state probabilities must sum to 1
|
163
165
|
model.check_transition_probabilities_sum
|
164
166
|
|
165
167
|
solver = FiniteMDP::Solver.new(model, 1) # discount factor 1
|
@@ -197,6 +199,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
|
|
197
199
|
Tested on
|
198
200
|
* ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
|
199
201
|
* ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
|
202
|
+
* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
|
200
203
|
|
201
204
|
== INSTALLATION
|
202
205
|
|
data/lib/finite_mdp/model.rb
CHANGED
@@ -172,6 +172,24 @@ module FiniteMDP::Model
|
|
172
172
|
raise NotImplementedError
|
173
173
|
end
|
174
174
|
|
175
|
+
#
|
176
|
+
# Sum of the transition probabilities for each (state, action) pair; the sums
|
177
|
+
# should be one in a valid model.
|
178
|
+
#
|
179
|
+
# @return [Hash<[State, Action], Float>]
|
180
|
+
#
|
181
|
+
def transition_probability_sums
  # Maps each [state, action] pair to the sum of the transition probabilities
  # over the successor states reported by next_states(state, action). In a
  # valid model every sum should be (close to) one.
  sums = {}
  states.each do |state|
    actions(state).each do |action|
      successors = next_states(state, action)
      # Seed the fold with 0 so that a pair with no successor states sums to 0
      # rather than nil; nil would make numeric comparisons on the result fail.
      sums[[state, action]] = successors.inject(0) { |total, next_state|
        total + transition_probability(state, action, next_state)
      }
    end
  end
  sums
end
|
192
|
+
|
175
193
|
#
|
176
194
|
# Raise an error if the sum of the transition probabilities for any (state,
|
177
195
|
# action) pair is not sufficiently close to 1.
|
@@ -181,15 +199,40 @@ module FiniteMDP::Model
|
|
181
199
|
# @return [nil]
|
182
200
|
#
|
183
201
|
def check_transition_probabilities_sum tol=1e-6
  # Raises a RuntimeError for the first (state, action) pair whose transition
  # probabilities do not sum to 1 within +tol+; returns nil when all pairs pass.
  transition_probability_sums.each do |(state, action), pr|
    # Two-sided check: a sum above 1 + tol is just as invalid as one below
    # 1 - tol, so compare the absolute deviation from one.
    if (pr - 1).abs > tol
      raise "transition probabilities for state #{state.inspect} and " \
        "action #{action.inspect} sum to #{pr}"
    end
  end
  nil
end
|
208
|
+
|
209
|
+
#
|
210
|
+
# Set of states that have no transitions out.
|
211
|
+
#
|
212
|
+
# At present, this library can't solve a model with terminal states. However,
|
213
|
+
# you can add a dummy state (e.g. <tt>:stop</tt>) with zero reward that
|
214
|
+
# transitions back to itself with probability one.
|
215
|
+
#
|
216
|
+
# Note that if a state has transitions out, but all of them have probability
|
217
|
+
# zero, this method does not detect it as a terminal state. You can check for
|
218
|
+
# these using {#transition_probability_sums} instead.
|
219
|
+
#
|
220
|
+
# @return [Set]
|
221
|
+
#
|
222
|
+
def terminal_states
  # seen: every state listed by the model or reached as a successor;
  # with_exits: listed states that have at least one non-empty successor list.
  seen = Set[]
  with_exits = Set[]
  states.each do |state|
    seen << state
    successor_lists = actions(state).map { |action| next_states(state, action) }
    successor_lists.each { |successors| seen.merge successors }
    with_exits << state unless successor_lists.all?(&:empty?)
  end
  # whatever was seen but never had a way out is terminal
  seen - with_exits
end
|
194
237
|
end
|
195
238
|
|
data/lib/finite_mdp/solver.rb
CHANGED
@@ -73,7 +73,7 @@ class FiniteMDP::Solver
|
|
73
73
|
|
74
74
|
#
|
75
75
|
# @return [Model] the model being solved; read only; do not change the model
|
76
|
-
#
|
76
|
+
# while it is being solved
|
77
77
|
#
|
78
78
|
attr_reader :model
|
79
79
|
|
@@ -90,6 +90,27 @@ class FiniteMDP::Solver
|
|
90
90
|
Hash[model.states.zip(@array_value)]
|
91
91
|
end
|
92
92
|
|
93
|
+
#
|
94
|
+
# Current state-action value estimates; whereas {#value} returns $V(s)$, this
|
95
|
+
# returns $Q(s,a)$, in the usual notation.
|
96
|
+
#
|
97
|
+
# @return [Hash<[state, action], Float>]
|
98
|
+
#
|
99
|
+
def state_action_value
  # Builds a hash from [state, action] to the sum, over the entries stored for
  # that pair in @array_model, of pr * (r + @discount * value of next state).
  state_list = model.states
  table = {}
  @array_model.each_with_index do |per_action, s_index|
    s = state_list[s_index]
    action_list = model.actions(s)
    per_action.each_with_index do |outcomes, a_index|
      # each outcome is a (next state index, probability, reward) triple
      terms = outcomes.map do |succ_index, prob, reward|
        prob * (reward + @discount * @array_value[succ_index])
      end
      table[[s, action_list[a_index]]] = terms.inject(:+)
    end
  end
  table
end
|
113
|
+
|
93
114
|
#
|
94
115
|
# Current estimate of the optimal action for each state.
|
95
116
|
#
|
@@ -232,6 +253,13 @@ class FiniteMDP::Solver
|
|
232
253
|
#
|
233
254
|
# @return [Boolean] true iff iteration converged to within tolerance
|
234
255
|
#
|
256
|
+
# @yield [num_iters, delta] at the end of each iteration
|
257
|
+
#
|
258
|
+
# @yieldparam [Integer] num_iters iterations done so far
|
259
|
+
#
|
260
|
+
# @yieldparam [Float] delta largest change in the value function in the last
|
261
|
+
# iteration
|
262
|
+
#
|
235
263
|
def value_iteration tolerance, max_iters=nil
|
236
264
|
delta = Float::MAX
|
237
265
|
num_iters = 0
|
@@ -240,7 +268,8 @@ class FiniteMDP::Solver
|
|
240
268
|
num_iters += 1
|
241
269
|
|
242
270
|
break if delta < tolerance
|
243
|
-
break if max_iters && num_iters
|
271
|
+
break if max_iters && num_iters >= max_iters
|
272
|
+
yield num_iters, delta if block_given?
|
244
273
|
end
|
245
274
|
delta < tolerance
|
246
275
|
end
|
@@ -263,6 +292,18 @@ class FiniteMDP::Solver
|
|
263
292
|
#
|
264
293
|
# @return [Boolean] true iff a stable policy was obtained
|
265
294
|
#
|
295
|
+
# @yield [num_policy_iters, num_value_iters, delta] at the end of each
|
296
|
+
# policy evaluation iteration
|
297
|
+
#
|
298
|
+
# @yieldparam [Integer] num_policy_iters policy improvement iterations done so
|
299
|
+
# far
|
300
|
+
#
|
301
|
+
# @yieldparam [Integer] num_value_iters policy evaluation iterations done so
|
302
|
+
# far for the current policy improvement iteration
|
303
|
+
#
|
304
|
+
# @yieldparam [Float] delta largest change in the value function in the last
|
305
|
+
# policy evaluation iteration
|
306
|
+
#
|
266
307
|
def policy_iteration value_tolerance, max_value_iters=nil,
|
267
308
|
max_policy_iters=nil
|
268
309
|
|
@@ -276,14 +317,15 @@ class FiniteMDP::Solver
|
|
276
317
|
num_value_iters += 1
|
277
318
|
|
278
319
|
break if value_delta < value_tolerance
|
279
|
-
break if max_value_iters && num_value_iters
|
320
|
+
break if max_value_iters && num_value_iters >= max_value_iters
|
321
|
+
yield num_policy_iters, num_value_iters, value_delta if block_given?
|
280
322
|
end
|
281
323
|
|
282
324
|
# policy improvement
|
283
325
|
stable = improve_policy
|
284
326
|
num_policy_iters += 1
|
285
327
|
break if stable
|
286
|
-
break if max_policy_iters && num_policy_iters
|
328
|
+
break if max_policy_iters && num_policy_iters >= max_policy_iters
|
287
329
|
end
|
288
330
|
stable
|
289
331
|
end
|
@@ -297,6 +339,10 @@ class FiniteMDP::Solver
|
|
297
339
|
#
|
298
340
|
# @return [Boolean] true iff a stable policy was obtained
|
299
341
|
#
|
342
|
+
# @yield [num_iters] at the end of each iteration
|
343
|
+
#
|
344
|
+
# @yieldparam [Integer] num_iters policy improvement iterations done so far
|
345
|
+
#
|
300
346
|
def policy_iteration_exact max_iters=nil
|
301
347
|
stable = false
|
302
348
|
num_iters = 0
|
@@ -305,7 +351,8 @@ class FiniteMDP::Solver
|
|
305
351
|
stable = improve_policy
|
306
352
|
num_iters += 1
|
307
353
|
break if stable
|
308
|
-
break if max_iters && num_iters
|
354
|
+
break if max_iters && num_iters >= max_iters
|
355
|
+
yield num_iters if block_given?
|
309
356
|
end
|
310
357
|
stable
|
311
358
|
end
|
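data/lib/finite_mdp/version.rb
CHANGED
[the diff for version.rb is missing from this capture; the hunks below belong to the test file]
data/test/{finite_mdp_test.rb → finite_mdp/finite_mdp_test.rb}
CHANGED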
@@ -9,10 +9,16 @@ require 'set'
|
|
9
9
|
class TestFiniteMDP < Test::Unit::TestCase
|
10
10
|
include FiniteMDP
|
11
11
|
|
12
|
+
def assert_close expected, actual, tol=1e-6
  # Passes when expected and actual differ by strictly less than tol.
  difference = (expected - actual).abs
  message = "expected #{actual} to be within #{tol} of #{expected}"
  assert(difference < tol, message)
end
|
16
|
+
|
12
17
|
# check that we get the same model back; model parameters must be set before
|
13
18
|
# calling; see test_recycling_robot
|
14
19
|
def check_recycling_robot_model model, sparse
|
15
20
|
model.check_transition_probabilities_sum
|
21
|
+
assert_equal Set[], model.terminal_states
|
16
22
|
|
17
23
|
assert_equal Set[:high, :low], Set[*model.states]
|
18
24
|
assert_equal Set[:search, :wait], Set[*model.actions(:high)]
|
@@ -113,13 +119,31 @@ class TestFiniteMDP < Test::Unit::TestCase
|
|
113
119
|
|
114
120
|
# try solving with policy iteration using iterative policy evaluation
|
115
121
|
solver = Solver.new(table_model, 0.95, Hash.new {:wait})
|
116
|
-
assert solver.policy_iteration(1e-4, 2,
|
122
|
+
assert solver.policy_iteration(1e-4, 2, 50), "did not find stable policy"
|
117
123
|
assert_equal({:high => :search, :low => :recharge}, solver.policy)
|
118
124
|
|
119
125
|
# try solving with policy iteration using exact policy evaluation
|
120
|
-
|
126
|
+
gamma = 0.95
|
127
|
+
solver = Solver.new(table_model, gamma, Hash.new {:wait})
|
121
128
|
assert solver.policy_iteration_exact(20), "did not find stable policy"
|
122
129
|
assert_equal({:high => :search, :low => :recharge}, solver.policy)
|
130
|
+
|
131
|
+
# check the corresponding state-action values (Q(s,a) values)
|
132
|
+
v = solver.value
|
133
|
+
q_high_search = @alpha * (@r_search + gamma * v[:high]) +
|
134
|
+
(1-@alpha) * (@r_search + gamma * v[:low])
|
135
|
+
q_high_wait = @r_wait + gamma * v[:high]
|
136
|
+
q_low_search = (1-@beta) * (@r_rescue + gamma * v[:high]) +
|
137
|
+
@beta * (@r_search + gamma * v[:low])
|
138
|
+
q_low_wait = @r_wait + gamma * v[:low]
|
139
|
+
q_low_recharge = 0 + gamma * v[:high]
|
140
|
+
|
141
|
+
q = solver.state_action_value
|
142
|
+
assert_close q[[:high, :search]], q_high_search
|
143
|
+
assert_close q[[:high, :wait]], q_high_wait
|
144
|
+
assert_close q[[:low, :search]], q_low_search
|
145
|
+
assert_close q[[:low, :wait]], q_low_wait
|
146
|
+
assert_close q[[:low, :recharge]], q_low_recharge
|
123
147
|
end
|
124
148
|
|
125
149
|
#
|
@@ -239,6 +263,7 @@ class TestFiniteMDP < Test::Unit::TestCase
|
|
239
263
|
[-0.04, -0.04, -0.04, -0.04]],
|
240
264
|
[[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
|
241
265
|
model.check_transition_probabilities_sum
|
266
|
+
assert_equal Set[], model.terminal_states
|
242
267
|
|
243
268
|
assert_equal Set[
|
244
269
|
[0, 0], [0, 1], [0, 2], [0, 3],
|
@@ -278,6 +303,7 @@ class TestFiniteMDP < Test::Unit::TestCase
|
|
278
303
|
[ r, r, r, r]],
|
279
304
|
[[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
|
280
305
|
model.check_transition_probabilities_sum
|
306
|
+
assert_equal Set[], model.terminal_states # no actual terminals
|
281
307
|
|
282
308
|
check_grid_solutions model,
|
283
309
|
["> > > ",
|
@@ -294,6 +320,7 @@ class TestFiniteMDP < Test::Unit::TestCase
|
|
294
320
|
[ r, r, r, r]],
|
295
321
|
[[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
|
296
322
|
model.check_transition_probabilities_sum
|
323
|
+
assert_equal Set[], model.terminal_states # no actual terminals
|
297
324
|
|
298
325
|
check_grid_solutions model,
|
299
326
|
["> > > ",
|
@@ -310,6 +337,7 @@ class TestFiniteMDP < Test::Unit::TestCase
|
|
310
337
|
[ r, r, r, r]],
|
311
338
|
[[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
|
312
339
|
model.check_transition_probabilities_sum
|
340
|
+
assert_equal Set[], model.terminal_states # no actual terminals
|
313
341
|
|
314
342
|
check_grid_solutions model,
|
315
343
|
["> > > ",
|
@@ -343,5 +371,12 @@ class TestFiniteMDP < Test::Unit::TestCase
|
|
343
371
|
assert p1.eql?(p3)
|
344
372
|
assert_equal p1.hash, p3.hash
|
345
373
|
end
|
374
|
+
|
375
|
+
def test_incomplete_model
  # The single row moves :a to :b under action :a_a; no row starts from :b,
  # so :b is expected to be reported as the only terminal state.
  rows = [[:a, :a_a, :b, 1, 0]]
  incomplete = TableModel.new(rows)
  assert_equal Set[:b], incomplete.terminal_states
end
|
346
381
|
end
|
347
382
|
|
metadata
CHANGED
@@ -1,94 +1,95 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: finite_mdp
|
3
|
-
version: !ruby/object:Gem::Version
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
4
5
|
prerelease:
|
5
|
-
version: 0.0.1
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- John Lees-Miller
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-02-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
16
15
|
name: narray
|
17
|
-
|
18
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &85251730 !ruby/object:Gem::Requirement
|
19
17
|
none: false
|
20
|
-
requirements:
|
21
|
-
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.5.9
|
18
|
+
requirements:
|
24
19
|
- - ~>
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version:
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.5.9
|
27
22
|
type: :runtime
|
28
|
-
version_requirements: *id001
|
29
|
-
- !ruby/object:Gem::Dependency
|
30
|
-
name: gemma
|
31
23
|
prerelease: false
|
32
|
-
|
24
|
+
version_requirements: *85251730
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: gemma
|
27
|
+
requirement: &85251480 !ruby/object:Gem::Requirement
|
33
28
|
none: false
|
34
|
-
requirements:
|
35
|
-
- - ">="
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 1.0.1
|
29
|
+
requirements:
|
38
30
|
- - ~>
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version:
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.1.0
|
41
33
|
type: :development
|
42
|
-
|
43
|
-
|
44
|
-
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *85251480
|
36
|
+
description: ! 'This library provides several ways of describing a
|
37
|
+
|
38
|
+
finite Markov Decision Process (MDP) model (see FiniteMDP::Model) and some
|
39
|
+
|
40
|
+
reasonably efficient implementations of policy iteration and value iteration to
|
41
|
+
|
42
|
+
solve it (see FiniteMDP::Solver).'
|
43
|
+
email:
|
45
44
|
- jdleesmiller@gmail.com
|
46
45
|
executables: []
|
47
|
-
|
48
46
|
extensions: []
|
49
|
-
|
50
|
-
extra_rdoc_files:
|
47
|
+
extra_rdoc_files:
|
51
48
|
- README.rdoc
|
52
|
-
files:
|
53
|
-
- lib/finite_mdp
|
54
|
-
- lib/finite_mdp/vector_valued.rb
|
55
|
-
- lib/finite_mdp/model.rb
|
56
|
-
- lib/finite_mdp/version.rb
|
49
|
+
files:
|
50
|
+
- lib/finite_mdp.rb
|
57
51
|
- lib/finite_mdp/solver.rb
|
52
|
+
- lib/finite_mdp/version.rb
|
58
53
|
- lib/finite_mdp/table_model.rb
|
59
|
-
- lib/finite_mdp.rb
|
54
|
+
- lib/finite_mdp/hash_model.rb
|
55
|
+
- lib/finite_mdp/model.rb
|
56
|
+
- lib/finite_mdp/vector_valued.rb
|
60
57
|
- README.rdoc
|
61
|
-
- test/finite_mdp_test.rb
|
58
|
+
- test/finite_mdp/finite_mdp_test.rb
|
62
59
|
homepage: http://github.com/jdleesmiller/finite_mdp
|
63
60
|
licenses: []
|
64
|
-
|
65
61
|
post_install_message:
|
66
|
-
rdoc_options:
|
62
|
+
rdoc_options:
|
67
63
|
- --main
|
68
64
|
- README.rdoc
|
69
65
|
- --title
|
70
|
-
- finite_mdp-0.
|
71
|
-
require_paths:
|
66
|
+
- finite_mdp-0.1.1 Documentation
|
67
|
+
require_paths:
|
72
68
|
- lib
|
73
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
70
|
none: false
|
75
|
-
requirements:
|
76
|
-
- -
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
version:
|
79
|
-
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
hash: -310962355
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
79
|
none: false
|
81
|
-
requirements:
|
82
|
-
- -
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
version:
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
segments:
|
85
|
+
- 0
|
86
|
+
hash: -310962355
|
85
87
|
requirements: []
|
86
|
-
|
87
88
|
rubyforge_project: finite_mdp
|
88
|
-
rubygems_version: 1.
|
89
|
+
rubygems_version: 1.8.10
|
89
90
|
signing_key:
|
90
91
|
specification_version: 3
|
91
|
-
summary: Solve small finite Markov Decision Process models.
|
92
|
-
test_files:
|
93
|
-
- test/finite_mdp_test.rb
|
92
|
+
summary: Solve small, finite Markov Decision Process models.
|
93
|
+
test_files:
|
94
|
+
- test/finite_mdp/finite_mdp_test.rb
|
94
95
|
has_rdoc:
|