finite_mdp 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +4 -1
- data/lib/finite_mdp/model.rb +48 -5
- data/lib/finite_mdp/solver.rb +52 -5
- data/lib/finite_mdp/version.rb +4 -1
- data/test/{finite_mdp_test.rb → finite_mdp/finite_mdp_test.rb} +37 -2
- metadata +59 -58
data/README.rdoc
CHANGED
@@ -2,6 +2,8 @@
 
 * https://github.com/jdleesmiller/finite_mdp
 
+{<img src="https://secure.travis-ci.org/jdleesmiller/finite_mdp.png"/>}[http://travis-ci.org/jdleesmiller/finite_mdp]
+
 == SYNOPSIS
 
 Solve small, finite Markov Decision Process (MDP) models.
@@ -159,7 +161,7 @@ absorbing state with zero reward, called :stop.
 [-0.04, -0.04, -0.04, -0.04]],
 [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
 
-# sanity check:
+# sanity check: successor state probabilities must sum to 1
 model.check_transition_probabilities_sum
 
 solver = FiniteMDP::Solver.new(model, 1) # discount factor 1
@@ -197,6 +199,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 Tested on
 * ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
 * ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
+* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
 
 == INSTALLATION
 
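To make the new sanity-check comment concrete, here is a minimal sketch (not part of the diff) of what check_transition_probabilities_sum enforces, using the TableModel row format [state, action, next_state, probability, reward] that appears in the tests further down; the states, actions and rewards are made up for illustration:

  require 'finite_mdp'

  # two-state chain; rows are [state, action, next_state, probability, reward]
  model = FiniteMDP::TableModel.new [
    [:a, :go, :a, 0.5, 0],
    [:a, :go, :b, 0.5, 1],
    [:b, :go, :b, 1.0, 0]]
  model.check_transition_probabilities_sum # passes: both sums are 1

  # drop one row so the probabilities for [:a, :go] sum to only 0.5 ...
  bad = FiniteMDP::TableModel.new [
    [:a, :go, :b, 0.5, 1],
    [:b, :go, :b, 1.0, 0]]
  bad.check_transition_probabilities_sum   # ... and this raises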
data/lib/finite_mdp/model.rb
CHANGED
@@ -172,6 +172,24 @@ module FiniteMDP::Model
     raise NotImplementedError
   end
 
+  #
+  # Sum of the transition probabilities for each (state, action) pair; the sums
+  # should be one in a valid model.
+  #
+  # @return [Hash<[State, Action], Float>]
+  #
+  def transition_probability_sums
+    prs = []
+    states.each do |state|
+      actions(state).each do |action|
+        pr = next_states(state, action).map{|next_state|
+          transition_probability(state, action, next_state)}.inject(:+)
+        prs << [[state, action], pr]
+      end
+    end
+    Hash[prs]
+  end
+
   #
   # Raise an error if the sum of the transition probabilities for any (state,
   # action) pair is not sufficiently close to 1.
@@ -181,15 +199,40 @@ module FiniteMDP::Model
   # @return [nil]
   #
   def check_transition_probabilities_sum tol=1e-6
+    transition_probability_sums.each do |(state, action), pr|
+      raise "transition probabilities for state #{state.inspect} and
+        action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+    end
+    nil
+  end
+
+  #
+  # Set of states that have no transitions out.
+  #
+  # At present, this library can't solve a model with terminal states. However,
+  # you can add a dummy state (e.g. <tt>:stop</tt>) with zero reward that
+  # transitions back to itself with probability one.
+  #
+  # Note that if a state has transitions out, but all of them have probability
+  # zero, this method does not detect it as a terminal state. You can check for
+  # these using {#transition_probability_sums} instead.
+  #
+  # @return [Set]
+  #
+  def terminal_states
+    all_states = Set[]
+    out_states = Set[]
     states.each do |state|
+      all_states << state
+      any_out_transitions = false
       actions(state).each do |action|
-        pr = next_states(state, action).map{|next_state|
-          transition_probability(state, action, next_state)}.inject(:+)
-        raise "transition probabilities for state #{state.inspect} and
-          action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+        ns = next_states(state, action)
+        all_states.merge ns
+        any_out_transitions ||= !ns.empty?
       end
+      out_states << state if any_out_transitions
     end
-    nil
+    all_states - out_states
   end
 end
 
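A minimal usage sketch (not from the diff) of the two new Model methods above, assuming the TableModel row format [state, action, next_state, probability, reward] used by test_incomplete_model below; the expected outputs are deduced from the code shown here, not taken from the gem's documentation:

  require 'finite_mdp'

  # :b is reachable but has no outgoing transitions
  model = FiniteMDP::TableModel.new [
    [:a, :go, :b, 1.0, 0]]

  model.transition_probability_sums # => {[:a, :go] => 1.0}
  model.terminal_states             # => #<Set: {:b}>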
data/lib/finite_mdp/solver.rb
CHANGED
@@ -73,7 +73,7 @@ class FiniteMDP::Solver
 
   #
   # @return [Model] the model being solved; read only; do not change the model
-  #
+  #   while it is being solved
   #
   attr_reader :model
 
@@ -90,6 +90,27 @@ class FiniteMDP::Solver
     Hash[model.states.zip(@array_value)]
   end
 
+  #
+  # Current state-action value estimates; whereas {#value} returns $V(s)$, this
+  # returns $Q(s,a)$, in the usual notation.
+  #
+  # @return [Hash<[state, action], Float>]
+  #
+  def state_action_value
+    q = {}
+    states = model.states
+    @array_model.each_with_index do |actions, state_n|
+      state = states[state_n]
+      state_actions = model.actions(state)
+      actions.each_with_index do |next_state_ns, action_n|
+        q_sa = next_state_ns.map {|next_state_n, pr, r|
+          pr * (r + @discount * @array_value[next_state_n])}.inject(:+)
+        q[[state, state_actions[action_n]]] = q_sa
+      end
+    end
+    q
+  end
+
   #
   # Current estimate of the optimal action for each state.
   #
@@ -232,6 +253,13 @@ class FiniteMDP::Solver
   #
   # @return [Boolean] true iff iteration converged to within tolerance
   #
+  # @yield [num_iters, delta] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters iterations done so far
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #   iteration
+  #
   def value_iteration tolerance, max_iters=nil
     delta = Float::MAX
     num_iters = 0
@@ -240,7 +268,8 @@ class FiniteMDP::Solver
       num_iters += 1
 
       break if delta < tolerance
-      break if max_iters && num_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters, delta if block_given?
     end
     delta < tolerance
   end
@@ -263,6 +292,18 @@ class FiniteMDP::Solver
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_policy_iters, num_value_iters, delta] at the end of each
+  #   policy evaluation iteration
+  #
+  # @yieldparam [Integer] num_policy_iters policy improvement iterations done so
+  #   far
+  #
+  # @yieldparam [Integer] num_value_iters policy evaluation iterations done so
+  #   far for the current policy improvement iteration
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #   policy evaluation iteration
+  #
   def policy_iteration value_tolerance, max_value_iters=nil,
     max_policy_iters=nil
 
@@ -276,14 +317,15 @@ class FiniteMDP::Solver
         num_value_iters += 1
 
         break if value_delta < value_tolerance
-        break if max_value_iters && num_value_iters
+        break if max_value_iters && num_value_iters >= max_value_iters
+        yield num_policy_iters, num_value_iters, value_delta if block_given?
       end
 
       # policy improvement
      stable = improve_policy
      num_policy_iters += 1
      break if stable
-      break if max_policy_iters && num_policy_iters
+      break if max_policy_iters && num_policy_iters >= max_policy_iters
    end
    stable
  end
@@ -297,6 +339,10 @@ class FiniteMDP::Solver
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_iters] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters policy improvement iterations done so far
+  #
   def policy_iteration_exact max_iters=nil
     stable = false
     num_iters = 0
@@ -305,7 +351,8 @@ class FiniteMDP::Solver
       stable = improve_policy
       num_iters += 1
       break if stable
-      break if max_iters && num_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters if block_given?
     end
     stable
   end
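A hedged usage sketch (not from the diff) of the new solver features: the progress block now yielded by value_iteration, and the Q(s,a) estimates returned by state_action_value. The toy model is illustrative only and the printed/returned values are left as placeholders:

  require 'finite_mdp'

  model = FiniteMDP::TableModel.new [
    [:a, :go, :a, 0.5, 0],
    [:a, :go, :b, 0.5, 1],
    [:b, :go, :b, 1.0, 0]]
  model.check_transition_probabilities_sum

  solver = FiniteMDP::Solver.new(model, 0.9) # discount factor 0.9
  converged = solver.value_iteration(1e-6, 1000) do |num_iters, delta|
    puts "iteration #{num_iters}: largest value change #{delta}"
  end

  solver.value              # => {:a => ..., :b => ...}                (V(s))
  solver.state_action_value # => {[:a, :go] => ..., [:b, :go] => ...}  (Q(s,a))
  solver.policy             # => {:a => :go, :b => :go}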
data/test/{finite_mdp_test.rb → finite_mdp/finite_mdp_test.rb}
CHANGED
@@ -9,10 +9,16 @@ require 'set'
 class TestFiniteMDP < Test::Unit::TestCase
   include FiniteMDP
 
+  def assert_close expected, actual, tol=1e-6
+    assert (expected - actual).abs < tol,
+      "expected #{actual} to be within #{tol} of #{expected}"
+  end
+
   # check that we get the same model back; model parameters must be set before
   # calling; see test_recycling_robot
   def check_recycling_robot_model model, sparse
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
 
     assert_equal Set[:high, :low], Set[*model.states]
     assert_equal Set[:search, :wait], Set[*model.actions(:high)]
@@ -113,13 +119,31 @@ class TestFiniteMDP < Test::Unit::TestCase
 
     # try solving with policy iteration using iterative policy evaluation
     solver = Solver.new(table_model, 0.95, Hash.new {:wait})
-    assert solver.policy_iteration(1e-4, 2,
+    assert solver.policy_iteration(1e-4, 2, 50), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
 
     # try solving with policy iteration using exact policy evaluation
-    solver = Solver.new(table_model, 0.95, Hash.new {:wait})
+    gamma = 0.95
+    solver = Solver.new(table_model, gamma, Hash.new {:wait})
     assert solver.policy_iteration_exact(20), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
+
+    # check the corresponding state-action values (Q(s,a) values)
+    v = solver.value
+    q_high_search = @alpha * (@r_search + gamma * v[:high]) +
+      (1-@alpha) * (@r_search + gamma * v[:low])
+    q_high_wait = @r_wait + gamma * v[:high]
+    q_low_search = (1-@beta) * (@r_rescue + gamma * v[:high]) +
+      @beta * (@r_search + gamma * v[:low])
+    q_low_wait = @r_wait + gamma * v[:low]
+    q_low_recharge = 0 + gamma * v[:high]
+
+    q = solver.state_action_value
+    assert_close q[[:high, :search]], q_high_search
+    assert_close q[[:high, :wait]], q_high_wait
+    assert_close q[[:low, :search]], q_low_search
+    assert_close q[[:low, :wait]], q_low_wait
+    assert_close q[[:low, :recharge]], q_low_recharge
   end
 
   #
@@ -239,6 +263,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [-0.04, -0.04, -0.04, -0.04]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
 
     assert_equal Set[
       [0, 0], [0, 1], [0, 2], [0, 3],
@@ -278,6 +303,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [ r, r, r, r]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -294,6 +320,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [ r, r, r, r]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -310,6 +337,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [ r, r, r, r]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -343,5 +371,12 @@ class TestFiniteMDP < Test::Unit::TestCase
     assert p1.eql?(p3)
     assert_equal p1.hash, p3.hash
   end
+
+  def test_incomplete_model
+    # model with a transition from a to b but no transitions from b
+    table_model = TableModel.new [
+      [:a, :a_a, :b, 1, 0]]
+    assert_equal Set[:b], table_model.terminal_states
+  end
 end
 
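The new recycling-robot assertions above check Solver#state_action_value against hand-computed values. For clarity, the quantity being checked is the one-step Bellman backup; the helper below is hypothetical (it is not part of the gem) and assumes the Model interface also exposes a reward(state, action, next_state) method, which this diff does not show:

  # Q(s, a) = sum over s' of P(s' | s, a) * (R(s, a, s') + gamma * V(s'))
  def q_value model, value, discount, state, action
    model.next_states(state, action).map { |next_state|
      model.transition_probability(state, action, next_state) *
        (model.reward(state, action, next_state) + discount * value[next_state])
    }.inject(:+)
  end

For example, the test's q_high_search is exactly this sum over the two possible successors of (:high, :search), weighted by @alpha and 1 - @alpha.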
metadata
CHANGED
@@ -1,94 +1,95 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: finite_mdp
-version: !ruby/object:Gem::Version
+version: !ruby/object:Gem::Version
+version: 0.1.1
 prerelease:
-version: 0.0.1
 platform: ruby
-authors:
+authors:
 - John Lees-Miller
 autorequire:
 bindir: bin
 cert_chain: []
-
-
-
-- !ruby/object:Gem::Dependency
+date: 2012-02-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
 name: narray
-
-requirement: &id001 !ruby/object:Gem::Requirement
+requirement: &85251730 !ruby/object:Gem::Requirement
 none: false
-requirements:
-- - ">="
-- !ruby/object:Gem::Version
-version: 0.5.9
+requirements:
 - - ~>
-- !ruby/object:Gem::Version
-version:
+- !ruby/object:Gem::Version
+version: 0.5.9
 type: :runtime
-version_requirements: *id001
-- !ruby/object:Gem::Dependency
-name: gemma
 prerelease: false
-
+version_requirements: *85251730
+- !ruby/object:Gem::Dependency
+name: gemma
+requirement: &85251480 !ruby/object:Gem::Requirement
 none: false
-requirements:
-- - ">="
-- !ruby/object:Gem::Version
-version: 1.0.1
+requirements:
 - - ~>
-- !ruby/object:Gem::Version
-version:
+- !ruby/object:Gem::Version
+version: 2.1.0
 type: :development
-
-
-
+prerelease: false
+version_requirements: *85251480
+description: ! 'This library provides several ways of describing a
+
+finite Markov Decision Process (MDP) model (see FiniteMDP::Model) and some
+
+reasonably efficient implementations of policy iteration and value iteration to
+
+solve it (see FiniteMDP::Solver).'
+email:
 - jdleesmiller@gmail.com
 executables: []
-
 extensions: []
-
-extra_rdoc_files:
+extra_rdoc_files:
 - README.rdoc
-files:
-- lib/finite_mdp
-- lib/finite_mdp/vector_valued.rb
-- lib/finite_mdp/model.rb
-- lib/finite_mdp/version.rb
+files:
+- lib/finite_mdp.rb
 - lib/finite_mdp/solver.rb
+- lib/finite_mdp/version.rb
 - lib/finite_mdp/table_model.rb
-- lib/finite_mdp.rb
+- lib/finite_mdp/hash_model.rb
+- lib/finite_mdp/model.rb
+- lib/finite_mdp/vector_valued.rb
 - README.rdoc
-- test/finite_mdp_test.rb
+- test/finite_mdp/finite_mdp_test.rb
 homepage: http://github.com/jdleesmiller/finite_mdp
 licenses: []
-
 post_install_message:
-rdoc_options:
+rdoc_options:
 - --main
 - README.rdoc
 - --title
-- finite_mdp-0.
-require_paths:
+- finite_mdp-0.1.1 Documentation
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
 none: false
-requirements:
-- -
-- !ruby/object:Gem::Version
-version:
-
+requirements:
+- - ! '>='
+- !ruby/object:Gem::Version
+version: '0'
+segments:
+- 0
+hash: -310962355
+required_rubygems_version: !ruby/object:Gem::Requirement
 none: false
-requirements:
-- -
-- !ruby/object:Gem::Version
-version:
+requirements:
+- - ! '>='
+- !ruby/object:Gem::Version
+version: '0'
+segments:
+- 0
+hash: -310962355
 requirements: []
-
 rubyforge_project: finite_mdp
-rubygems_version: 1.
+rubygems_version: 1.8.10
 signing_key:
 specification_version: 3
-summary: Solve small finite Markov Decision Process models.
-test_files:
-- test/finite_mdp_test.rb
+summary: Solve small, finite Markov Decision Process models.
+test_files:
+- test/finite_mdp/finite_mdp_test.rb
 has_rdoc: