finite_mdp 0.0.1 → 0.1.1

data/README.rdoc CHANGED
@@ -2,6 +2,8 @@
 
 * https://github.com/jdleesmiller/finite_mdp
 
+{<img src="https://secure.travis-ci.org/jdleesmiller/finite_mdp.png"/>}[http://travis-ci.org/jdleesmiller/finite_mdp]
+
 == SYNOPSIS
 
 Solve small, finite Markov Decision Process (MDP) models.
@@ -159,7 +161,7 @@ absorbing state with zero reward, called :stop.
       [-0.04, -0.04, -0.04, -0.04]],
     [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
 
-  # sanity check: probabilities in a row must sum to 1
+  # sanity check: successor state probabilities must sum to 1
   model.check_transition_probabilities_sum
 
   solver = FiniteMDP::Solver.new(model, 1) # discount factor 1
@@ -197,6 +199,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 Tested on
 * ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
 * ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
+* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
 
 == INSTALLATION
 
lib/finite_mdp/model.rb CHANGED
@@ -172,6 +172,24 @@ module FiniteMDP::Model
     raise NotImplementedError
   end
 
+  #
+  # Sum of the transition probabilities for each (state, action) pair; the sums
+  # should be one in a valid model.
+  #
+  # @return [Hash<[State, Action], Float>]
+  #
+  def transition_probability_sums
+    prs = []
+    states.each do |state|
+      actions(state).each do |action|
+        pr = next_states(state, action).map{|next_state|
+          transition_probability(state, action, next_state)}.inject(:+)
+        prs << [[state, action], pr]
+      end
+    end
+    Hash[prs]
+  end
+
   #
   # Raise an error if the sum of the transition probabilities for any (state,
   # action) pair is not sufficiently close to 1.
@@ -181,15 +199,40 @@ module FiniteMDP::Model
   # @return [nil]
   #
   def check_transition_probabilities_sum tol=1e-6
+    transition_probability_sums.each do |(state, action), pr|
+      raise "transition probabilities for state #{state.inspect} and
+        action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+    end
+    nil
+  end
+
+  #
+  # Set of states that have no transitions out.
+  #
+  # At present, this library can't solve a model with terminal states. However,
+  # you can add a dummy state (e.g. <tt>:stop</tt>) with zero reward that
+  # transitions back to itself with probability one.
+  #
+  # Note that if a state has transitions out, but all of them have probability
+  # zero, this method does not detect it as a terminal state. You can check for
+  # these using {#transition_probability_sums} instead.
+  #
+  # @return [Set]
+  #
+  def terminal_states
+    all_states = Set[]
+    out_states = Set[]
     states.each do |state|
+      all_states << state
+      any_out_transitions = false
       actions(state).each do |action|
-        pr = next_states(state, action).map{|next_state|
-          transition_probability(state, action, next_state)}.inject(:+)
-        raise "transition probabilities for state #{state.inspect} and
-          action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+        ns = next_states(state, action)
+        all_states.merge ns
+        any_out_transitions ||= !ns.empty?
       end
+      out_states << state if any_out_transitions
     end
-    nil
+    all_states - out_states
   end
 end
 
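For reference, the two new model checks can be exercised directly on a small model. A minimal sketch using FiniteMDP::TableModel, whose rows are [state, action, next state, probability, reward] as in the tests below; the :go action name is made up for illustration:

  require 'finite_mdp'
  require 'set'

  # a tiny model in which :b has no transitions out
  model = FiniteMDP::TableModel.new [
    [:a, :go, :b, 1.0, 0]]            # :go is a hypothetical action name

  model.transition_probability_sums   #=> {[:a, :go]=>1.0}
  model.check_transition_probabilities_sum  # passes: the only sum is 1
  model.terminal_states                #=> #<Set: {:b}>
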
lib/finite_mdp/solver.rb CHANGED
@@ -73,7 +73,7 @@ class FiniteMDP::Solver
 
   #
   # @return [Model] the model being solved; read only; do not change the model
-  # while it is being solved
+  #   while it is being solved
   #
   attr_reader :model
 
@@ -90,6 +90,27 @@ class FiniteMDP::Solver
     Hash[model.states.zip(@array_value)]
   end
 
+  #
+  # Current state-action value estimates; whereas {#value} returns $V(s)$, this
+  # returns $Q(s,a)$, in the usual notation.
+  #
+  # @return [Hash<[state, action], Float>]
+  #
+  def state_action_value
+    q = {}
+    states = model.states
+    @array_model.each_with_index do |actions, state_n|
+      state = states[state_n]
+      state_actions = model.actions(state)
+      actions.each_with_index do |next_state_ns, action_n|
+        q_sa = next_state_ns.map {|next_state_n, pr, r|
+          pr * (r + @discount * @array_value[next_state_n])}.inject(:+)
+        q[[state, state_actions[action_n]]] = q_sa
+      end
+    end
+    q
+  end
+
   #
   # Current estimate of the optimal action for each state.
   #
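
In use, the new #state_action_value complements #value: after solving, it returns the Q(s,a) estimates keyed by [state, action] pairs. A minimal sketch; here `model` is assumed to be the recycling robot TableModel built in the test suite, but any FiniteMDP::Model works:

  solver = FiniteMDP::Solver.new(model, 0.95)
  solver.policy_iteration_exact(20)

  v = solver.value               # {state => V(s)}
  q = solver.state_action_value  # {[state, action] => Q(s, a)}
  q[[:high, :search]]            # a Float, for the recycling robot model
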
@@ -232,6 +253,13 @@
   #
   # @return [Boolean] true iff iteration converged to within tolerance
   #
+  # @yield [num_iters, delta] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters iterations done so far
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #   iteration
+  #
   def value_iteration tolerance, max_iters=nil
     delta = Float::MAX
     num_iters = 0
@@ -240,7 +268,8 @@
       num_iters += 1
 
       break if delta < tolerance
-      break if max_iters && num_iters > max_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters, delta if block_given?
     end
     delta < tolerance
   end
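
The new yield makes it possible to watch value iteration converge without changing the return value. A minimal sketch; the tolerance, iteration cap and discount are arbitrary, and `model` stands for any FiniteMDP::Model:

  solver = FiniteMDP::Solver.new(model, 0.95)
  converged = solver.value_iteration(1e-6, 1000) do |num_iters, delta|
    # delta is the largest change in the value function in this iteration
    puts "iteration #{num_iters}: delta=#{delta}"
  end
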
@@ -263,6 +292,18 @@
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_policy_iters, num_value_iters, delta] at the end of each
+  #   policy evaluation iteration
+  #
+  # @yieldparam [Integer] num_policy_iters policy improvement iterations done so
+  #   far
+  #
+  # @yieldparam [Integer] num_value_iters policy evaluation iterations done so
+  #   far for the current policy improvement iteration
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #   policy evaluation iteration
+  #
   def policy_iteration value_tolerance, max_value_iters=nil,
     max_policy_iters=nil
 
@@ -276,14 +317,15 @@
         num_value_iters += 1
 
         break if value_delta < value_tolerance
-        break if max_value_iters && num_value_iters > max_value_iters
+        break if max_value_iters && num_value_iters >= max_value_iters
+        yield num_policy_iters, num_value_iters, value_delta if block_given?
       end
 
       # policy improvement
       stable = improve_policy
      num_policy_iters += 1
       break if stable
-      break if max_policy_iters && num_policy_iters > max_policy_iters
+      break if max_policy_iters && num_policy_iters >= max_policy_iters
     end
     stable
   end
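
Likewise, #policy_iteration now reports progress of the inner policy evaluation loop. A sketch under the same assumptions, using the same arguments as the test's policy_iteration(1e-4, 2, 50) call:

  stable = solver.policy_iteration(1e-4, 2, 50) do |policy_iter, value_iter, delta|
    puts "policy iteration #{policy_iter}, evaluation sweep #{value_iter}: delta=#{delta}"
  end
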
@@ -297,6 +339,10 @@
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_iters] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters policy improvement iterations done so far
+  #
   def policy_iteration_exact max_iters=nil
     stable = false
     num_iters = 0
@@ -305,7 +351,8 @@
       stable = improve_policy
       num_iters += 1
       break if stable
-      break if max_iters && num_iters > max_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters if block_given?
     end
     stable
   end
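
The exact-evaluation variant yields only the iteration count; continuing the sketch above:

  solver.policy_iteration_exact(20) do |num_iters|
    puts "policy improvement iteration #{num_iters}"
  end
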
lib/finite_mdp/version.rb CHANGED
@@ -1,3 +1,6 @@
 module FiniteMDP
-  VERSION = '0.0.1'
+  VERSION_MAJOR = 0
+  VERSION_MINOR = 1
+  VERSION_PATCH = 1
+  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
 end
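
Splitting the version into components leaves the joined string in the same form as before, so code that reads FiniteMDP::VERSION keeps working; for example:

  require 'finite_mdp'

  FiniteMDP::VERSION        #=> "0.1.1"
  FiniteMDP::VERSION_MAJOR  #=> 0
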
test/finite_mdp/finite_mdp_test.rb CHANGED
@@ -9,10 +9,16 @@ require 'set'
 class TestFiniteMDP < Test::Unit::TestCase
   include FiniteMDP
 
+  def assert_close expected, actual, tol=1e-6
+    assert (expected - actual).abs < tol,
+      "expected #{actual} to be within #{tol} of #{expected}"
+  end
+
   # check that we get the same model back; model parameters must be set before
   # calling; see test_recycling_robot
   def check_recycling_robot_model model, sparse
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
 
     assert_equal Set[:high, :low], Set[*model.states]
     assert_equal Set[:search, :wait], Set[*model.actions(:high)]
@@ -113,13 +119,31 @@ class TestFiniteMDP < Test::Unit::TestCase
 
     # try solving with policy iteration using iterative policy evaluation
     solver = Solver.new(table_model, 0.95, Hash.new {:wait})
-    assert solver.policy_iteration(1e-4, 2, 20), "did not find stable policy"
+    assert solver.policy_iteration(1e-4, 2, 50), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
 
     # try solving with policy iteration using exact policy evaluation
-    solver = Solver.new(table_model, 0.95, Hash.new {:wait})
+    gamma = 0.95
+    solver = Solver.new(table_model, gamma, Hash.new {:wait})
     assert solver.policy_iteration_exact(20), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
+
+    # check the corresponding state-action values (Q(s,a) values)
+    v = solver.value
+    q_high_search = @alpha * (@r_search + gamma * v[:high]) +
+      (1-@alpha) * (@r_search + gamma * v[:low])
+    q_high_wait = @r_wait + gamma * v[:high]
+    q_low_search = (1-@beta) * (@r_rescue + gamma * v[:high]) +
+      @beta * (@r_search + gamma * v[:low])
+    q_low_wait = @r_wait + gamma * v[:low]
+    q_low_recharge = 0 + gamma * v[:high]
+
+    q = solver.state_action_value
+    assert_close q[[:high, :search]], q_high_search
+    assert_close q[[:high, :wait]], q_high_wait
+    assert_close q[[:low, :search]], q_low_search
+    assert_close q[[:low, :wait]], q_low_wait
+    assert_close q[[:low, :recharge]], q_low_recharge
   end
 
   #
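
The hand-computed expectations in the test above are instances of the usual one-step lookahead identity relating #state_action_value to #value, in the notation of the solver docs:

  Q(s, a) = \sum_{s'} P(s' \mid s, a) \left[ R(s, a, s') + \gamma V(s') \right]

so, for the recycling robot, Q(high, search) = \alpha (r_search + \gamma V(high)) + (1 - \alpha)(r_search + \gamma V(low)), which is exactly q_high_search above.
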
@@ -239,6 +263,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [-0.04, -0.04, -0.04, -0.04]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
 
     assert_equal Set[
       [0, 0], [0, 1], [0, 2], [0, 3],
@@ -278,6 +303,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [ r, r, r, r]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -294,6 +320,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [ r, r, r, r]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -310,6 +337,7 @@ class TestFiniteMDP < Test::Unit::TestCase
        [ r, r, r, r]],
       [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -343,5 +371,12 @@ class TestFiniteMDP < Test::Unit::TestCase
     assert p1.eql?(p3)
     assert_equal p1.hash, p3.hash
   end
+
+  def test_incomplete_model
+    # model with a transition from a to b but no transitions from b
+    table_model = TableModel.new [
+      [:a, :a_a, :b, 1, 0]]
+    assert_equal Set[:b], table_model.terminal_states
+  end
 end
 
metadata CHANGED
@@ -1,94 +1,95 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: finite_mdp
-version: !ruby/object:Gem::Version
+version: !ruby/object:Gem::Version
+  version: 0.1.1
   prerelease:
-  version: 0.0.1
 platform: ruby
-authors:
+authors:
 - John Lees-Miller
 autorequire:
 bindir: bin
 cert_chain: []
-
-date: 2011-04-17 00:00:00 Z
-dependencies:
-- !ruby/object:Gem::Dependency
+date: 2012-02-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: narray
-  prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
+  requirement: &85251730 !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.5.9
+    requirements:
     - - ~>
-      - !ruby/object:Gem::Version
-        version: "0"
+      - !ruby/object:Gem::Version
+        version: 0.5.9
   type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
-  name: gemma
   prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
+  version_requirements: *85251730
+- !ruby/object:Gem::Dependency
+  name: gemma
+  requirement: &85251480 !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 1.0.1
+    requirements:
     - - ~>
-      - !ruby/object:Gem::Version
-        version: "1.0"
+      - !ruby/object:Gem::Version
+        version: 2.1.0
   type: :development
-  version_requirements: *id002
-description: Solve small finite Markov Decision Process models.
-email:
+  prerelease: false
+  version_requirements: *85251480
+description: ! 'This library provides several ways of describing a
+
+  finite Markov Decision Process (MDP) model (see FiniteMDP::Model) and some
+
+  reasonably efficient implementations of policy iteration and value iteration to
+
+  solve it (see FiniteMDP::Solver).'
+email:
 - jdleesmiller@gmail.com
 executables: []
-
 extensions: []
-
-extra_rdoc_files:
+extra_rdoc_files:
 - README.rdoc
-files:
-- lib/finite_mdp/hash_model.rb
-- lib/finite_mdp/vector_valued.rb
-- lib/finite_mdp/model.rb
-- lib/finite_mdp/version.rb
+files:
+- lib/finite_mdp.rb
 - lib/finite_mdp/solver.rb
+- lib/finite_mdp/version.rb
 - lib/finite_mdp/table_model.rb
-- lib/finite_mdp.rb
+- lib/finite_mdp/hash_model.rb
+- lib/finite_mdp/model.rb
+- lib/finite_mdp/vector_valued.rb
 - README.rdoc
-- test/finite_mdp_test.rb
+- test/finite_mdp/finite_mdp_test.rb
 homepage: http://github.com/jdleesmiller/finite_mdp
 licenses: []
-
 post_install_message:
-rdoc_options:
+rdoc_options:
 - --main
 - README.rdoc
 - --title
-- finite_mdp-0.0.1 Documentation
-require_paths:
+- finite_mdp-0.1.1 Documentation
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+      segments:
+      - 0
+      hash: -310962355
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+      segments:
+      - 0
+      hash: -310962355
 requirements: []
-
 rubyforge_project: finite_mdp
-rubygems_version: 1.7.2
+rubygems_version: 1.8.10
 signing_key:
 specification_version: 3
-summary: Solve small finite Markov Decision Process models.
-test_files:
-- test/finite_mdp_test.rb
+summary: Solve small, finite Markov Decision Process models.
+test_files:
+- test/finite_mdp/finite_mdp_test.rb
 has_rdoc: