finite_mdp 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -2,6 +2,8 @@
 
 * https://github.com/jdleesmiller/finite_mdp
 
+{<img src="https://secure.travis-ci.org/jdleesmiller/finite_mdp.png"/>}[http://travis-ci.org/jdleesmiller/finite_mdp]
+
 == SYNOPSIS
 
 Solve small, finite Markov Decision Process (MDP) models.
@@ -159,7 +161,7 @@ absorbing state with zero reward, called :stop.
      [-0.04, -0.04, -0.04, -0.04]],
     [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
 
-  # sanity check: probabilities in a row must sum to 1
+  # sanity check: successor state probabilities must sum to 1
  model.check_transition_probabilities_sum
 
  solver = FiniteMDP::Solver.new(model, 1) # discount factor 1
@@ -197,6 +199,7 @@ at http://aima.cs.berkeley.edu/python/mdp.html
 Tested on
 * ruby 1.8.7 (2010-06-23 patchlevel 299) [i686-linux]
 * ruby 1.9.2p0 (2010-08-18 revision 29036) [i686-linux]
+* ruby 1.9.2p180 (2011-02-18 revision 30909) [x86_64-linux]
 
 == INSTALLATION
 
@@ -172,6 +172,24 @@ module FiniteMDP::Model
     raise NotImplementedError
   end
 
+  #
+  # Sum of the transition probabilities for each (state, action) pair; the sums
+  # should be one in a valid model.
+  #
+  # @return [Hash<[State, Action], Float>]
+  #
+  def transition_probability_sums
+    prs = []
+    states.each do |state|
+      actions(state).each do |action|
+        pr = next_states(state, action).map{|next_state|
+          transition_probability(state, action, next_state)}.inject(:+)
+        prs << [[state, action], pr]
+      end
+    end
+    Hash[prs]
+  end
+
   #
   # Raise an error if the sum of the transition probabilities for any (state,
   # action) pair is not sufficiently close to 1.
@@ -181,15 +199,40 @@ module FiniteMDP::Model
   # @return [nil]
   #
   def check_transition_probabilities_sum tol=1e-6
+    transition_probability_sums.each do |(state, action), pr|
+      raise "transition probabilities for state #{state.inspect} and
+        action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+    end
+    nil
+  end
+
+  #
+  # Set of states that have no transitions out.
+  #
+  # At present, this library can't solve a model with terminal states. However,
+  # you can add a dummy state (e.g. <tt>:stop</tt>) with zero reward that
+  # transitions back to itself with probability one.
+  #
+  # Note that if a state has transitions out, but all of them have probability
+  # zero, this method does not detect it as a terminal state. You can check for
+  # these using {#transition_probability_sums} instead.
+  #
+  # @return [Set]
+  #
+  def terminal_states
+    all_states = Set[]
+    out_states = Set[]
     states.each do |state|
+      all_states << state
+      any_out_transitions = false
       actions(state).each do |action|
-        pr = next_states(state, action).map{|next_state|
-          transition_probability(state, action, next_state)}.inject(:+)
-        raise "transition probabilities for state #{state.inspect} and
-          action #{action.inspect} sum to #{pr}" if pr < 1 - tol
+        ns = next_states(state, action)
+        all_states.merge ns
+        any_out_transitions ||= !ns.empty?
      end
+      out_states << state if any_out_transitions
    end
-    nil
+    all_states - out_states
  end
 end
 
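A minimal usage sketch of the new model checks (illustrative, not part of the diff), assuming the TableModel row format <tt>[state, action, next state, probability, reward]</tt> that appears in the tests below:

  require 'finite_mdp'

  # one transition from :a to :b, and no transitions out of :b,
  # so :b is reported as a terminal state
  model = FiniteMDP::TableModel.new [
    [:a, :a_a, :b, 1, 0]]

  model.transition_probability_sums        # => {[:a, :a_a] => 1}
  model.check_transition_probabilities_sum # raises if any sum is below 1 - tol
  model.terminal_states                    # => #<Set: {:b}>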
@@ -73,7 +73,7 @@ class FiniteMDP::Solver
 
   #
   # @return [Model] the model being solved; read only; do not change the model
-  # while it is being solved
+  # while it is being solved
   #
   attr_reader :model
 
@@ -90,6 +90,27 @@ class FiniteMDP::Solver
     Hash[model.states.zip(@array_value)]
   end
 
+  #
+  # Current state-action value estimates; whereas {#value} returns $V(s)$, this
+  # returns $Q(s,a)$, in the usual notation.
+  #
+  # @return [Hash<[state, action], Float>]
+  #
+  def state_action_value
+    q = {}
+    states = model.states
+    @array_model.each_with_index do |actions, state_n|
+      state = states[state_n]
+      state_actions = model.actions(state)
+      actions.each_with_index do |next_state_ns, action_n|
+        q_sa = next_state_ns.map {|next_state_n, pr, r|
+          pr * (r + @discount * @array_value[next_state_n])}.inject(:+)
+        q[[state, state_actions[action_n]]] = q_sa
+      end
+    end
+    q
+  end
+
   #
   # Current estimate of the optimal action for each state.
   #
@@ -232,6 +253,13 @@
   #
   # @return [Boolean] true iff iteration converged to within tolerance
   #
+  # @yield [num_iters, delta] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters iterations done so far
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #   iteration
+  #
   def value_iteration tolerance, max_iters=nil
     delta = Float::MAX
     num_iters = 0
@@ -240,7 +268,8 @@
       num_iters += 1
 
       break if delta < tolerance
-      break if max_iters && num_iters > max_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters, delta if block_given?
     end
     delta < tolerance
   end
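A sketch of the new progress block on value_iteration, assuming a model built as in the README synopsis and an illustrative discount and tolerance:

  solver = FiniteMDP::Solver.new(model, 0.95)
  converged = solver.value_iteration(1e-6, 100) do |num_iters, delta|
    puts "iteration #{num_iters}: largest value change #{delta}"
  end
  puts solver.policy.inspect if converged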
@@ -263,6 +292,18 @@
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_policy_iters, num_value_iters, delta] at the end of each
+  #   policy evaluation iteration
+  #
+  # @yieldparam [Integer] num_policy_iters policy improvement iterations done so
+  #   far
+  #
+  # @yieldparam [Integer] num_value_iters policy evaluation iterations done so
+  #   far for the current policy improvement iteration
+  #
+  # @yieldparam [Float] delta largest change in the value function in the last
+  #   policy evaluation iteration
+  #
   def policy_iteration value_tolerance, max_value_iters=nil,
       max_policy_iters=nil
 
@@ -276,14 +317,15 @@
         num_value_iters += 1
 
         break if value_delta < value_tolerance
-        break if max_value_iters && num_value_iters > max_value_iters
+        break if max_value_iters && num_value_iters >= max_value_iters
+        yield num_policy_iters, num_value_iters, value_delta if block_given?
       end
 
       # policy improvement
       stable = improve_policy
       num_policy_iters += 1
       break if stable
-      break if max_policy_iters && num_policy_iters > max_policy_iters
+      break if max_policy_iters && num_policy_iters >= max_policy_iters
     end
     stable
   end
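policy_iteration gains a similar block, called after each policy evaluation sweep; a sketch assuming the recycling robot model and initial policy used in the tests:

  solver = FiniteMDP::Solver.new(table_model, 0.95, Hash.new {:wait})
  stable = solver.policy_iteration(1e-4, 20, 50) do |policy_iters, value_iters, delta|
    puts "policy iteration #{policy_iters}, evaluation sweep #{value_iters}: delta #{delta}"
  end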
@@ -297,6 +339,10 @@
   #
   # @return [Boolean] true iff a stable policy was obtained
   #
+  # @yield [num_iters] at the end of each iteration
+  #
+  # @yieldparam [Integer] num_iters policy improvement iterations done so far
+  #
   def policy_iteration_exact max_iters=nil
     stable = false
     num_iters = 0
@@ -305,7 +351,8 @@
       stable = improve_policy
       num_iters += 1
       break if stable
-      break if max_iters && num_iters > max_iters
+      break if max_iters && num_iters >= max_iters
+      yield num_iters if block_given?
     end
     stable
   end
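After any of these solvers has run, the new state_action_value reader exposes Q(s, a) — the sum over successor states of P(s'|s,a) * (reward + discount * V(s')), as computed in the method above — alongside the existing value and policy readers; a brief sketch, again using the recycling robot states from the tests:

  solver.policy_iteration_exact(20)
  v = solver.value              # V(s): Hash from state to value
  q = solver.state_action_value # Q(s,a): Hash from [state, action] to value
  q[[:high, :search]]           # state-action value of searching in the :high state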
@@ -1,3 +1,6 @@
 module FiniteMDP
-  VERSION = '0.0.1'
+  VERSION_MAJOR = 0
+  VERSION_MINOR = 1
+  VERSION_PATCH = 1
+  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
 end
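The split constants still join up to the plain version string:

  FiniteMDP::VERSION  # => "0.1.1"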
@@ -9,10 +9,16 @@ require 'set'
 class TestFiniteMDP < Test::Unit::TestCase
   include FiniteMDP
 
+  def assert_close expected, actual, tol=1e-6
+    assert (expected - actual).abs < tol,
+      "expected #{actual} to be within #{tol} of #{expected}"
+  end
+
   # check that we get the same model back; model parameters must be set before
   # calling; see test_recycling_robot
   def check_recycling_robot_model model, sparse
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
 
     assert_equal Set[:high, :low], Set[*model.states]
     assert_equal Set[:search, :wait], Set[*model.actions(:high)]
@@ -113,13 +119,31 @@ class TestFiniteMDP < Test::Unit::TestCase
 
     # try solving with policy iteration using iterative policy evaluation
     solver = Solver.new(table_model, 0.95, Hash.new {:wait})
-    assert solver.policy_iteration(1e-4, 2, 20), "did not find stable policy"
+    assert solver.policy_iteration(1e-4, 2, 50), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
 
     # try solving with policy iteration using exact policy evaluation
-    solver = Solver.new(table_model, 0.95, Hash.new {:wait})
+    gamma = 0.95
+    solver = Solver.new(table_model, gamma, Hash.new {:wait})
     assert solver.policy_iteration_exact(20), "did not find stable policy"
     assert_equal({:high => :search, :low => :recharge}, solver.policy)
+
+    # check the corresponding state-action values (Q(s,a) values)
+    v = solver.value
+    q_high_search = @alpha * (@r_search + gamma * v[:high]) +
+      (1-@alpha) * (@r_search + gamma * v[:low])
+    q_high_wait = @r_wait + gamma * v[:high]
+    q_low_search = (1-@beta) * (@r_rescue + gamma * v[:high]) +
+      @beta * (@r_search + gamma * v[:low])
+    q_low_wait = @r_wait + gamma * v[:low]
+    q_low_recharge = 0 + gamma * v[:high]
+
+    q = solver.state_action_value
+    assert_close q[[:high, :search]], q_high_search
+    assert_close q[[:high, :wait]], q_high_wait
+    assert_close q[[:low, :search]], q_low_search
+    assert_close q[[:low, :wait]], q_low_wait
+    assert_close q[[:low, :recharge]], q_low_recharge
   end
 
   #
@@ -239,6 +263,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [-0.04, -0.04, -0.04, -0.04]],
      [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states
 
     assert_equal Set[
       [0, 0], [0, 1], [0, 2], [0, 3],
@@ -278,6 +303,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [ r, r, r, r]],
      [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -294,6 +320,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [ r, r, r, r]],
      [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -310,6 +337,7 @@ class TestFiniteMDP < Test::Unit::TestCase
       [ r, r, r, r]],
      [[0, 3], [1, 3]]) # terminals (the +1 and -1 states)
     model.check_transition_probabilities_sum
+    assert_equal Set[], model.terminal_states # no actual terminals
 
     check_grid_solutions model,
       ["> > > ",
@@ -343,5 +371,12 @@ class TestFiniteMDP < Test::Unit::TestCase
     assert p1.eql?(p3)
     assert_equal p1.hash, p3.hash
   end
+
+  def test_incomplete_model
+    # model with a transition from a to b but no transitions from b
+    table_model = TableModel.new [
+      [:a, :a_a, :b, 1, 0]]
+    assert_equal Set[:b], table_model.terminal_states
+  end
 end
 
metadata CHANGED
@@ -1,94 +1,95 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: finite_mdp
-version: !ruby/object:Gem::Version
+version: !ruby/object:Gem::Version
+  version: 0.1.1
   prerelease:
-  version: 0.0.1
 platform: ruby
-authors:
+authors:
 - John Lees-Miller
 autorequire:
 bindir: bin
 cert_chain: []
-
-date: 2011-04-17 00:00:00 Z
-dependencies:
-- !ruby/object:Gem::Dependency
+date: 2012-02-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: narray
-  prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
+  requirement: &85251730 !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.5.9
+    requirements:
     - - ~>
-      - !ruby/object:Gem::Version
-        version: "0"
+      - !ruby/object:Gem::Version
+        version: 0.5.9
   type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
-  name: gemma
   prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
+  version_requirements: *85251730
+- !ruby/object:Gem::Dependency
+  name: gemma
+  requirement: &85251480 !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 1.0.1
+    requirements:
     - - ~>
-      - !ruby/object:Gem::Version
-        version: "1.0"
+      - !ruby/object:Gem::Version
+        version: 2.1.0
   type: :development
-  version_requirements: *id002
-description: Solve small finite Markov Decision Process models.
-email:
+  prerelease: false
+  version_requirements: *85251480
+description: ! 'This library provides several ways of describing a
+
+  finite Markov Decision Process (MDP) model (see FiniteMDP::Model) and some
+
+  reasonably efficient implementations of policy iteration and value iteration to
+
+  solve it (see FiniteMDP::Solver).'
+email:
 - jdleesmiller@gmail.com
 executables: []
-
 extensions: []
-
-extra_rdoc_files:
+extra_rdoc_files:
 - README.rdoc
-files:
-- lib/finite_mdp/hash_model.rb
-- lib/finite_mdp/vector_valued.rb
-- lib/finite_mdp/model.rb
-- lib/finite_mdp/version.rb
+files:
+- lib/finite_mdp.rb
 - lib/finite_mdp/solver.rb
+- lib/finite_mdp/version.rb
 - lib/finite_mdp/table_model.rb
-- lib/finite_mdp.rb
+- lib/finite_mdp/hash_model.rb
+- lib/finite_mdp/model.rb
+- lib/finite_mdp/vector_valued.rb
 - README.rdoc
-- test/finite_mdp_test.rb
+- test/finite_mdp/finite_mdp_test.rb
 homepage: http://github.com/jdleesmiller/finite_mdp
 licenses: []
-
 post_install_message:
-rdoc_options:
+rdoc_options:
 - --main
 - README.rdoc
 - --title
-- finite_mdp-0.0.1 Documentation
-require_paths:
+- finite_mdp-0.1.1 Documentation
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+      segments:
+      - 0
+      hash: -310962355
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+      segments:
+      - 0
+      hash: -310962355
 requirements: []
-
 rubyforge_project: finite_mdp
-rubygems_version: 1.7.2
+rubygems_version: 1.8.10
 signing_key:
 specification_version: 3
-summary: Solve small finite Markov Decision Process models.
-test_files:
-- test/finite_mdp_test.rb
+summary: Solve small, finite Markov Decision Process models.
+test_files:
+- test/finite_mdp/finite_mdp_test.rb
 has_rdoc: