finite_mdp 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: abf81db7b691f5238c134d835f819f75609fa17c
4
- data.tar.gz: 4d897e26e7cc8e8aaffd5c5ce80855d3de55fbb2
3
+ metadata.gz: bd00a14ccd84691b9ba6f544d1c73453e0cf6b68
4
+ data.tar.gz: 21a81b94680509a011ddf870bd6e7f86e905d000
5
5
  SHA512:
6
- metadata.gz: 8711791575db42460dc233ab92a787697731e833c3f121a351ddb1a3dc690c411ec00ff99a0dd996b33fd1be6cae4ea1a423354c013e709f4cb12968b1e3d0c8
7
- data.tar.gz: aae51101f51e9d60f9b648b58a3ea6443f8228b8ddee4b00c1d55861d8fd89f8c48d4b35174c4fbda2dbdff037d7631d64ecc3f45957e529e5720087e43fe2b3
6
+ metadata.gz: af3127d8c5d7d84260e143e18fd611914a767075c903f97b767dfb6b3654e0c1efd54e0c8cc147ab8c61b9a23c241a19abe0939255d520fb74bf0b1e46758019
7
+ data.tar.gz: 2f22ebc9a7bfacfb08c92c309e672da77a0dadb161be4c1a40e85f80fb690b67aad0839e27eb0ceb3b3404ea49eb16fb9344c82442ae700cd8a983201e8e37a1
data/lib/finite_mdp.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'enumerator'
3
2
 
4
3
  require 'finite_mdp/version'
5
4
  require 'finite_mdp/vector_valued'
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  #
3
4
  # A finite markov decision process model for which the states, transition
4
5
  # probabilities and rewards are stored in a sparse nested array format:
@@ -216,7 +217,9 @@ class FiniteMDP::ArrayModel
216
217
  pr = model.transition_probability(state, action, next_state)
217
218
  next unless pr > 0 || !sparse
218
219
  reward = model.reward(state, action, next_state)
219
- [state_action_map.state_index(next_state), pr, reward]
220
+ next_index = state_action_map.state_index(next_state)
221
+ raise "successor state not found: #{next_state}" unless next_index
222
+ [next_index, pr, reward]
220
223
  end.compact
221
224
  end
222
225
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  #
3
4
  # A finite markov decision process model for which the transition
4
5
  # probabilities and rewards are specified using nested hash tables.
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  #
3
4
  # Interface that defines a finite markov decision process model.
4
5
  #
@@ -202,8 +203,9 @@ module FiniteMDP::Model
202
203
  #
203
204
  def check_transition_probabilities_sum(tol = 1e-6)
204
205
  transition_probability_sums.each do |(state, action), pr|
205
- raise "transition probabilities for state #{state.inspect} and
206
- action #{action.inspect} sum to #{pr}" if pr < 1 - tol
206
+ next if (1 - pr).abs <= tol
207
+ raise "transition probabilities for state #{state.inspect} and action " \
208
+ "#{action.inspect} sum to #{pr}"
207
209
  end
208
210
  nil
209
211
  end
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # We use A to denote a matrix, which rubocop does not like.
4
- # rubocop:disable Style/MethodName
5
- # rubocop:disable Style/VariableName
4
+ # rubocop:disable Naming/MethodName
5
+ # rubocop:disable Naming/VariableName
6
6
 
7
7
  require 'narray'
8
8
 
@@ -328,8 +328,10 @@ class FiniteMDP::Solver
328
328
  loop do
329
329
  value_delta = evaluate_policy
330
330
  num_value_iters += 1
331
- yield(num_policy_iters, num_actions_changed, num_value_iters,
332
- value_delta) if block_given?
331
+ if block_given?
332
+ yield(num_policy_iters, num_actions_changed, num_value_iters,
333
+ value_delta)
334
+ end
333
335
 
334
336
  break if value_delta < value_tolerance
335
337
  break if max_value_iters && num_value_iters >= max_value_iters
@@ -402,3 +404,6 @@ class FiniteMDP::Solver
402
404
  @policy_b[state_n] = b_n
403
405
  end
404
406
  end
407
+
408
+ # rubocop:enable Naming/MethodName
409
+ # rubocop:enable Naming/VariableName
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  #
3
4
  # A finite markov decision process model for which the states, actions,
4
5
  # transition probabilities and rewards are specified as a table. This is a
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  #
3
4
  # Define an object's hash code and equality (in the sense of <tt>eql?</tt>)
4
5
  # according to its array representation (<tt>to_a</tt>). See notes for {Model}
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module FiniteMDP
3
4
  VERSION_MAJOR = 0
4
- VERSION_MINOR = 3
5
+ VERSION_MINOR = 4
5
6
  VERSION_PATCH = 0
6
7
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
8
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  if ENV['COVERAGE']
3
4
  require 'simplecov'
4
5
  SimpleCov.start
@@ -60,9 +61,9 @@ class TestFiniteMDP < MiniTest::Test
60
61
  assert_equal @r_wait, model.reward(:high, :wait, :high)
61
62
 
62
63
  if sparse
63
- assert_equal nil, model.reward(:low, :wait, :high)
64
- assert_equal nil, model.reward(:low, :recharge, :low)
65
- assert_equal nil, model.reward(:high, :wait, :low)
64
+ assert_nil model.reward(:low, :wait, :high)
65
+ assert_nil model.reward(:low, :recharge, :low)
66
+ assert_nil model.reward(:high, :wait, :low)
66
67
  else
67
68
  assert_equal @r_wait, model.reward(:low, :wait, :high)
68
69
  assert_equal 0, model.reward(:low, :recharge, :low)
@@ -168,11 +169,11 @@ class TestFiniteMDP < MiniTest::Test
168
169
  q_low_recharge = 0 + gamma * v[:high]
169
170
 
170
171
  q = solver.state_action_value
171
- assert_close q[[:high, :search]], q_high_search
172
- assert_close q[[:high, :wait]], q_high_wait
173
- assert_close q[[:low, :search]], q_low_search
174
- assert_close q[[:low, :wait]], q_low_wait
175
- assert_close q[[:low, :recharge]], q_low_recharge
172
+ assert_close q[%i[high search]], q_high_search
173
+ assert_close q[%i[high wait]], q_high_wait
174
+ assert_close q[%i[low search]], q_low_search
175
+ assert_close q[%i[low wait]], q_low_wait
176
+ assert_close q[%i[low recharge]], q_low_recharge
176
177
  end
177
178
 
178
179
  #
@@ -314,7 +315,7 @@ class TestFiniteMDP < MiniTest::Test
314
315
  [1, 0], [1, 2], [1, 3],
315
316
  [2, 0], [2, 1], [2, 2], [2, 3], :stop], Set[*model.states]
316
317
 
317
- assert_equal Set[%w(^ > v <)], Set[model.actions([0, 0])]
318
+ assert_equal Set[%w[^ > v <]], Set[model.actions([0, 0])]
318
319
  assert_equal [:stop], model.actions([1, 3])
319
320
  assert_equal [:stop], model.actions(:stop)
320
321
 
@@ -332,11 +333,11 @@ class TestFiniteMDP < MiniTest::Test
332
333
  ], model.hash_to_grid(solver.policy)
333
334
 
334
335
  # check values against Figure 17.3
335
- assert [[0.812, 0.868, 0.918, 1],
336
+ assert([[0.812, 0.868, 0.918, 1],
336
337
  [0.762, nil, 0.660, -1],
337
338
  [0.705, 0.655, 0.611, 0.388]].flatten
338
339
  .zip(model.hash_to_grid(solver.value).flatten)
339
- .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 }
340
+ .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 })
340
341
  end
341
342
 
342
343
  def test_aima_grid_2
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: finite_mdp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Lees-Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-09 00:00:00.000000000 Z
11
+ date: 2017-12-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.43.0
47
+ version: 0.52.0
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.43.0
54
+ version: 0.52.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: simplecov
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -96,7 +96,7 @@ rdoc_options:
96
96
  - "--main"
97
97
  - README.rdoc
98
98
  - "--title"
99
- - finite_mdp-0.3.0 Documentation
99
+ - finite_mdp-0.4.0 Documentation
100
100
  require_paths:
101
101
  - lib
102
102
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
111
  version: '0'
112
112
  requirements: []
113
113
  rubyforge_project: finite_mdp
114
- rubygems_version: 2.5.1
114
+ rubygems_version: 2.6.13
115
115
  signing_key:
116
116
  specification_version: 4
117
117
  summary: Solve small, finite Markov Decision Process models.