finite_mdp 0.3.0 → 0.4.0
- checksums.yaml +4 -4
- data/lib/finite_mdp.rb +0 -1
- data/lib/finite_mdp/array_model.rb +4 -1
- data/lib/finite_mdp/hash_model.rb +1 -0
- data/lib/finite_mdp/model.rb +4 -2
- data/lib/finite_mdp/solver.rb +9 -4
- data/lib/finite_mdp/table_model.rb +1 -0
- data/lib/finite_mdp/vector_valued.rb +1 -0
- data/lib/finite_mdp/version.rb +2 -1
- data/test/finite_mdp/finite_mdp_test.rb +12 -11
- metadata +6 -6
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bd00a14ccd84691b9ba6f544d1c73453e0cf6b68
+  data.tar.gz: 21a81b94680509a011ddf870bd6e7f86e905d000
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: af3127d8c5d7d84260e143e18fd611914a767075c903f97b767dfb6b3654e0c1efd54e0c8cc147ab8c61b9a23c241a19abe0939255d520fb74bf0b1e46758019
+  data.tar.gz: 2f22ebc9a7bfacfb08c92c309e672da77a0dadb161be4c1a40e85f80fb690b67aad0839e27eb0ceb3b3404ea49eb16fb9344c82442ae700cd8a983201e8e37a1
data/lib/finite_mdp/array_model.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 #
 # A finite markov decision process model for which the states, transition
 # probabilities and rewards are stored in a sparse nested array format:
@@ -216,7 +217,9 @@ class FiniteMDP::ArrayModel
           pr = model.transition_probability(state, action, next_state)
           next unless pr > 0 || !sparse
           reward = model.reward(state, action, next_state)
-
+          next_index = state_action_map.state_index(next_state)
+          raise "successor state not found: #{next_state}" unless next_index
+          [next_index, pr, reward]
         end.compact
       end
     end
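The array_model.rb change turns a silently nil successor lookup into an explicit early failure. A minimal sketch of triggering it, assuming FiniteMDP::ArrayModel.from_model is the conversion entry point (the model and sparse locals in the hunk suggest this) and using a deliberately broken model written for illustration:

require 'finite_mdp'

# Hypothetical model whose #states omits :b even though :a can reach it,
# so :b has no resolvable state index.
class BrokenModel
  include FiniteMDP::Model

  def states
    [:a] # :b is missing
  end

  def actions(_state)
    [:go]
  end

  def next_states(_state, _action)
    [:b]
  end

  def transition_probability(_state, _action, _next_state)
    1.0
  end

  def reward(_state, _action, _next_state)
    0.0
  end
end

# With the new check, conversion fails fast with a readable message instead
# of carrying a nil index into the packed arrays.
FiniteMDP::ArrayModel.from_model(BrokenModel.new)
# => RuntimeError: successor state not found: b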
data/lib/finite_mdp/model.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 #
 # Interface that defines a finite markov decision process model.
 #
@@ -202,8 +203,9 @@ module FiniteMDP::Model
   #
   def check_transition_probabilities_sum(tol = 1e-6)
     transition_probability_sums.each do |(state, action), pr|
-
-
+      next if (1 - pr).abs <= tol
+      raise "transition probabilities for state #{state.inspect} and action " \
+            "#{action.inspect} sum to #{pr}"
     end
     nil
   end
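The model.rb hunk rewrites check_transition_probabilities_sum as a guard clause plus an immediate raise. A small sketch of the check in action, assuming TableModel rows take the form [state, action, next state, probability, reward] as in the gem's examples:

require 'finite_mdp'

# The probabilities for (:s, :a) sum to 0.9, outside the default 1e-6
# tolerance, so the check raises; (:t, :a) sums to 1 and passes.
model = FiniteMDP::TableModel.new([
  [:s, :a, :s, 0.5, 0.0],
  [:s, :a, :t, 0.4, 0.0],
  [:t, :a, :t, 1.0, 0.0]
])

model.check_transition_probabilities_sum
# => RuntimeError: transition probabilities for state :s and action :a
#    sum to 0.9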
data/lib/finite_mdp/solver.rb CHANGED

@@ -1,8 +1,8 @@
 # frozen_string_literal: true
 
 # We use A to denote a matrix, which rubocop does not like.
-# rubocop:disable
-# rubocop:disable
+# rubocop:disable Naming/MethodName
+# rubocop:disable Naming/VariableName
 
 require 'narray'
 
@@ -328,8 +328,10 @@ class FiniteMDP::Solver
     loop do
       value_delta = evaluate_policy
       num_value_iters += 1
-
-
+      if block_given?
+        yield(num_policy_iters, num_actions_changed, num_value_iters,
+              value_delta)
+      end
 
       break if value_delta < value_tolerance
       break if max_value_iters && num_value_iters >= max_value_iters
@@ -402,3 +404,6 @@ class FiniteMDP::Solver
     @policy_b[state_n] = b_n
   end
 end
+
+# rubocop:enable Naming/MethodName
+# rubocop:enable Naming/VariableName
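The solver.rb hunk makes the per-sweep progress yield conditional on block_given?, so callers that pass no block are unaffected. A sketch of a caller that does want progress output, assuming Solver.new(model, discount) and a policy_iteration that takes the value tolerance as its first argument (the value_tolerance and max_value_iters locals in the hunk suggest this shape):

require 'finite_mdp'

# Hypothetical two-state model, just to give the solver something to chew on.
model = FiniteMDP::TableModel.new([
  [:high, :search, :high, 0.9,  1.0],
  [:high, :search, :low,  0.1,  1.0],
  [:high, :wait,   :high, 1.0,  0.5],
  [:low,  :wait,   :low,  1.0,  0.5],
  [:low,  :search, :low,  1.0, -1.0]
])

solver = FiniteMDP::Solver.new(model, 0.95) # 0.95 = discount factor

# The block receives the four counters yielded in the hunk: policy
# iterations, actions changed, value iterations, and the value delta.
solver.policy_iteration(1e-6) do |pi, changed, vi, delta|
  printf("pi=%d changed=%d vi=%d delta=%.2e\n", pi, changed, vi, delta)
end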
data/test/finite_mdp/finite_mdp_test.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 if ENV['COVERAGE']
   require 'simplecov'
   SimpleCov.start
@@ -60,9 +61,9 @@ class TestFiniteMDP < MiniTest::Test
     assert_equal @r_wait, model.reward(:high, :wait, :high)
 
     if sparse
-
-
-
+      assert_nil model.reward(:low, :wait, :high)
+      assert_nil model.reward(:low, :recharge, :low)
+      assert_nil model.reward(:high, :wait, :low)
     else
       assert_equal @r_wait, model.reward(:low, :wait, :high)
       assert_equal 0, model.reward(:low, :recharge, :low)
@@ -168,11 +169,11 @@ class TestFiniteMDP < MiniTest::Test
     q_low_recharge = 0 + gamma * v[:high]
 
     q = solver.state_action_value
-    assert_close q[[
-    assert_close q[[
-    assert_close q[[
-    assert_close q[[
-    assert_close q[[
+    assert_close q[%i[high search]], q_high_search
+    assert_close q[%i[high wait]], q_high_wait
+    assert_close q[%i[low search]], q_low_search
+    assert_close q[%i[low wait]], q_low_wait
+    assert_close q[%i[low recharge]], q_low_recharge
   end
 
   #
@@ -314,7 +315,7 @@ class TestFiniteMDP < MiniTest::Test
                  [1, 0], [1, 2], [1, 3],
                  [2, 0], [2, 1], [2, 2], [2, 3], :stop], Set[*model.states]
 
-    assert_equal Set[%w
+    assert_equal Set[%w[^ > v <]], Set[model.actions([0, 0])]
     assert_equal [:stop], model.actions([1, 3])
     assert_equal [:stop], model.actions(:stop)
 
@@ -332,11 +333,11 @@ class TestFiniteMDP < MiniTest::Test
            ], model.hash_to_grid(solver.policy)
 
     # check values against Figure 17.3
-    assert
+    assert([[0.812, 0.868, 0.918, 1],
            [0.762, nil, 0.660, -1],
            [0.705, 0.655, 0.611, 0.388]].flatten
            .zip(model.hash_to_grid(solver.value).flatten)
-           .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 }
+           .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 })
   end
 
   def test_aima_grid_2
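Several of the test updates swap explicit symbol arrays for %i[] literals. That is purely notational: %i[high search] and [:high, :search] are equal arrays, so either spelling works as a key into the state_action_value hash. For example:

%i[high search] == [:high, :search] # => true

# state_action_value is keyed by [state, action] pairs, so both spellings
# hit the same hash entry (hypothetical values for illustration):
q = { %i[low recharge] => 0.38 }
q[[:low, :recharge]] # => 0.38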
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: finite_mdp
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.4.0
 platform: ruby
 authors:
 - John Lees-Miller
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2017-12-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray
@@ -44,14 +44,14 @@ dependencies:
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.52.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.52.0
 - !ruby/object:Gem::Dependency
   name: simplecov
   requirement: !ruby/object:Gem::Requirement
@@ -96,7 +96,7 @@ rdoc_options:
 - "--main"
 - README.rdoc
 - "--title"
-- finite_mdp-0.3.0 Documentation
+- finite_mdp-0.4.0 Documentation
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
@@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project: finite_mdp
-rubygems_version: 2.
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: Solve small, finite Markov Decision Process models.