finite_mdp 0.3.0 → 0.4.0
- checksums.yaml +4 -4
- data/lib/finite_mdp.rb +0 -1
- data/lib/finite_mdp/array_model.rb +4 -1
- data/lib/finite_mdp/hash_model.rb +1 -0
- data/lib/finite_mdp/model.rb +4 -2
- data/lib/finite_mdp/solver.rb +9 -4
- data/lib/finite_mdp/table_model.rb +1 -0
- data/lib/finite_mdp/vector_valued.rb +1 -0
- data/lib/finite_mdp/version.rb +2 -1
- data/test/finite_mdp/finite_mdp_test.rb +12 -11
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bd00a14ccd84691b9ba6f544d1c73453e0cf6b68
+  data.tar.gz: 21a81b94680509a011ddf870bd6e7f86e905d000
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: af3127d8c5d7d84260e143e18fd611914a767075c903f97b767dfb6b3654e0c1efd54e0c8cc147ab8c61b9a23c241a19abe0939255d520fb74bf0b1e46758019
+  data.tar.gz: 2f22ebc9a7bfacfb08c92c309e672da77a0dadb161be4c1a40e85f80fb690b67aad0839e27eb0ceb3b3404ea49eb16fb9344c82442ae700cd8a983201e8e37a1
data/lib/finite_mdp/array_model.rb
CHANGED
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 #
 # A finite markov decision process model for which the states, transition
 # probabilities and rewards are stored in a sparse nested array format:
@@ -216,7 +217,9 @@ class FiniteMDP::ArrayModel
         pr = model.transition_probability(state, action, next_state)
         next unless pr > 0 || !sparse
         reward = model.reward(state, action, next_state)
-        [state_action_map.state_index(next_state), pr, reward]
+        next_index = state_action_map.state_index(next_state)
+        raise "successor state not found: #{next_state}" unless next_index
+        [next_index, pr, reward]
       end.compact
     end
   end
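The new raise turns a silent failure into a loud one: if a model names a successor state that has no index in the state-action map, conversion now fails immediately instead of storing a nil index that breaks the solver later. A minimal sketch of the failure mode, assuming HashModel takes its state set from the hash's top-level keys and an ArrayModel.from_model conversion as in the gem's README; the toy model here is hypothetical:

    require 'finite_mdp'

    # Broken on purpose: :stop appears only as a successor, never as a
    # top-level state, so it has no index in the state-action map.
    model = FiniteMDP::HashModel.new(
      start: { go: { stop: [1.0, 0.0] } }
    )

    begin
      FiniteMDP::ArrayModel.from_model(model)
    rescue RuntimeError => e
      puts e.message # "successor state not found: stop"
    end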
data/lib/finite_mdp/model.rb
CHANGED
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 #
 # Interface that defines a finite markov decision process model.
 #
@@ -202,8 +203,9 @@ module FiniteMDP::Model
   #
   def check_transition_probabilities_sum(tol = 1e-6)
     transition_probability_sums.each do |(state, action), pr|
-      raise "transition probabilities for state #{state.inspect} and action " \
-        "#{action.inspect} sum to #{pr}" if (1 - pr).abs > tol
+      next if (1 - pr).abs <= tol
+      raise "transition probabilities for state #{state.inspect} and action " \
+            "#{action.inspect} sum to #{pr}"
     end
     nil
   end
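The rewrite is behavior-preserving: the check still raises when the outgoing probabilities for a (state, action) pair differ from 1 by more than tol, just via a guard clause instead of a trailing if. A quick sketch of what it catches, assuming the TableModel row format [state, action, next_state, probability, reward] from the gem's README:

    require 'finite_mdp'

    # Probabilities for (:s, :a) sum to 0.9, not 1.
    bad = FiniteMDP::TableModel.new([
      [:s, :a, :s, 0.5, 0],
      [:s, :a, :t, 0.4, 0],
      [:t, :a, :t, 1.0, 0]
    ])

    # Raises RuntimeError: "transition probabilities for state :s and
    # action :a sum to 0.9"
    bad.check_transition_probabilities_sum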
data/lib/finite_mdp/solver.rb
CHANGED
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

 # We use A to denote a matrix, which rubocop does not like.
-# rubocop:disable Style/MethodName
-# rubocop:disable Style/VariableName
+# rubocop:disable Naming/MethodName
+# rubocop:disable Naming/VariableName

 require 'narray'
@@ -328,8 +328,10 @@ class FiniteMDP::Solver
     loop do
       value_delta = evaluate_policy
       num_value_iters += 1
-      yield num_policy_iters, num_actions_changed, num_value_iters,
-        value_delta if block_given?
+      if block_given?
+        yield(num_policy_iters, num_actions_changed, num_value_iters,
+              value_delta)
+      end

       break if value_delta < value_tolerance
       break if max_value_iters && num_value_iters >= max_value_iters
@@ -402,3 +404,6 @@ class FiniteMDP::Solver
     @policy_b[state_n] = b_n
   end
 end
+
+# rubocop:enable Naming/MethodName
+# rubocop:enable Naming/VariableName
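Either way, the progress callback is optional in 0.4.0: the yield is guarded by block_given?, so calling the solver without a block is safe. A sketch of both styles; the Solver.new arguments and the policy_iteration signature are assumptions based on the variables visible in this diff, not confirmed API:

    require 'finite_mdp'

    model = FiniteMDP::TableModel.new([
      [:high, :wait, :high, 1.0, 1],
      [:low,  :wait, :low,  1.0, 0]
    ])
    solver = FiniteMDP::Solver.new(model, 0.95)

    # Without a block: safe, since the yield is guarded by block_given?.
    solver.policy_iteration(1e-6)

    # With a block: called once per value iteration sweep with progress
    # counters (policy iterations, actions changed, value iterations,
    # and the last value delta).
    solver.policy_iteration(1e-6) do |pi, changed, vi, delta|
      puts format('pi=%d changed=%d vi=%d delta=%g', pi, changed, vi, delta)
    end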
data/test/finite_mdp/finite_mdp_test.rb
CHANGED
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 if ENV['COVERAGE']
   require 'simplecov'
   SimpleCov.start
@@ -60,9 +61,9 @@ class TestFiniteMDP < MiniTest::Test
     assert_equal @r_wait, model.reward(:high, :wait, :high)

     if sparse
-      assert_equal nil, model.reward(:low, :wait, :high)
-      assert_equal nil, model.reward(:low, :recharge, :low)
-      assert_equal nil, model.reward(:high, :wait, :low)
+      assert_nil model.reward(:low, :wait, :high)
+      assert_nil model.reward(:low, :recharge, :low)
+      assert_nil model.reward(:high, :wait, :low)
     else
       assert_equal @r_wait, model.reward(:low, :wait, :high)
       assert_equal 0, model.reward(:low, :recharge, :low)
@@ -168,11 +169,11 @@ class TestFiniteMDP < MiniTest::Test
     q_low_recharge = 0 + gamma * v[:high]

     q = solver.state_action_value
-    assert_close q[[:high, :search]], q_high_search
-    assert_close q[[:high, :wait]], q_high_wait
-    assert_close q[[:low, :search]], q_low_search
-    assert_close q[[:low, :wait]], q_low_wait
-    assert_close q[[:low, :recharge]], q_low_recharge
+    assert_close q[%i[high search]], q_high_search
+    assert_close q[%i[high wait]], q_high_wait
+    assert_close q[%i[low search]], q_low_search
+    assert_close q[%i[low wait]], q_low_wait
+    assert_close q[%i[low recharge]], q_low_recharge
   end

   #
@@ -314,7 +315,7 @@ class TestFiniteMDP < MiniTest::Test
       [1, 0], [1, 2], [1, 3],
       [2, 0], [2, 1], [2, 2], [2, 3], :stop], Set[*model.states]

-    assert_equal Set[%w(^ > v <)], Set[model.actions([0, 0])]
+    assert_equal Set[%w[^ > v <]], Set[model.actions([0, 0])]
     assert_equal [:stop], model.actions([1, 3])
     assert_equal [:stop], model.actions(:stop)
@@ -332,11 +333,11 @@ class TestFiniteMDP < MiniTest::Test
     ], model.hash_to_grid(solver.policy)

     # check values against Figure 17.3
-    assert [[0.812, 0.868, 0.918, 1],
+    assert([[0.812, 0.868, 0.918, 1],
            [0.762, nil, 0.660, -1],
            [0.705, 0.655, 0.611, 0.388]].flatten
       .zip(model.hash_to_grid(solver.value).flatten)
-      .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 }
+      .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 })
   end

   def test_aima_grid_2
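The test changes are mechanical style migrations: assert_equal nil becomes assert_nil (minitest deprecated the former, and it fails outright in minitest 6), and bracketed literal arrays become percent literals. The spellings are equivalent:

    %i[high search] == [:high, :search]      # true
    %w[^ > v <]     == ['^', '>', 'v', '<']  # true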
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: finite_mdp
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.4.0
 platform: ruby
 authors:
 - John Lees-Miller
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2017-12-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray
@@ -44,14 +44,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.52.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.52.0
 - !ruby/object:Gem::Dependency
   name: simplecov
   requirement: !ruby/object:Gem::Requirement
@@ -96,7 +96,7 @@ rdoc_options:
 - "--main"
 - README.rdoc
 - "--title"
-- finite_mdp-0.3.0 Documentation
+- finite_mdp-0.4.0 Documentation
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
@@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project: finite_mdp
-rubygems_version: 2.
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: Solve small, finite Markov Decision Process models.