finite_mdp 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/finite_mdp.rb +0 -1
 - data/lib/finite_mdp/array_model.rb +4 -1
 - data/lib/finite_mdp/hash_model.rb +1 -0
 - data/lib/finite_mdp/model.rb +4 -2
 - data/lib/finite_mdp/solver.rb +9 -4
 - data/lib/finite_mdp/table_model.rb +1 -0
 - data/lib/finite_mdp/vector_valued.rb +1 -0
 - data/lib/finite_mdp/version.rb +2 -1
 - data/test/finite_mdp/finite_mdp_test.rb +12 -11
 - metadata +6 -6
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: bd00a14ccd84691b9ba6f544d1c73453e0cf6b68
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 21a81b94680509a011ddf870bd6e7f86e905d000
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: af3127d8c5d7d84260e143e18fd611914a767075c903f97b767dfb6b3654e0c1efd54e0c8cc147ab8c61b9a23c241a19abe0939255d520fb74bf0b1e46758019
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 2f22ebc9a7bfacfb08c92c309e672da77a0dadb161be4c1a40e85f80fb690b67aad0839e27eb0ceb3b3404ea49eb16fb9344c82442ae700cd8a983201e8e37a1
         
     | 
    
        data/lib/finite_mdp/array_model.rb
    CHANGED
    
    
| 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            #
         
     | 
| 
       3 
4 
     | 
    
         
             
            # A finite markov decision process model for which the states, transition
         
     | 
| 
       4 
5 
     | 
    
         
             
            # probabilities and rewards are stored in a sparse nested array format:
         
     | 
| 
         @@ -216,7 +217,9 @@ class FiniteMDP::ArrayModel 
     | 
|
| 
       216 
217 
     | 
    
         
             
                      pr = model.transition_probability(state, action, next_state)
         
     | 
| 
       217 
218 
     | 
    
         
             
                      next unless pr > 0 || !sparse
         
     | 
| 
       218 
219 
     | 
    
         
             
                      reward = model.reward(state, action, next_state)
         
     | 
| 
       219 
     | 
    
         
            -
                       
     | 
| 
      
 220 
     | 
    
         
            +
                      next_index = state_action_map.state_index(next_state)
         
     | 
| 
      
 221 
     | 
    
         
            +
                      raise "successor state not found: #{next_state}" unless next_index
         
     | 
| 
      
 222 
     | 
    
         
            +
                      [next_index, pr, reward]
         
     | 
| 
       220 
223 
     | 
    
         
             
                    end.compact
         
     | 
| 
       221 
224 
     | 
    
         
             
                  end
         
     | 
| 
       222 
225 
     | 
    
         
             
                end
         
     | 
    
        data/lib/finite_mdp/model.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            #
         
     | 
| 
       3 
4 
     | 
    
         
             
            # Interface that defines a finite markov decision process model.
         
     | 
| 
       4 
5 
     | 
    
         
             
            #
         
     | 
| 
         @@ -202,8 +203,9 @@ module FiniteMDP::Model 
     | 
|
| 
       202 
203 
     | 
    
         
             
              #
         
     | 
| 
       203 
204 
     | 
    
         
             
              def check_transition_probabilities_sum(tol = 1e-6)
         
     | 
| 
       204 
205 
     | 
    
         
             
                transition_probability_sums.each do |(state, action), pr|
         
     | 
| 
       205 
     | 
    
         
            -
                   
     | 
| 
       206 
     | 
    
         
            -
             
     | 
| 
      
 206 
     | 
    
         
            +
                  next if (1 - pr).abs <= tol
         
     | 
| 
      
 207 
     | 
    
         
            +
                  raise "transition probabilities for state #{state.inspect} and action " \
         
     | 
| 
      
 208 
     | 
    
         
            +
                      "#{action.inspect} sum to #{pr}"
         
     | 
| 
       207 
209 
     | 
    
         
             
                end
         
     | 
| 
       208 
210 
     | 
    
         
             
                nil
         
     | 
| 
       209 
211 
     | 
    
         
             
              end
         
     | 
    
        data/lib/finite_mdp/solver.rb
    CHANGED
    
    | 
         @@ -1,8 +1,8 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            # We use A to denote a matrix, which rubocop does not like.
         
     | 
| 
       4 
     | 
    
         
            -
            # rubocop:disable  
     | 
| 
       5 
     | 
    
         
            -
            # rubocop:disable  
     | 
| 
      
 4 
     | 
    
         
            +
            # rubocop:disable Naming/MethodName
         
     | 
| 
      
 5 
     | 
    
         
            +
            # rubocop:disable Naming/VariableName
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
            require 'narray'
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
         @@ -328,8 +328,10 @@ class FiniteMDP::Solver 
     | 
|
| 
       328 
328 
     | 
    
         
             
                  loop do
         
     | 
| 
       329 
329 
     | 
    
         
             
                    value_delta = evaluate_policy
         
     | 
| 
       330 
330 
     | 
    
         
             
                    num_value_iters += 1
         
     | 
| 
       331 
     | 
    
         
            -
                     
     | 
| 
       332 
     | 
    
         
            -
                       
     | 
| 
      
 331 
     | 
    
         
            +
                    if block_given?
         
     | 
| 
      
 332 
     | 
    
         
            +
                      yield(num_policy_iters, num_actions_changed, num_value_iters,
         
     | 
| 
      
 333 
     | 
    
         
            +
                        value_delta)
         
     | 
| 
      
 334 
     | 
    
         
            +
                    end
         
     | 
| 
       333 
335 
     | 
    
         | 
| 
       334 
336 
     | 
    
         
             
                    break if value_delta < value_tolerance
         
     | 
| 
       335 
337 
     | 
    
         
             
                    break if max_value_iters && num_value_iters >= max_value_iters
         
     | 
| 
         @@ -402,3 +404,6 @@ class FiniteMDP::Solver 
     | 
|
| 
       402 
404 
     | 
    
         
             
                @policy_b[state_n] = b_n
         
     | 
| 
       403 
405 
     | 
    
         
             
              end
         
     | 
| 
       404 
406 
     | 
    
         
             
            end
         
     | 
| 
      
 407 
     | 
    
         
            +
             
     | 
| 
      
 408 
     | 
    
         
            +
            # rubocop:enable Naming/MethodName
         
     | 
| 
      
 409 
     | 
    
         
            +
            # rubocop:enable Naming/VariableName
         
     | 
    
        data/test/finite_mdp/finite_mdp_test.rb
    CHANGED
    
    
| 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            if ENV['COVERAGE']
         
     | 
| 
       3 
4 
     | 
    
         
             
              require 'simplecov'
         
     | 
| 
       4 
5 
     | 
    
         
             
              SimpleCov.start
         
     | 
| 
         @@ -60,9 +61,9 @@ class TestFiniteMDP < MiniTest::Test 
     | 
|
| 
       60 
61 
     | 
    
         
             
                assert_equal @r_wait,   model.reward(:high, :wait, :high)
         
     | 
| 
       61 
62 
     | 
    
         | 
| 
       62 
63 
     | 
    
         
             
                if sparse
         
     | 
| 
       63 
     | 
    
         
            -
                   
     | 
| 
       64 
     | 
    
         
            -
                   
     | 
| 
       65 
     | 
    
         
            -
                   
     | 
| 
      
 64 
     | 
    
         
            +
                  assert_nil model.reward(:low, :wait, :high)
         
     | 
| 
      
 65 
     | 
    
         
            +
                  assert_nil model.reward(:low, :recharge, :low)
         
     | 
| 
      
 66 
     | 
    
         
            +
                  assert_nil model.reward(:high, :wait, :low)
         
     | 
| 
       66 
67 
     | 
    
         
             
                else
         
     | 
| 
       67 
68 
     | 
    
         
             
                  assert_equal @r_wait, model.reward(:low, :wait, :high)
         
     | 
| 
       68 
69 
     | 
    
         
             
                  assert_equal 0,       model.reward(:low, :recharge, :low)
         
     | 
| 
         @@ -168,11 +169,11 @@ class TestFiniteMDP < MiniTest::Test 
     | 
|
| 
       168 
169 
     | 
    
         
             
                q_low_recharge = 0 + gamma * v[:high]
         
     | 
| 
       169 
170 
     | 
    
         | 
| 
       170 
171 
     | 
    
         
             
                q = solver.state_action_value
         
     | 
| 
       171 
     | 
    
         
            -
                assert_close q[[ 
     | 
| 
       172 
     | 
    
         
            -
                assert_close q[[ 
     | 
| 
       173 
     | 
    
         
            -
                assert_close q[[ 
     | 
| 
       174 
     | 
    
         
            -
                assert_close q[[ 
     | 
| 
       175 
     | 
    
         
            -
                assert_close q[[ 
     | 
| 
      
 172 
     | 
    
         
            +
                assert_close q[%i[high search]],  q_high_search
         
     | 
| 
      
 173 
     | 
    
         
            +
                assert_close q[%i[high wait]],    q_high_wait
         
     | 
| 
      
 174 
     | 
    
         
            +
                assert_close q[%i[low search]],   q_low_search
         
     | 
| 
      
 175 
     | 
    
         
            +
                assert_close q[%i[low wait]],     q_low_wait
         
     | 
| 
      
 176 
     | 
    
         
            +
                assert_close q[%i[low recharge]], q_low_recharge
         
     | 
| 
       176 
177 
     | 
    
         
             
              end
         
     | 
| 
       177 
178 
     | 
    
         | 
| 
       178 
179 
     | 
    
         
             
              #
         
     | 
| 
         @@ -314,7 +315,7 @@ class TestFiniteMDP < MiniTest::Test 
     | 
|
| 
       314 
315 
     | 
    
         
             
                  [1, 0],         [1, 2], [1, 3],
         
     | 
| 
       315 
316 
     | 
    
         
             
                  [2, 0], [2, 1], [2, 2], [2, 3], :stop], Set[*model.states]
         
     | 
| 
       316 
317 
     | 
    
         | 
| 
       317 
     | 
    
         
            -
                assert_equal Set[%w 
     | 
| 
      
 318 
     | 
    
         
            +
                assert_equal Set[%w[^ > v <]], Set[model.actions([0, 0])]
         
     | 
| 
       318 
319 
     | 
    
         
             
                assert_equal [:stop], model.actions([1, 3])
         
     | 
| 
       319 
320 
     | 
    
         
             
                assert_equal [:stop], model.actions(:stop)
         
     | 
| 
       320 
321 
     | 
    
         | 
| 
         @@ -332,11 +333,11 @@ class TestFiniteMDP < MiniTest::Test 
     | 
|
| 
       332 
333 
     | 
    
         
             
                ], model.hash_to_grid(solver.policy)
         
     | 
| 
       333 
334 
     | 
    
         | 
| 
       334 
335 
     | 
    
         
             
                # check values against Figure 17.3
         
     | 
| 
       335 
     | 
    
         
            -
                assert 
     | 
| 
      
 336 
     | 
    
         
            +
                assert([[0.812, 0.868, 0.918, 1],
         
     | 
| 
       336 
337 
     | 
    
         
             
                        [0.762, nil,   0.660, -1],
         
     | 
| 
       337 
338 
     | 
    
         
             
                        [0.705, 0.655, 0.611, 0.388]].flatten
         
     | 
| 
       338 
339 
     | 
    
         
             
                  .zip(model.hash_to_grid(solver.value).flatten)
         
     | 
| 
       339 
     | 
    
         
            -
                  .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 }
         
     | 
| 
      
 340 
     | 
    
         
            +
                  .all? { |x, y| (x.nil? && y.nil?) || (x - y).abs < 5e-4 })
         
     | 
| 
       340 
341 
     | 
    
         
             
              end
         
     | 
| 
       341 
342 
     | 
    
         | 
| 
       342 
343 
     | 
    
         
             
              def test_aima_grid_2
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: finite_mdp
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.3.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.4.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - John Lees-Miller
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date:  
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2017-12-18 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: narray
         
     | 
| 
         @@ -44,14 +44,14 @@ dependencies: 
     | 
|
| 
       44 
44 
     | 
    
         
             
                requirements:
         
     | 
| 
       45 
45 
     | 
    
         
             
                - - "~>"
         
     | 
| 
       46 
46 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       47 
     | 
    
         
            -
                    version: 0. 
     | 
| 
      
 47 
     | 
    
         
            +
                    version: 0.52.0
         
     | 
| 
       48 
48 
     | 
    
         
             
              type: :development
         
     | 
| 
       49 
49 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       50 
50 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       51 
51 
     | 
    
         
             
                requirements:
         
     | 
| 
       52 
52 
     | 
    
         
             
                - - "~>"
         
     | 
| 
       53 
53 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       54 
     | 
    
         
            -
                    version: 0. 
     | 
| 
      
 54 
     | 
    
         
            +
                    version: 0.52.0
         
     | 
| 
       55 
55 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       56 
56 
     | 
    
         
             
              name: simplecov
         
     | 
| 
       57 
57 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
         @@ -96,7 +96,7 @@ rdoc_options: 
     | 
|
| 
       96 
96 
     | 
    
         
             
            - "--main"
         
     | 
| 
       97 
97 
     | 
    
         
             
            - README.rdoc
         
     | 
| 
       98 
98 
     | 
    
         
             
            - "--title"
         
     | 
| 
       99 
     | 
    
         
            -
            - finite_mdp-0.3.0 Documentation
     | 
| 
      
 99 
     | 
    
         
            +
            - finite_mdp-0.4.0 Documentation
         
     | 
| 
       100 
100 
     | 
    
         
             
            require_paths:
         
     | 
| 
       101 
101 
     | 
    
         
             
            - lib
         
     | 
| 
       102 
102 
     | 
    
         
             
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
         @@ -111,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       111 
111 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       112 
112 
     | 
    
         
             
            requirements: []
         
     | 
| 
       113 
113 
     | 
    
         
             
            rubyforge_project: finite_mdp
         
     | 
| 
       114 
     | 
    
         
            -
            rubygems_version: 2. 
     | 
| 
      
 114 
     | 
    
         
            +
            rubygems_version: 2.6.13
         
     | 
| 
       115 
115 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       116 
116 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       117 
117 
     | 
    
         
             
            summary: Solve small, finite Markov Decision Process models.
         
     |