RubyGems - spatial_stats - Versions diffs - 1.0.0 → 1.0.1 - Mend

spatial_stats 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/README.md +23 -15
data/lib/spatial_stats/local/multivariate_geary.rb +22 -18
data/lib/spatial_stats/local/stat.rb +54 -82
data/lib/spatial_stats/version.rb +1 -1
data/lib/spatial_stats/weights/weights_matrix.rb +13 -0
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: '08787d83d402843b9d711fd88cdc5f22cbdacfb7a960d8bdb185591f10215f85'
-  data.tar.gz: 96241b1ef7099ce6371f24dee56258121e0d6686ec746ac0411c4ed8ccaf5cad
+  metadata.gz: 3d63578b1952570f2de9a3ab045c8a977988960c2942c73416b86de6b228a091
+  data.tar.gz: 591927bf3c2fd3d7723b7e155bbae6a2c73313b45c749c6ab552e445e5f3b609
 SHA512:
-  metadata.gz: 4ec63da930afeae2b68e3f821e82c8c18f18b11ffa3296ad34e56706b54db11170e5319133d84e9440360e984776cdc5adbd6193d7de252f6f618136a431696b
-  data.tar.gz: 13964eb6a903e5ba0881b0ea2dfeab5e10fcfe51722d9e8a3342f2084200a60bc046d7375e29f651a5267d04a7b9b3a571f26b1ecf13ebf1c3fa2a3211ac6d8c
+  metadata.gz: dfe901b03685c5d66539793d7446738c49b0cdaae3022559544620962aa9d63e4d668d0177b8dc3c44cff797ce5c8ebed8fdeba87a52ec28ebdf0d02b69625b3
+  data.tar.gz: 3cfde56bb984bfbf3ab83bb89e02797a24eb47b1a8387e695890127ee5711f185a114f300906f96728c0e51996f223091f0a9455713f8fb3c3ff34b8285615da

data/README.md CHANGED

@@ -58,8 +58,6 @@ weights = SpatialStats::Weights::Distant.idw_knn(scope, :geom, 5)
 Weight matrices can be defined by a hash that describes each key's neighbor and weight.
-Note: Currently, the keys must be numeric.
 Example: Define WeightsMatrix and get the matrix in row_standardized format.
 ```ruby
@@ -70,30 +68,40 @@ weights = {
     4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
 }
 keys = weights.keys
-wm = SpatialStats::Weights::WeightsMatrix.new(keys, weights)
+wm = SpatialStats::Weights::WeightsMatrix.new(weights)
 #  => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>1}, {:id=>4, :weight=>1}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>1}, {:id=>3, :weight=>1}]}, @n=4>
-wm.standardized
-#  => Numo::DFloat#shape=[4,4]
-#[[0, 0.5, 0, 0.5],
-# [1, 0, 0, 0],
-# [0, 0, 0, 1],
-# [0.5, 0, 0.5, 0]]
+wm = wm.standardize
+#  => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>0.5}, {:id=>4, :weight=>0.5}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>0.5}, {:id=>3, :weight=>0.5}]}, @n=4>
+wm.dense
+# => Numo::DFloat[
+#    [0, 0.5, 0, 0.5],
+#    [1, 0, 0, 0],
+#    [0, 0, 0, 1],
+#    [0.5, 0, 0.5, 0]
+#   ]
+wm.sparse
+# => #<SpatialStats::Weights::CSRMatrix @m=4, @n=4, @nnz=6>
 ```
 ### Lagged Variables
-Spatially lagged variables can be computed with a 2-D n x n `Numo::NArray` and 1-D vector (`Array` or `Numo::NArray`).
+Spatially lagged variables can be computed with weights matrix and 1-D vector (`Array`).
 #### Compute a Lagged Variable
 ```ruby
-w = Numo::DFloat[[0, 0.5, 0, 0.5],
-                 [1, 0, 0, 0],
-                 [0, 0, 0, 1],
-                 [0.5, 0, 0.5, 0]]
+weights = {
+    1 => [{ id: 2, weight: 1 }, { id: 4, weight: 1 }],
+    2 => [{ id: 1, weight: 1 }],
+    3 => [{ id: 4, weight: 1 }],
+    4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
+}
+wm = SpatialStats::Weights::WeightsMatrix.new(weights).standardize
 vec = [1, 2, 3, 4]
-lagged_var = SpatialStats::Utils::Lag.neighbor_sum(w, vec)
+lagged_var = SpatialStats::Utils::Lag.neighbor_sum(wm, vec)
 # => [3.0, 1.0, 4.0, 2.0]
 ```

data/lib/spatial_stats/local/multivariate_geary.rb CHANGED

@@ -60,35 +60,39 @@ module SpatialStats
         # of indices, which will return a list of new orders for the fields.
         # They will then be shuffled corresponding to the new indices.
         rng = gen_rng(seed)
-        n = w.shape[0]
-        indices = (0..(n - 1)).to_a
-        shuffles = crand(indices, permutations, rng)
+        rids = crand(permutations, rng)
+        n_1 = weights.n - 1
+        sparse = weights.sparse
+        row_index = sparse.row_index
+        ws = sparse.values
+        wc = weights.wc
         stat_orig = stat
-        rs = [0] * n
-        row_index = weights.sparse.row_index
-        ws = weights.sparse.values
-        idx = 0
-        while idx < n
-          stat_i_orig = stat_orig[idx]
+        ids = (0..n_1).to_a
+        observations = Array.new(weights.n)
+        (0..n_1).each do |idx|
+          idsi = ids.dup
+          idsi.delete_at(idx)
+          idsi.shuffle!(random: rng)
+          idsi = Numo::Int32.cast(idsi)
+          sample = rids[idsi[rids[true, 0..wc[idx] - 1]]]
+          # account for case where there are no neighbors
           row_range = row_index[idx]..(row_index[idx + 1] - 1)
           if row_range.size.zero?
-            rs[idx] = permutations
-            idx += 1
+            observations[idx] = permutations
             next
           end
-          wi = Numo::DFloat.cast(ws[row_range])
-          # for each field, compute the C value at that index.
-          stat_i_new = mc_i(wi, shuffles[idx], idx)
-          rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new, permutations)
-          idx += 1
+          wi = Numo::DFloat.cast(ws[row_range])
+          stat_i_new = mc_i(wi, sample, idx)
+          stat_i_orig = stat_orig[idx]
+          observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
+                                                  permutations)
         end
-        rs.map do |ri|
+        observations.map do |ri|
           (ri + 1.0) / (permutations + 1.0)
         end
       end

data/lib/spatial_stats/local/stat.rb CHANGED

@@ -42,12 +42,12 @@ module SpatialStats
       ##
       # Conditional randomization algorithm used in permutation testing.
-      # Outputs an array of length n of Numo::DFloat matrices of
-      # size m x num_neighbors. Where m is the number of permutations and
-      # num_neighbors is the number of neighbors for that observation.
+      # Returns a matrix with permuted index values that will be used for
+      # selecting values from the original data set.
       #
-      # The values are randomly permutated values from arr that will act
-      # as its neighbors for that permutation.
+      # The width of the matrix is the max number of neighbors + 1
+      # which is way less than it would be if the original vector
+      # was shuffled in full.
       #
       # This is super important because most weight matrices are very
       # sparse so the amount of shuffling/multiplication that is done
@@ -55,9 +55,9 @@ module SpatialStats
       #
       # @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
       #
-      # @return [Array] of Numo::Narray matrices
+      # @return [Numo::Int32] matrix of shape perms x wc_max + 1
       #
-      def crand(arr, permutations, rng)
+      def crand(permutations, rng)
         # basing this off the ESDA method
         # need to get k for max_neighbors
         # and wc for cardinalities of each item
@@ -68,32 +68,13 @@ module SpatialStats
         # entry not the entire list of permutations for each entry.
         n_1 = weights.n - 1
-        sparse = weights.sparse
-        row_index = sparse.row_index
         # weight counts
-        wc = Array.new(weights.n)
-        k = 0
-        (0..n_1).each do |idx|
-          wc[idx] = row_index[idx + 1] - row_index[idx]
-        end
+        wc = weights.wc
         k = wc.max + 1
         prange = (0..permutations - 1).to_a
-        arr = Numo::DFloat.cast(arr)
-        ids = (0..n_1).to_a
         ids_perm = (0..n_1 - 1).to_a
-        rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
-        (0..n_1).map do |idx|
-          idsi = ids.dup
-          idsi.delete_at(idx)
-          idsi.shuffle!(random: rng)
-          idsi = Numo::Int32.cast(idsi)
-          arr[idsi[rids[true, 0..wc[idx] - 1]]]
-        end
+        Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
       end
       ##
@@ -114,48 +95,40 @@ module SpatialStats
         # its neighbors. Then we will only test for that item instead
         # of the entire set. This will be done for each item.
         rng = gen_rng(seed)
-        shuffles = crand(x, permutations, rng)
+        rids = crand(permutations, rng)
-        n = weights.n
-        # r is the number of equal to or more extreme samples
+        n_1 = weights.n - 1
+        sparse = weights.sparse
+        row_index = sparse.row_index
+        ws = sparse.values
+        wc = weights.wc
         stat_orig = stat
-        rs = [0] * n
-        row_index = weights.sparse.row_index
-        ws = weights.sparse.values
-        idx = 0
-        while idx < n
-          # need to truncate because floats from
-          # c in sparse matrix are inconsistent with
-          # dfloats
-          stat_i_orig = stat_orig[idx]
+        arr = Numo::DFloat.cast(x)
+        ids = (0..n_1).to_a
+        observations = Array.new(weights.n)
+        (0..n_1).each do |idx|
+          idsi = ids.dup
+          idsi.delete_at(idx)
+          idsi.shuffle!(random: rng)
+          idsi = Numo::Int32.cast(idsi)
+          sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
           # account for case where there are no neighbors
-          # the way Numo handles negative ranges, it returns the max
-          # so there will be a len 0 z array being multiplied by a
-          # max_neighbor width permutation matrix.
-          # Need to skip.
           row_range = row_index[idx]..(row_index[idx + 1] - 1)
           if row_range.size.zero?
-            rs[idx] = permutations
-            idx += 1
+            observations[idx] = permutations
             next
           end
-          wi = Numo::DFloat.cast(ws[row_range])
-          stat_i_new = mc_i(wi, shuffles[idx], idx)
-          rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
-                                        permutations)
-          # rs[idx] = if stat_i_orig.positive?
-          #             (stat_i_new >= stat_i_orig).count
-          #           else
-          #             (stat_i_new <= stat_i_orig).count
-          #           end
-          idx += 1
+          wi = Numo::DFloat.cast(ws[row_range])
+          stat_i_new = mc_i(wi, sample, idx)
+          stat_i_orig = stat_orig[idx]
+          observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
+                                                  permutations)
         end
-        rs.map do |ri|
+        observations.map do |ri|
           (ri + 1.0) / (permutations + 1.0)
         end
       end
@@ -174,41 +147,40 @@ module SpatialStats
       # @return [Array] of p-values
       def mc_bv(permutations, seed)
         rng = gen_rng(seed)
-        shuffles = crand(y, permutations, rng)
-        n = weights.n
+        rids = crand(permutations, rng)
+        n_1 = weights.n - 1
+        sparse = weights.sparse
+        row_index = sparse.row_index
+        ws = sparse.values
+        wc = weights.wc
         stat_orig = stat
-        rs = [0] * n
-        row_index = weights.sparse.row_index
-        ws = weights.sparse.values
-        idx = 0
-        while idx < n
-          stat_i_orig = stat_orig[idx]
+        arr = Numo::DFloat.cast(y)
+        ids = (0..n_1).to_a
+        observations = Array.new(weights.n)
+        (0..n_1).each do |idx|
+          idsi = ids.dup
+          idsi.delete_at(idx)
+          idsi.shuffle!(random: rng)
+          idsi = Numo::Int32.cast(idsi)
+          sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
+          # account for case where there are no neighbors
           row_range = row_index[idx]..(row_index[idx + 1] - 1)
           if row_range.size.zero?
-            rs[idx] = permutations
-            idx += 1
+            observations[idx] = permutations
             next
           end
-          wi = Numo::DFloat.cast(ws[row_range])
-          stat_i_new = mc_i(wi, shuffles[idx], idx)
-          rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
-                                        permutations)
-          # if stat_i_orig.positive?
-          #             (stat_i_new >= stat_i_orig).count
-          #           else
-          #             (stat_i_new <= stat_i_orig).count
-          #           end
-          idx += 1
+          wi = Numo::DFloat.cast(ws[row_range])
+          stat_i_new = mc_i(wi, sample, idx)
+          stat_i_orig = stat_orig[idx]
+          observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
+                                                  permutations)
         end
-        rs.map do |ri|
+        observations.map do |ri|
           (ri + 1.0) / (permutations + 1.0)
         end
       end

data/lib/spatial_stats/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module SpatialStats
-  VERSION = '1.0.0'
+  VERSION = '1.0.1'
 end

data/lib/spatial_stats/weights/weights_matrix.rb CHANGED

@@ -58,6 +58,19 @@ module SpatialStats
         @sparse ||= CSRMatrix.new(dense.to_a.flatten, n, n)
       end
+      ##
+      # Compute the cardinalities of each neighbor into an array
+      #
+      # @return [Array]
+      def wc
+        @wc ||= begin
+          row_index = sparse.row_index
+          (0..n - 1).map do |idx|
+            row_index[idx + 1] - row_index[idx]
+          end
+        end
+      end
       ##
       # Row standardized version of the weights matrix.
       # Will return a new version of the weights matrix with standardized

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: spatial_stats
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
 platform: ruby
 authors:
 - Keith Doggett
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-04-27 00:00:00.000000000 Z
+date: 2020-05-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray