RubyGems - spatial_stats - Versions diffs - 0.2.2 → 1.0.4 - Mend

spatial_stats 0.2.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/README.md +126 -55
data/Rakefile +7 -0
data/ext/spatial_stats/csr_matrix.c +380 -0
data/ext/spatial_stats/csr_matrix.h +34 -0
data/ext/spatial_stats/extconf.rb +6 -0
data/ext/spatial_stats/spatial_stats.c +32 -0
data/lib/spatial_stats.rb +1 -0
data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
data/lib/spatial_stats/global/moran.rb +43 -36
data/lib/spatial_stats/global/stat.rb +55 -27
data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
data/lib/spatial_stats/local/geary.rb +35 -5
data/lib/spatial_stats/local/getis_ord.rb +45 -17
data/lib/spatial_stats/local/moran.rb +39 -9
data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
data/lib/spatial_stats/local/stat.rb +112 -80
data/lib/spatial_stats/narray_ext.rb +5 -5
data/lib/spatial_stats/spatial_stats.so +0 -0
data/lib/spatial_stats/utils.rb +25 -0
data/lib/spatial_stats/utils/lag.rb +10 -10
data/lib/spatial_stats/version.rb +1 -1
data/lib/spatial_stats/weights/contiguous.rb +20 -10
data/lib/spatial_stats/weights/distant.rb +38 -20
data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
metadata +33 -11
data/MIT-LICENSE +0 -20

data/lib/spatial_stats/local/stat.rb CHANGED

@@ -12,10 +12,25 @@ module SpatialStats
       def initialize(scope, field, weights)
         @scope = scope
         @field = field
-        @weights = weights
+        @weights = weights.standardize
       end
       attr_accessor :scope, :field, :weights
+      ##
+      # A new instance of Stat, from vector and weights.
+      #
+      # @param [Array] x observations of dataset
+      # @param [WeightsMatrix] weights to define relationships between observations
+      #
+      # @return [Stat]
+      def self.from_observations(x, weights)
+        raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
+        instance = new(nil, nil, weights.standardize)
+        instance.x = x
+        instance
+      end
       def stat
         raise NotImplementedError, 'method stat not defined'
       end
@@ -28,6 +43,15 @@ module SpatialStats
         raise NotImplementedError, 'method variance not implemented'
       end
+      def x=(values)
+        @x = values.standardize
+      end
+      alias z= x=
+      def y=(values)
+        @y = values.standardize
+      end
       ##
       # Z-score for each observation of the statistic.
       #
@@ -42,12 +66,12 @@ module SpatialStats
       ##
       # Conditional randomization algorithm used in permutation testing.
-      # Outputs an array of length n of Numo::DFloat matrices of
-      # size m x num_neighbors. Where m is the number of permutations and
-      # num_neighbors is the number of neighbors for that observation.
+      # Returns a matrix with permuted index values that will be used for
+      # selecting values from the original data set.
       #
-      # The values are randomly permutated values from arr that will act
-      # as its neighbors for that permutation.
+      # The width of the matrix is the max number of neighbors + 1
+      # which is way less than it would be if the original vector
+      # was shuffled in full.
       #
       # This is super important because most weight matrices are very
       # sparse so the amount of shuffling/multiplication that is done
@@ -55,42 +79,26 @@ module SpatialStats
       #
       # @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
       #
-      # @return [Array] of Numo::Narray matrices
+      # @return [Numo::Int32] matrix of shape perms x wc_max + 1
       #
-      def crand(arr, permutations, rng)
+      def crand(permutations, rng)
         # basing this off the ESDA method
         # need to get k for max_neighbors
         # and wc for cardinalities of each item
         # this returns an array of length n with
-        # (permutations x neighborz) Numo Arrays.
+        # (permutations x neighbors) Numo Arrays.
         # This helps reduce computation time because
         # we are only dealing with neighbors for each
         # entry not the entire list of permutations for each entry.
         n_1 = weights.n - 1
         # weight counts
-        wc = [0] * weights.n
-        k = 0
-        (0..n_1).each do |idx|
-          wc[idx] = (w[idx, true] > 0).count
-        end
+        wc = weights.wc
         k = wc.max + 1
         prange = (0..permutations - 1).to_a
-        arr = Numo::DFloat.cast(arr)
-        ids = (0..n_1).to_a
         ids_perm = (0..n_1 - 1).to_a
-        rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
-        (0..n_1).map do |idx|
-          idsi = ids.dup
-          idsi.delete_at(idx)
-          idsi.shuffle!(random: rng)
-          idsi = Numo::Int32.cast(idsi)
-          arr[idsi[rids[true, 0..wc[idx] - 1]]]
-        end
+        Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
       end
       ##
@@ -111,31 +119,40 @@ module SpatialStats
         # its neighbors. Then we will only test for that item instead
         # of the entire set. This will be done for each item.
         rng = gen_rng(seed)
-        shuffles = crand(x, permutations, rng)
-        n = weights.n
-        # r is the number of equal to or more extreme samples
-        stat_orig = stat
-        rs = [0] * n
-        ws = neighbor_weights
+        rids = crand(permutations, rng)
-        idx = 0
-        while idx < n
-          stat_i_orig = stat_orig[idx]
+        n_1 = weights.n - 1
+        sparse = weights.sparse
+        row_index = sparse.row_index
+        ws = sparse.values
+        wc = weights.wc
+        stat_orig = stat
-          wi = Numo::DFloat.cast(ws[idx])
-          stat_i_new = mc_i(wi, shuffles[idx], idx)
+        arr = Numo::DFloat.cast(x)
+        ids = (0..n_1).to_a
+        observations = Array.new(weights.n)
+        (0..n_1).each do |idx|
+          idsi = ids.dup
+          idsi.delete_at(idx)
+          idsi.shuffle!(random: rng)
+          idsi = Numo::Int32.cast(idsi)
+          sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
-          rs[idx] = if stat_i_orig.positive?
-                      (stat_i_new >= stat_i_orig).count
-                    else
-                      (stat_i_new <= stat_i_orig).count
-                    end
+          # account for case where there are no neighbors
+          row_range = row_index[idx]..(row_index[idx + 1] - 1)
+          if row_range.size.zero?
+            observations[idx] = permutations
+            next
+          end
-          idx += 1
+          wi = Numo::DFloat.cast(ws[row_range])
+          stat_i_new = mc_i(wi, sample, idx)
+          stat_i_orig = stat_orig[idx]
+          observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
+                                                  permutations)
         end
-        rs.map do |ri|
+        observations.map do |ri|
           (ri + 1.0) / (permutations + 1.0)
         end
       end
@@ -154,30 +171,40 @@ module SpatialStats
       # @return [Array] of p-values
       def mc_bv(permutations, seed)
         rng = gen_rng(seed)
-        shuffles = crand(y, permutations, rng)
-        n = weights.n
+        rids = crand(permutations, rng)
+        n_1 = weights.n - 1
+        sparse = weights.sparse
+        row_index = sparse.row_index
+        ws = sparse.values
+        wc = weights.wc
         stat_orig = stat
-        rs = [0] * n
-        ws = neighbor_weights
-        idx = 0
-        while idx < n
-          stat_i_orig = stat_orig[idx]
-          wi = Numo::DFloat.cast(ws[idx])
-          stat_i_new = mc_i(wi, shuffles[idx], idx)
+        arr = Numo::DFloat.cast(y)
+        ids = (0..n_1).to_a
+        observations = Array.new(weights.n)
+        (0..n_1).each do |idx|
+          idsi = ids.dup
+          idsi.delete_at(idx)
+          idsi.shuffle!(random: rng)
+          idsi = Numo::Int32.cast(idsi)
+          sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
-          rs[idx] = if stat_i_orig.positive?
-                      (stat_i_new >= stat_i_orig).count
-                    else
-                      (stat_i_new <= stat_i_orig).count
-                    end
+          # account for case where there are no neighbors
+          row_range = row_index[idx]..(row_index[idx + 1] - 1)
+          if row_range.size.zero?
+            observations[idx] = permutations
+            next
+          end
-          idx += 1
+          wi = Numo::DFloat.cast(ws[row_range])
+          stat_i_new = mc_i(wi, sample, idx)
+          stat_i_orig = stat_orig[idx]
+          observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
+                                                  permutations)
         end
-        rs.map do |ri|
+        observations.map do |ri|
           (ri + 1.0) / (permutations + 1.0)
         end
       end
@@ -200,8 +227,7 @@ module SpatialStats
       # @return [Array] of labels
       def quads
         # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
-        w = @weights.full
-        z_lag = SpatialStats::Utils::Lag.neighbor_average(w, z)
+        z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, z)
         zp = z.map(&:positive?)
         lp = z_lag.map(&:positive?)
@@ -221,6 +247,22 @@ module SpatialStats
         end
       end
+      ##
+      # Summary of the statistic. Computes +stat+, +mc+, and +groups+, then returns the values
+      # as an array of hashes, one per key in the weights.
+      #
+      # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
+      # @param [Integer] seed used in random number generator for shuffles.
+      #
+      # @return [Array]
+      def summary(permutations = 99, seed = nil)
+        p_vals = mc(permutations, seed)
+        data = weights.keys.zip(stat, p_vals, groups)
+        data.map do |row|
+          { key: row[0], stat: row[1], p: row[2], group: row[3] }
+        end
+      end
       private
       def stat_i
@@ -231,8 +273,12 @@ module SpatialStats
         raise NotImplementedError, 'method mc_i not defined'
       end
+      def mc_observation_calc(_stat_i_orig, _stat_i_new, _permutations)
+        raise NotImplementedError, 'method mc_observation_calc not defined'
+      end
       def w
-        weights.standardized
+        @w ||= weights.dense
       end
       def gen_rng(seed = nil)
@@ -242,20 +288,6 @@ module SpatialStats
           Random.new
         end
       end
-      def neighbor_weights
-        # record the non-zero weights in variable length arrays for each
-        # row in the weights table
-        ws = [[]] * weights.n
-        (0..weights.n - 1).each do |idx|
-          neighbors = []
-          w[idx, true].each do |wij|
-            neighbors << wij if wij != 0
-          end
-          ws[idx] = neighbors
-        end
-        ws
-      end
     end
   end
 end

data/lib/spatial_stats/narray_ext.rb CHANGED

@@ -12,11 +12,11 @@ module Numo
     #
     # @example
     #
-    #   Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardized
+    #   Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardize
     #   Numo::DFloat [[0, 0.5, 0.5], [0.33333, 0.33333, 0.33333]]
     #
     # @return [Numo::NArray]
-    def row_standardized
+    def row_standardize
       # every row will sum up to 1, or if they are all 0, do nothing
       standardized = each_over_axis.map do |row|
         sum = row.sum
@@ -38,16 +38,16 @@ module Numo
     #
     # @example
     #
-    #   Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].windowed
+    #   Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].window
     #   Numo::DFloat [[1, 1, 0], [1, 1, 1], [0, 1, 1]]
     #
     # @example
     #   # Input will be equivalent to output in this case
-    #   Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].windowed
+    #   Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].window
     #   Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]]
     #
     # @return [Numo::NArray]
-    def windowed
+    def window
       # in windowed calculations, the diagonal is set to 1
       # if trace (sum of diag) is 0, add it, else return input
       if trace.zero?

data/lib/spatial_stats/spatial_stats.so ADDED

Binary file

data/lib/spatial_stats/utils.rb CHANGED

@@ -6,5 +6,30 @@ module SpatialStats
   ##
   # The Utils module contains various utilities used in the gem.
   module Utils
+    ##
+    # Compute the false discovery rate (FDR) of a set of p-values given
+    # an alpha value.
+    #
+    # If there is no FDR available in the dataset, the Bonferroni Bound is
+    # returned instead.
+    #
+    # @param [Array] pvals from an mc test
+    # @param [Float] alpha value for the fdr
+    #
+    # @return [Float] either the FDR or Bonferroni Bound
+    def self.fdr(pvals, alpha)
+      n = pvals.size
+      b_bound = alpha / n
+      pvals.sort!
+      p_val = b_bound
+      (0..n - 1).each do |i|
+        p_fdr = (i + 1) * b_bound
+        break unless pvals[i] <= p_fdr
+        p_val = p_fdr
+      end
+      p_val
+    end
   end
 end

data/lib/spatial_stats/utils/lag.rb CHANGED

@@ -11,36 +11,36 @@ module SpatialStats
       # Dot product of the row_standardized input matrix
       # by the input vector, variables.
       #
-      # @param [Numo::NArray] matrix 2-D square matrix.
+      # @param [WeightsMatrix] matrix holding target weights.
       # @param [Array] variables vector multiplying the matrix
       #
       # @return [Array] resultant vector
       def self.neighbor_average(matrix, variables)
-        matrix = matrix.row_standardized
+        matrix = matrix.standardize
         neighbor_sum(matrix, variables)
       end
       ##
       # Dot product of the input matrix by the input vector, variables.
       #
-      # @param [Numo::NArray] matrix 2-D square matrix.
+      # @param [WeightsMatrix] matrix holding target weights.
       # @param [Array] variables vector multiplying the matrix
       #
       # @return [Array] resultant vector
       def self.neighbor_sum(matrix, variables)
-        matrix.dot(variables).to_a
+        matrix.sparse.mulvec(variables)
       end
       ##
-      # Dot product of the input windowed, row standardizd matrix by
+      # Dot product of the input windowed, row standardized matrix by
       # the input vector, variables.
       #
-      # @param [Numo::NArray] matrix 2-D square matrix.
+      # @param [WeightsMatrix] matrix holding target weights.
       # @param [Array] variables vector multiplying the matrix
       #
       # @return [Array] resultant vector
       def self.window_average(matrix, variables)
-        matrix = matrix.windowed.row_standardized
+        matrix = matrix.window.standardize
         window_sum(matrix, variables)
       end
@@ -48,13 +48,13 @@ module SpatialStats
       # Dot product of the input windowed matrix by
       # the input vector, variables.
       #
-      # @param [Numo::NArray] matrix 2-D square matrix.
+      # @param [WeightsMatrix] matrix holding target weights.
       # @param [Array] variables vector multiplying the matrix
       #
       # @return [Array] resultant vector
       def self.window_sum(matrix, variables)
-        matrix = matrix.windowed
-        matrix.dot(variables).to_a
+        matrix = matrix.window
+        matrix.sparse.mulvec(variables)
       end
     end
   end

data/lib/spatial_stats/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module SpatialStats
-  VERSION = '0.2.2'
+  VERSION = '1.0.4'
 end

data/lib/spatial_stats/weights/contiguous.rb CHANGED

@@ -15,21 +15,26 @@ module SpatialStats
       #
       # @return [WeightsMatrix]
       def self.rook(scope, field)
-        p_key = scope.primary_key
-        keys = scope.pluck(p_key).sort
         neighbors = SpatialStats::Queries::Weights
                     .rook_contiguity_neighbors(scope, field)
+        # get keys to make sure we have consistent dimensions when
+        # some entries don't have neighbors.
+        # define a new hash that has all the keys from scope
+        keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
         neighbors = neighbors.group_by(&:i_id)
+        missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
+        neighbors = neighbors.merge(missing_neighbors)
         weights = neighbors.transform_values do |value|
           value.map do |neighbor|
-            hash = neighbor.as_json(only: [:j_id]).symbolize_keys
+            hash = { id: neighbor[:j_id] }
             hash[:weight] = 1
             hash
           end
         end
-        SpatialStats::Weights::WeightsMatrix.new(keys, weights)
+        SpatialStats::Weights::WeightsMatrix.new(weights)
       end
       ##
@@ -40,21 +45,26 @@ module SpatialStats
       #
       # @return [WeightsMatrix]
       def self.queen(scope, field)
-        p_key = scope.primary_key
-        keys = scope.pluck(p_key).sort
         neighbors = SpatialStats::Queries::Weights
                     .queen_contiguity_neighbors(scope, field)
+        # get keys to make sure we have consistent dimensions when
+        # some entries don't have neighbors.
+        # define a new hash that has all the keys from scope
+        keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
         neighbors = neighbors.group_by(&:i_id)
+        missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
+        neighbors = neighbors.merge(missing_neighbors)
         weights = neighbors.transform_values do |value|
           value.map do |neighbor|
-            hash = neighbor.as_json(only: [:j_id]).symbolize_keys
+            hash = { id: neighbor[:j_id] }
             hash[:weight] = 1
             hash
           end
         end
-        SpatialStats::Weights::WeightsMatrix.new(keys, weights)
+        SpatialStats::Weights::WeightsMatrix.new(weights)
       end
     end
   end