RubyGems - spatial_stats - Versions diffs - 0.1.1 → 0.2.1 - Mend

spatial_stats 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/README.md +185 -9
data/lib/spatial_stats.rb +7 -4
data/lib/spatial_stats/enumerable_ext.rb +29 -0
data/lib/spatial_stats/global.rb +15 -0
data/lib/spatial_stats/global/bivariate_moran.rb +48 -4
data/lib/spatial_stats/global/moran.rb +69 -19
data/lib/spatial_stats/global/stat.rb +29 -17
data/lib/spatial_stats/local.rb +16 -1
data/lib/spatial_stats/local/bivariate_moran.rb +45 -4
data/lib/spatial_stats/local/geary.rb +34 -47
data/lib/spatial_stats/local/getis_ord.rb +109 -0
data/lib/spatial_stats/local/moran.rb +55 -22
data/lib/spatial_stats/local/multivariate_geary.rb +77 -22
data/lib/spatial_stats/local/stat.rb +160 -88
data/lib/spatial_stats/narray_ext.rb +27 -0
data/lib/spatial_stats/queries.rb +6 -0
data/lib/spatial_stats/queries/variables.rb +16 -3
data/lib/spatial_stats/queries/weights.rb +91 -9
data/lib/spatial_stats/utils.rb +7 -0
data/lib/spatial_stats/utils/lag.rb +34 -2
data/lib/spatial_stats/version.rb +1 -1
data/lib/spatial_stats/weights.rb +9 -0
data/lib/spatial_stats/weights/contiguous.rb +18 -0
data/lib/spatial_stats/weights/distant.rb +41 -4
data/lib/spatial_stats/weights/weights_matrix.rb +25 -0
metadata +5 -4
data/lib/spatial_stats/local/g.rb +0 -75

data/lib/spatial_stats/global/stat.rb CHANGED

@@ -2,8 +2,12 @@
 module SpatialStats
   module Global
+    ##
+    # Stat is the abstract base class for global stats.
+    # It defines the methods that are common between all classes
+    # and will raise a NotImplementedError on those that are specific
+    # for each type of statistic.
     class Stat
-      # Base class for global stats
       def initialize(scope, field, weights)
         @scope = scope
         @field = field
@@ -11,10 +15,14 @@ module SpatialStats
       end
       attr_accessor :scope, :field, :weights
-      def i
-        raise NotImplementedError, 'method i not defined'
+      def stat
+        raise NotImplementedError, 'method stat not defined'
       end
+      ##
+      # The expected value of +#stat+
+      #
+      # @return [Float]
       def expectation
         raise NotImplementedError, 'method expectation not implemented'
       end
@@ -23,8 +31,12 @@ module SpatialStats
         raise NotImplementedError, 'method variance not implemented'
       end
+      ##
+      # Z-score of the statistic.
+      #
+      # @return [Float] the number of deviations from the mean
       def z_score
-        (i - expectation) / Math.sqrt(variance)
+        (stat - expectation) / Math.sqrt(variance)
       end
       def mc(permutations, seed)
@@ -35,17 +47,17 @@ module SpatialStats
         end
         # r is the number of equal to or more extreme samples
         # one sided
-        i_orig = i
+        stat_orig = stat
         r = 0
         shuffles.each do |shuffle|
-          stat = self.class.new(@scope, @field, @weights)
-          stat.x = shuffle
+          klass = self.class.new(@scope, @field, @weights)
+          klass.x = shuffle
           # https://geodacenter.github.io/glossary.html#ppvalue
-          if i_orig.positive?
-            r += 1 if stat.i >= i_orig
+          if stat_orig.positive?
+            r += 1 if klass.stat >= stat_orig
           else
-            r += 1 if stat.i <= i_orig
+            r += 1 if klass.stat <= stat_orig
           end
         end
@@ -61,17 +73,17 @@ module SpatialStats
         end
         # r is the number of equal to or more extreme samples
-        i_orig = i
+        stat_orig = stat
         r = 0
         shuffles.each do |shuffle|
-          stat = self.class.new(@scope, @x_field, @y_field, @weights)
-          stat.x = x
-          stat.y = shuffle
+          klass = self.class.new(@scope, @x_field, @y_field, @weights)
+          klass.x = x
+          klass.y = shuffle
-          if i_orig.positive?
-            r += 1 if stat.i >= i_orig
+          if stat_orig.positive?
+            r += 1 if klass.stat >= stat_orig
           else
-            r += 1 if stat.i <= i_orig
+            r += 1 if klass.stat <= stat_orig
           end
         end

data/lib/spatial_stats/local.rb CHANGED

@@ -2,7 +2,22 @@
 require 'spatial_stats/local/stat'
 require 'spatial_stats/local/bivariate_moran'
-require 'spatial_stats/local/g'
 require 'spatial_stats/local/geary'
+require 'spatial_stats/local/getis_ord'
 require 'spatial_stats/local/moran'
 require 'spatial_stats/local/multivariate_geary'
+module SpatialStats
+  ##
+  # The Local module provides functionality for local spatial statistics.
+  # Local spatial statistics describe each location in the dataset with a value,
+  # like how similar or dissimilar each area is to its neighbors.
+  #
+  # All local classes define a +stat+ method that returns the described
+  # statistic and an +mc+ method that runs a permutation test to determine a
+  # pseudo p-value for the statistic. Some also define +variance+ and
+  # +z_score+ methods that can be used to calculate p-values if the
+  # distribution is known.
+  module Local
+  end
+end

data/lib/spatial_stats/local/bivariate_moran.rb CHANGED

@@ -2,7 +2,19 @@
 module SpatialStats
   module Local
+    ##
+    # BivariateMoran computes the local correlation between a variable x and
+    # spatially lagged variable y.
     class BivariateMoran < Stat
+      ##
+      # A new instance of BivariateMoran
+      #
+      # @param [ActiveRecord::Relation] scope
+      # @param [Symbol, String] x_field to query from scope
+      # @param [Symbol, String] y_field to query from scope
+      # @param [WeightsMatrix] weights to define relationship between observations in scope
+      #
+      # @return [BivariateMoran]
       def initialize(scope, x_field, y_field, weights)
         @scope = scope
         @x_field = x_field
@@ -10,18 +22,42 @@ module SpatialStats
         @weights = weights
       end
       attr_accessor :scope, :x_field, :y_field, :weights
-      attr_writer :x, :y
-      def i
+      ##
+      # Computes the local indicator of spatial correlation for
+      # x against lagged y.
+      #
+      # @return [Array] of correlations for each observation.
+      def stat
         x.each_with_index.map do |_xi, idx|
-          i_i(idx)
+          stat_i(idx)
         end
       end
+      alias i stat
-      def i_i(idx)
+      ##
+      # Computes Bivariate Moran's I at a single index. Multiplies x at
+      # this index by the lagged y value at this index.
+      #
+      # @param [Integer] idx to perform the calculation on
+      #
+      # @return [Float] correlation at idx
+      def stat_i(idx)
         x[idx] * y_lag[idx]
       end
+      ##
+      # Permutation test to determine pseudo p-values of the +#stat+ method.
+      # Shuffles y values, holds x values, recomputes +#stat+ for each variation,
+      # then compares to the computed one. The ratio of more extreme values to
+      # permutations is returned for each observation.
+      #
+      # @see https://geodacenter.github.io/glossary.html#perm
+      #
+      # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
+      # @param [Integer] seed used in random number generator for shuffles.
+      #
+      # @return [Array] of p-values
       def mc(permutations = 99, seed = nil)
         mc_bv(permutations, seed)
       end
@@ -38,6 +74,11 @@ module SpatialStats
       private
+      def mc_i(wi, perms, idx)
+        y_lag_i = (wi * perms).sum(1)
+        x[idx] * y_lag_i
+      end
       def y_lag
         @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
       end

data/lib/spatial_stats/local/geary.rb CHANGED

@@ -2,71 +2,58 @@
 module SpatialStats
   module Local
+    ##
+    # Geary's C statistic computes the spatial lag of the difference between
+    # variable zi and its neighbors squared, in the set z. The local version
+    # returns a value for each entry.
     class Geary < Stat
+      ##
+      # A new instance of Geary
+      #
+      # @param [ActiveRecord::Relation] scope
+      # @param [Symbol, String] field to query from scope
+      # @param [WeightsMatrix] weights to define relationship between observations in scope
+      #
+      # @return [Geary]
       def initialize(scope, field, weights)
         super(scope, field, weights)
       end
-      attr_writer :x
-      def i
+      ##
+      # Computes Geary's C for every observation in the +scope+.
+      # Geary's C is defined as the squared distance between
+      # an observation and its neighbors, weighted by their weights.
+      #
+      # @return [Array] the C value for each observation
+      def stat
         z.each_with_index.map do |_zi, idx|
-          i_i(idx)
+          stat_i(idx)
         end
       end
+      alias c stat
-      def i_i(idx)
-        n = w.shape[0]
-        sum = 0
-        (0..n - 1).each do |j|
-          sum += w[idx, j] * ((z[idx] - z[j])**2)
-        end
-        sum
-      end
+      ##
+      # Values of the +field+ queried from the +scope+
+      #
+      # @return [Array]
       def x
         @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
                                                .standardize
       end
       alias z x
-      def mc(permutations = 99, seed = nil)
-        # For local tests, we need to shuffle the values
-        # but for each item, hold its value in place and shuffle
-        # its neighbors. Then we will only test for that item instead
-        # of the entire set. This will be done for each item.
-        rng = gen_rng(seed)
-        shuffles = crand(x, permutations, rng)
-        # r is the number of equal to or more extreme samples
-        i_orig = i
-        rs = [0] * i_orig.size
-        shuffles.each_with_index do |perms, idx|
-          ii_orig = i_orig[idx]
-          perms.each do |perm|
-            stat = self.class.new(scope, field, weights)
-            stat.x = perm
-            ii_new = stat.i_i(idx)
-            # https://geodacenter.github.io/glossary.html#ppvalue
-            # NOTE: this is inconsistent with the output from GeoDa
-            # for local permutation tests, they seem to use greater than
-            # not greater than or equal to. I'm going to go by the definition
-            # in the glossary for now.
-            if ii_orig.positive?
-              rs[idx] += 1 if ii_new >= ii_orig
-            else
-              rs[idx] += 1 if ii_new <= ii_orig
-            end
-          end
-        end
+      private
-        rs.map do |ri|
-          (ri + 1.0) / (permutations + 1.0)
-        end
+      def stat_i(idx)
+        zs = Numo::DFloat.cast(z)
+        zi = (z[idx] - zs)**2
+        (w[idx, true] * zi).sum
       end
-      private
+      def mc_i(wi, perms, idx)
+        zi = (z[idx] - perms)**2
+        (wi * zi).sum(1)
+      end
       def w
         @w ||= weights.full.row_standardized

data/lib/spatial_stats/local/getis_ord.rb ADDED

@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+module SpatialStats
+  module Local
+    ##
+    # GetisOrd's G and G* statistics compute the spatial autocorrelation of a
+    # variable, x. G computes the ratio of spatially lagged x to the sum of all
+    # other x's except xi for every entry. G* does the same calculation but
+    # includes xi in the spatial lag and denominator.
+    class GetisOrd < Stat
+      ##
+      # A new instance of GetisOrd
+      #
+      # @param [ActiveRecord::Relation] scope
+      # @param [Symbol, String] field to query from scope
+      # @param [WeightsMatrix] weights to define relationship between observations in scope
+      #
+      # @return [GetisOrd]
+      def initialize(scope, field, weights, star = nil)
+        super(scope, field, weights)
+        @star = star
+      end
+      attr_accessor :star
+      ##
+      # Computes the G or G* statistic for every observation in x.
+      #
+      # @return [Array] of autocorrelations for each observation.
+      def stat
+        x.each_with_index.map do |_x_val, idx|
+          stat_i(idx)
+        end
+      end
+      alias g stat
+      ##
+      # Values of the +field+ queried from the +scope+
+      #
+      # @return [Array]
+      def x
+        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
+      end
+      alias z x
+      ##
+      # True if G* is being used, false if G is being used.
+      # If no value is passed in the constructor, it will be determined
+      # based off of the trace of the weights.
+      #
+      # @return [Boolean] of star
+      def star?
+        if @star.nil?
+          @star = weights.full.trace.positive?
+        else
+          @star
+        end
+      end
+      private
+      def stat_i(idx)
+        x_lag[idx] / denominators[idx]
+      end
+      def mc_i(wi, perms, idx)
+        x_lag_i = (wi * perms).sum(1)
+        x_lag_i / denominators[idx]
+      end
+      def w
+        @w ||= begin
+          if star?
+            weights.full.windowed.row_standardized
+          else
+            weights.standardized
+          end
+        end
+      end
+      def z_lag
+        # window if star is true
+        @z_lag ||= begin
+          if star?
+            SpatialStats::Utils::Lag.window_sum(w, x)
+          else
+            SpatialStats::Utils::Lag.neighbor_sum(w, x)
+          end
+        end
+      end
+      alias x_lag z_lag
+      def denominators
+        @denominators ||= begin
+          n = w.shape[0]
+          if star?
+            [x.sum] * n
+          else
+            # add everything but i
+            (0..n - 1).each.map do |idx|
+              terms = x.dup
+              terms.delete_at(idx)
+              terms.sum
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/spatial_stats/local/moran.rb CHANGED

@@ -1,36 +1,44 @@
 # frozen_string_literal: true
-# https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
-# For now, instead of doing neighbor's variance (Si**2), I'm going to use
-# the total sample variance. This is how GeoDa does it, but is different
-# than arcgis. This shouldn't affect the expectation and variance of I.
 module SpatialStats
   module Local
+    ##
+    # Moran's I statistic computes the spatial autocorrelation of variable x.
+    # It does this by computing a spatially lagged version of itself and
+    # comparing that with each observation based on the weights matrix.
+    # The local version returns the spatial autocorrelation for each
+    # observation in the dataset.
     class Moran < Stat
+      ##
+      # A new instance of Moran
+      #
+      # @param [ActiveRecord::Relation] scope
+      # @param [Symbol, String] field to query from scope
+      # @param [WeightsMatrix] weights to define relationship between observations in scope
+      #
+      # @return [Moran]
       def initialize(scope, field, weights)
         super(scope, field, weights)
-        @scope = scope
-        @field = field
-        @weights = weights
       end
-      attr_writer :x, :z_lag
-      def i
+      ##
+      # Computes the local indicator of spatial autocorrelation (LISA) for
+      # x against lagged x.
+      #
+      # @return [Array] of autocorrelations for each observation.
+      def stat
         z.each_with_index.map do |_z_val, idx|
-          i_i(idx)
+          stat_i(idx)
         end
       end
-      def i_i(idx)
-        # method to compute i at a single index.
-        # this is important for permutation testing
-        # because for each test we only want the result from
-        # 1 index not the entire set, so this will save lots of
-        # computations.
-        sum_term = z_lag[idx]
-        (z[idx] / si2) * sum_term
-      end
+      alias i stat
+      ##
+      # Expected value of I for each observation. Since the weights matrix
+      # is standardized during the calculation, the expectation is the same for
+      # each observation.
+      #
+      # @return [Float]
       def expectation
         # since we are using row standardized weights, the expectation
         # will just be -1/(n-1) for all items. Otherwise, it would be
@@ -39,6 +47,12 @@ module SpatialStats
         -1.0 / (@weights.n - 1)
       end
+      ##
+      # Variance of I for each observation.
+      #
+      # @see https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
+      #
+      # @return [Array] of variances for each observation
       def variance
         # formula is A - B - (E[I])**2
         wt = w.row_standardized
@@ -54,14 +68,21 @@ module SpatialStats
         vars
       end
+      ##
+      # Values of the +field+ queried from the +scope+
+      #
+      # @return [Array]
       def x
         @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
                                                .standardize
       end
       alias z x
+      ##
+      # Spatially lagged x variable at each observation.
+      #
+      # @return [Array]
       def z_lag
-        # can't memoize yet because of mc testing
         # w is already row_standardized, so we are using
         # neighbor sum instead of neighbor_average to save cost
         @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
@@ -69,6 +90,18 @@ module SpatialStats
       private
+      def stat_i(idx)
+        sum_term = z_lag[idx]
+        (z[idx] / si2) * sum_term
+      end
+      def mc_i(wi, perms, idx)
+        # compute i for a single index given DFloat of neighbor weights
+        # and DFloat of neighbor z perms
+        z_lag_i = (wi * perms).sum(1)
+        z[idx] * z_lag_i
+      end
       def si2
         # @si2 ||= z.sample_variance
         # we standardize so sample_variance is 1