RubyGems - spatial_stats - Versions diffs - 0.2.2 → 1.0.4 - Mend

spatial_stats 0.2.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/README.md +126 -55
data/Rakefile +7 -0
data/ext/spatial_stats/csr_matrix.c +380 -0
data/ext/spatial_stats/csr_matrix.h +34 -0
data/ext/spatial_stats/extconf.rb +6 -0
data/ext/spatial_stats/spatial_stats.c +32 -0
data/lib/spatial_stats.rb +1 -0
data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
data/lib/spatial_stats/global/moran.rb +43 -36
data/lib/spatial_stats/global/stat.rb +55 -27
data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
data/lib/spatial_stats/local/geary.rb +35 -5
data/lib/spatial_stats/local/getis_ord.rb +45 -17
data/lib/spatial_stats/local/moran.rb +39 -9
data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
data/lib/spatial_stats/local/stat.rb +112 -80
data/lib/spatial_stats/narray_ext.rb +5 -5
data/lib/spatial_stats/spatial_stats.so +0 -0
data/lib/spatial_stats/utils.rb +25 -0
data/lib/spatial_stats/utils/lag.rb +10 -10
data/lib/spatial_stats/version.rb +1 -1
data/lib/spatial_stats/weights/contiguous.rb +20 -10
data/lib/spatial_stats/weights/distant.rb +38 -20
data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
metadata +33 -11
data/MIT-LICENSE +0 -20

data/lib/spatial_stats/local/bivariate_moran.rb CHANGED

@@ -19,10 +19,28 @@ module SpatialStats
         @scope = scope
         @x_field = x_field
         @y_field = y_field
-        @weights = weights
+        @weights = weights.standardize
       end
       attr_accessor :scope, :x_field, :y_field, :weights
+      ##
+      # A new instance of BivariateMoran, from vector and weights.
+      #
+      # @param [Array] x observations of dataset
+      # @param [Array] y observations of dataset
+      # @param [WeightsMatrix] weights to define relationships between observations
+      #
+      # @return [BivariateMoran]
+      def self.from_observations(x, y, weights)
+        n = weights.n
+        raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
+        instance = new(nil, nil, nil, weights.standardize)
+        instance.x = x
+        instance.y = y
+        instance
+      end
       ##
       # Computes the local indicator of spatial correlation for
       # x against lagged y.
@@ -62,6 +80,61 @@ module SpatialStats
         mc_bv(permutations, seed)
       end
+      ##
+      # Determines what quadrant an observation is in, based on its value
+      # compared to its neighbors. This does not work for all stats, since
+      # it requires that values can be negative.
+      #
+      # In a standardized array of z, high values are values greater than 0.
+      # An observation's neighbors are summarized by the spatial lag: if the lag
+      # is positive the neighbors are high, and low otherwise.
+      #
+      # Quadrants are:
+      # [HH] a high value surrounded by other high values
+      # [LH] a low value surrounded by high values
+      # [LL] a low value surrounded by low values
+      # [HL] a high value surrounded by low values
+      #
+      # @return [Array] of labels
+      def quads
+        # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
+        z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, y)
+        zp = x.map(&:positive?)
+        lp = z_lag.map(&:positive?)
+        # hh = zp & lp
+        # lh = zp ^ true & lp
+        # ll = zp ^ true & lp ^ true
+        # hl = zp & lp ^ true
+        hh = zp.each_with_index.map { |v, idx| v & lp[idx] }
+        lh = zp.each_with_index.map { |v, idx| (v ^ true) & lp[idx] }
+        ll = zp.each_with_index.map { |v, idx| (v ^ true) & (lp[idx] ^ true) }
+        hl = zp.each_with_index.map { |v, idx| v & (lp[idx] ^ true) }
+        # now zip lists and map them to proper terms
+        quad_terms = %w[HH LH LL HL]
+        hh.zip(lh, ll, hl).map do |feature|
+          quad_terms[feature.index(true)]
+        end
+      end
+      alias groups quads
+      ##
+      # Summary of the statistic. Computes +stat+, +mc+, and +groups+, then returns
+      # the values as an array of hashes, one per observation.
+      #
+      # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
+      # @param [Integer] seed used in random number generator for shuffles.
+      #
+      # @return [Array]
+      def summary(permutations = 99, seed = nil)
+        p_vals = mc(permutations, seed)
+        data = weights.keys.zip(stat, p_vals, groups)
+        data.map do |row|
+          { key: row[0], stat: row[1], p: row[2], group: row[3] }
+        end
+      end
       def x
         @x ||= SpatialStats::Queries::Variables.query_field(@scope, @x_field)
                                                .standardize
@@ -79,8 +152,17 @@ module SpatialStats
         x[idx] * y_lag_i
       end
+      def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
+        # Since moran can be positive or negative, go by this definition
+        if stat_i_orig.positive?
+          (stat_i_new >= stat_i_orig).count
+        else
+          (stat_i_new <= stat_i_orig).count
+        end
+      end
       def y_lag
-        @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
+        @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, y)
       end
     end
   end

data/lib/spatial_stats/local/geary.rb CHANGED

@@ -32,6 +32,25 @@ module SpatialStats
       end
       alias c stat
+      ##
+      # Computes the groups each observation belongs to.
+      # Potential groups for Geary's C are:
+      # [HH] High-High
+      # [LL] Low-Low
+      # [N] Negative - Group traditionally for HL and LH, but since the difference is squared they are in the same group.
+      #
+      #
+      # @return [Array] groups for each observation
+      def groups
+        quads.map do |quad|
+          if %w[HL LH].include?(quad)
+            'N'
+          else
+            quad
+          end
+        end
+      end
       ##
       # Values of the +field+ queried from the +scope+
       #
@@ -45,9 +64,11 @@ module SpatialStats
       private
       def stat_i(idx)
-        zs = Numo::DFloat.cast(z)
-        zi = (z[idx] - zs)**2
-        (w[idx, true] * zi).sum
+        # TODO: maybe don't even use stat_i
+        # just form all of the modified zs and then
+        # pass it to a loop of mulvec all implemented in c ext
+        zi = z.map { |val| (z[idx] - val)**2 }
+        weights.sparse.dot_row(zi, idx)
       end
       def mc_i(wi, perms, idx)
@@ -55,8 +76,17 @@ module SpatialStats
         (wi * zi).sum(1)
       end
-      def w
-        @w ||= weights.full.row_standardized
+      def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
+        # Geary cannot be negative, so we have to use this technique from
+        # GeoDa to determine p values. Note I slightly modified it to be inclusive
+        # on both tails not just the lower tail.
+        # https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
+        mean = stat_i_new.mean
+        if stat_i_orig <= mean
+          (stat_i_new <= stat_i_orig).count
+        else
+          (stat_i_new >= stat_i_orig).count
+        end
       end
     end
   end

data/lib/spatial_stats/local/getis_ord.rb CHANGED

@@ -14,13 +14,18 @@ module SpatialStats
       # @param [ActiveRecord::Relation] scope
       # @param [Symbol, String] field to query from scope
       # @param [WeightsMatrix] weights to define relationship between observations in scope
+      # @param [Boolean] star to preset if star will be true or false. Will be calculated otherwise.
       #
       # @return [GetisOrd]
       def initialize(scope, field, weights, star = nil)
-        super(scope, field, weights)
+        @scope = scope
+        @field = field
+        @weights = weights
         @star = star
+        calc_weights
       end
       attr_accessor :star
+      attr_writer :x
       ##
       # Computes the G or G* statistic for every observation in x.
@@ -33,6 +38,25 @@ module SpatialStats
       end
       alias g stat
+      ##
+      # Computes the groups each observation belongs to.
+      # Potential groups for G are:
+      # [H] High
+      # [L] Low
+      #
+      # Group is high when standardized z is positive, low otherwise.
+      #
+      # @return [Array] groups for each observation
+      def groups
+        z.standardize.map do |val|
+          if val.positive?
+            'H'
+          else
+            'L'
+          end
+        end
+      end
       ##
       # Values of the +field+ queried from the +scope+
       #
@@ -50,7 +74,7 @@ module SpatialStats
       # @return [Boolean] of star
       def star?
         if @star.nil?
-          @star = weights.full.trace.positive?
+          @star = weights.dense.trace.positive?
         else
           @star
         end
@@ -67,25 +91,29 @@ module SpatialStats
         x_lag_i / denominators[idx]
       end
-      def w
-        @w ||= begin
-          if star?
-            weights.full.windowed.row_standardized
-          else
-            weights.standardized
-          end
+      def mc_observation_calc(stat_i_orig, stat_i_new, permutations)
+        # GetisOrd cannot be negative, so we have to use this technique from
+        # ESDA to determine if we should select p or 1-p.
+        # https://github.com/pysal/esda/blob/master/esda/getisord.py#L388
+        num_larger = (stat_i_new >= stat_i_orig).count
+        is_low = (permutations - num_larger) < num_larger
+        if is_low
+          permutations - num_larger
+        else
+          num_larger
         end
       end
+      def calc_weights
+        @weights = if star?
+                     weights.window.standardize
+                   else
+                     weights.standardize
+                   end
+      end
       def z_lag
-        # window if star is true
-        @z_lag ||= begin
-          if star?
-            SpatialStats::Utils::Lag.window_sum(w, x)
-          else
-            SpatialStats::Utils::Lag.neighbor_sum(w, x)
-          end
-        end
+        @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, x)
       end
       alias x_lag z_lag

data/lib/spatial_stats/local/moran.rb CHANGED

@@ -55,12 +55,11 @@ module SpatialStats
       # @return [Array] of variances for each observation
       def variance
         # formula is A - B - (E[I])**2
-        wt = w.row_standardized
         exp = expectation
         vars = []
-        a_terms = a_calc(wt)
-        b_terms = b_calc(wt)
+        a_terms = a_calc
+        b_terms = b_calc
         a_terms.each_with_index do |a_term, idx|
           vars << (a_term - b_terms[idx] - (exp**2))
@@ -68,6 +67,21 @@ module SpatialStats
         vars
       end
+      ##
+      # Computes the groups each observation belongs to.
+      # Potential groups for Moran's I are:
+      # [HH] High-High
+      # [HL] High-Low
+      # [LH] Low-High
+      # [LL] Low-Low
+      #
+      # This is the same as the +#quads+ method in the +Stat+ class.
+      #
+      # @return [Array] groups for each observation
+      def groups
+        quads
+      end
       ##
       # Values of the +field+ queried from the +scope+
       #
@@ -85,7 +99,7 @@ module SpatialStats
       def z_lag
         # w is already row_standardized, so we are using
         # neighbor sum instead of neighbor_average to save cost
-        @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
+        @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, z)
       end
       private
@@ -102,6 +116,15 @@ module SpatialStats
         z[idx] * z_lag_i
       end
+      def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
+        # Since moran can be positive or negative, go by this definition
+        if stat_i_orig.positive?
+          (stat_i_new >= stat_i_orig).count
+        else
+          (stat_i_new <= stat_i_orig).count
+        end
+      end
       def si2
         # @si2 ||= z.sample_variance
         # we standardize so sample_variance is 1
@@ -109,20 +132,27 @@ module SpatialStats
       end
       # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
-      def a_calc(wt)
-        n = wt.shape[0]
+      # TODO: sparse
+      def a_calc
+        n = weights.n
         b2i = b2i_calc
+        wts = weights.sparse.values
+        row_index = weights.sparse.row_index
         a_terms = []
         (0..n - 1).each do |idx|
-          sigma_term = wt[idx, true].to_a.sum { |v| v**2 }
+          row_range = row_index[idx]..(row_index[idx + 1] - 1)
+          wt = wts[row_range]
+          sigma_term = wt.sum { |v| v**2 }
           a_terms << (n - b2i) * sigma_term / (n - 1)
         end
         a_terms
       end
-      def b_calc(wt)
-        n = wt.shape[0]
+      def b_calc
+        n = weights.n
         b2i = b2i_calc
         b_terms = []

data/lib/spatial_stats/local/multivariate_geary.rb CHANGED

@@ -23,7 +23,7 @@ module SpatialStats
       def initialize(scope, fields, weights)
         @scope = scope
         @fields = fields
-        @weights = weights
+        @weights = weights.standardize
       end
       attr_accessor :scope, :fields, :weights
@@ -60,37 +60,47 @@ module SpatialStats
         # of indices, which will return a list of new orders for the fields.
         # They will then be shuffled corresponding to the new indices.
         rng = gen_rng(seed)
-        n = w.shape[0]
-        indices = (0..(n - 1)).to_a
-        shuffles = crand(indices, permutations, rng)
+        rids = crand(permutations, rng)
+        n_1 = weights.n - 1
+        sparse = weights.sparse
+        row_index = sparse.row_index
+        ws = sparse.values
+        wc = weights.wc
         stat_orig = stat
-        rs = [0] * n
-        ws = neighbor_weights
-        idx = 0
-        while idx < n
+        ids = (0..n_1).to_a
+        observations = Array.new(weights.n)
+        (0..n_1).each do |idx|
+          idsi = ids.dup
+          idsi.delete_at(idx)
+          idsi.shuffle!(random: rng)
+          idsi = Numo::Int32.cast(idsi)
+          sample = rids[idsi[rids[true, 0..wc[idx] - 1]]]
+          # account for case where there are no neighbors
+          row_range = row_index[idx]..(row_index[idx + 1] - 1)
+          if row_range.size.zero?
+            observations[idx] = permutations
+            next
+          end
+          wi = Numo::DFloat.cast(ws[row_range])
+          stat_i_new = mc_i(wi, sample, idx)
           stat_i_orig = stat_orig[idx]
-          wi = Numo::DFloat.cast(ws[idx])
-          # for each field, compute the C value at that index.
-          stat_i_new = mc_i(wi, shuffles[idx], idx)
-          rs[idx] = if stat_i_orig.positive?
-                      (stat_i_new >= stat_i_orig).count
-                    else
-                      (stat_i_new <= stat_i_orig).count
-                    end
-          idx += 1
+          observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
+                                                  permutations)
         end
-        rs.map do |ri|
+        observations.map do |ri|
           (ri + 1.0) / (permutations + 1.0)
         end
       end
+      def groups
+        raise NotImplementedError, 'groups not implemented'
+      end
       private
       def mc_i(wi, perms, idx)
@@ -108,6 +118,19 @@ module SpatialStats
         cs.mean(0)
       end
+      def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
+        # Geary cannot be negative, so we have to use this technique from
+        # GeoDa to determine p values. Note I slightly modified it to be inclusive
+        # on both tails not just the lower tail.
+        # https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
+        mean = stat_i_new.mean
+        if stat_i_orig <= mean
+          (stat_i_new <= stat_i_orig).count
+        else
+          (stat_i_new >= stat_i_orig).count
+        end
+      end
       def field_data
         @field_data ||= fields.map do |field|
           SpatialStats::Queries::Variables.query_field(@scope, field)