spatial_stats 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,12 @@
2
2
 
3
3
  module SpatialStats
4
4
  module Global
5
+ ##
6
+ # Stat is the abstract base class for global stats.
7
+ # It defines the methods that are common between all classes
8
+ # and will raise a NotImplementedError on those that are specific
9
+ # for each type of statistic.
5
10
  class Stat
6
- # Base class for global stats
7
11
  def initialize(scope, field, weights)
8
12
  @scope = scope
9
13
  @field = field
@@ -11,10 +15,14 @@ module SpatialStats
11
15
  end
12
16
  attr_accessor :scope, :field, :weights
13
17
 
14
- def i
15
- raise NotImplementedError, 'method i not defined'
18
+ def stat
19
+ raise NotImplementedError, 'method stat not defined'
16
20
  end
17
21
 
22
+ ##
23
+ # The expected value of +#stat+
24
+ #
25
+ # @return [Float]
18
26
  def expectation
19
27
  raise NotImplementedError, 'method expectation not implemented'
20
28
  end
@@ -23,8 +31,12 @@ module SpatialStats
23
31
  raise NotImplementedError, 'method variance not implemented'
24
32
  end
25
33
 
34
+ ##
35
+ # Z-score of the statistic.
36
+ #
37
+ # @return [Float] the number of deviations from the mean
26
38
  def z_score
27
- (i - expectation) / Math.sqrt(variance)
39
+ (stat - expectation) / Math.sqrt(variance)
28
40
  end
29
41
 
30
42
  def mc(permutations, seed)
@@ -35,17 +47,17 @@ module SpatialStats
35
47
  end
36
48
  # r is the number of equal to or more extreme samples
37
49
  # one sided
38
- i_orig = i
50
+ stat_orig = stat
39
51
  r = 0
40
52
  shuffles.each do |shuffle|
41
- stat = self.class.new(@scope, @field, @weights)
42
- stat.x = shuffle
53
+ klass = self.class.new(@scope, @field, @weights)
54
+ klass.x = shuffle
43
55
 
44
56
  # https://geodacenter.github.io/glossary.html#ppvalue
45
- if i_orig.positive?
46
- r += 1 if stat.i >= i_orig
57
+ if stat_orig.positive?
58
+ r += 1 if klass.stat >= stat_orig
47
59
  else
48
- r += 1 if stat.i <= i_orig
60
+ r += 1 if klass.stat <= stat_orig
49
61
  end
50
62
  end
51
63
 
@@ -61,17 +73,17 @@ module SpatialStats
61
73
  end
62
74
 
63
75
  # r is the number of equal to or more extreme samples
64
- i_orig = i
76
+ stat_orig = stat
65
77
  r = 0
66
78
  shuffles.each do |shuffle|
67
- stat = self.class.new(@scope, @x_field, @y_field, @weights)
68
- stat.x = x
69
- stat.y = shuffle
79
+ klass = self.class.new(@scope, @x_field, @y_field, @weights)
80
+ klass.x = x
81
+ klass.y = shuffle
70
82
 
71
- if i_orig.positive?
72
- r += 1 if stat.i >= i_orig
83
+ if stat_orig.positive?
84
+ r += 1 if klass.stat >= stat_orig
73
85
  else
74
- r += 1 if stat.i <= i_orig
86
+ r += 1 if klass.stat <= stat_orig
75
87
  end
76
88
  end
77
89
 
@@ -2,7 +2,22 @@
2
2
 
3
3
  require 'spatial_stats/local/stat'
4
4
  require 'spatial_stats/local/bivariate_moran'
5
- require 'spatial_stats/local/g'
6
5
  require 'spatial_stats/local/geary'
6
+ require 'spatial_stats/local/getis_ord'
7
7
  require 'spatial_stats/local/moran'
8
8
  require 'spatial_stats/local/multivariate_geary'
9
+
10
+ module SpatialStats
11
+ ##
12
+ # The Local module provides functionality for local spatial statistics.
13
+ # Local spatial statistics describe each location in the dataset with a value,
14
+ # like how similar or dissimilar each area is to its neighbors.
15
+ #
16
+ # All local classes define a +stat+ method that returns the described
17
+ # statistic and an +mc+ method that runs a permutation test to determine a
18
+ # pseudo p-value for the statistic. Some also define +variance+ and
19
+ # +z_score+ methods that can be used to calculate p-values if the
20
+ # distribution is known.
21
+ module Local
22
+ end
23
+ end
@@ -2,7 +2,19 @@
2
2
 
3
3
  module SpatialStats
4
4
  module Local
5
+ ##
6
+ # BivariateMoran computes the local correlation between a variable x and
7
+ # spatially lagged variable y.
5
8
  class BivariateMoran < Stat
9
+ ##
10
+ # A new instance of BivariateMoran
11
+ #
12
+ # @param [ActiveRecord::Relation] scope
13
+ # @param [Symbol, String] x_field to query from scope
14
+ # @param [Symbol, String] y_field to query from scope
15
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
16
+ #
17
+ # @return [BivariateMoran]
6
18
  def initialize(scope, x_field, y_field, weights)
7
19
  @scope = scope
8
20
  @x_field = x_field
@@ -10,18 +22,42 @@ module SpatialStats
10
22
  @weights = weights
11
23
  end
12
24
  attr_accessor :scope, :x_field, :y_field, :weights
13
- attr_writer :x, :y
14
25
 
15
- def i
26
+ ##
27
+ # Computes the local indicator of spatial correlation for
28
+ # x against lagged y.
29
+ #
30
+ # @return [Array] of correlations for each observation.
31
+ def stat
16
32
  x.each_with_index.map do |_xi, idx|
17
- i_i(idx)
33
+ stat_i(idx)
18
34
  end
19
35
  end
36
+ alias i stat
20
37
 
21
- def i_i(idx)
38
+ ##
39
+ # Computes Bivariate Moran's I at a single index. Multiplies x at
40
+ # this index by the lagged y value at this index.
41
+ #
42
+ # @param [Integer] idx to perform the calculation on
43
+ #
44
+ # @return [Float] correlation at idx
45
+ def stat_i(idx)
22
46
  x[idx] * y_lag[idx]
23
47
  end
24
48
 
49
+ ##
50
+ # Permutation test to determine pseudo p-values of the +#stat+ method.
51
+ # Shuffles y values, holds x values, recomputes +#stat+ for each variation,
52
+ # then compares to the computed one. The ratio of more extreme values to
53
+ # permutations is returned for each observation.
54
+ #
55
+ # @see https://geodacenter.github.io/glossary.html#perm
56
+ #
57
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
58
+ # @param [Integer] seed used in random number generator for shuffles.
59
+ #
60
+ # @return [Array] of p-values
25
61
  def mc(permutations = 99, seed = nil)
26
62
  mc_bv(permutations, seed)
27
63
  end
@@ -38,6 +74,11 @@ module SpatialStats
38
74
 
39
75
  private
40
76
 
77
+ def mc_i(wi, perms, idx)
78
+ y_lag_i = (wi * perms).sum(1)
79
+ x[idx] * y_lag_i
80
+ end
81
+
41
82
  def y_lag
42
83
  @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
43
84
  end
@@ -2,71 +2,58 @@
2
2
 
3
3
  module SpatialStats
4
4
  module Local
5
+ ##
6
+ # Geary's C statistic computes the spatial lag of the difference between
7
+ # variable zi and its neighbors squared, in the set z. The local version
8
+ # returns a value for each entry.
5
9
  class Geary < Stat
10
+ ##
11
+ # A new instance of Geary
12
+ #
13
+ # @param [ActiveRecord::Relation] scope
14
+ # @param [Symbol, String] field to query from scope
15
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
16
+ #
17
+ # @return [Geary]
6
18
  def initialize(scope, field, weights)
7
19
  super(scope, field, weights)
8
20
  end
9
- attr_writer :x
10
21
 
11
- def i
22
+ ##
23
+ # Computes Geary's C for every observation in the +scope+.
24
+ # Geary's C is defined as the square distance between
25
+ # an observation and its neighbors, factored to their weights.
26
+ #
27
+ # @return [Array] the C value for each observation
28
+ def stat
12
29
  z.each_with_index.map do |_zi, idx|
13
- i_i(idx)
30
+ stat_i(idx)
14
31
  end
15
32
  end
33
+ alias c stat
16
34
 
17
- def i_i(idx)
18
- n = w.shape[0]
19
- sum = 0
20
- (0..n - 1).each do |j|
21
- sum += w[idx, j] * ((z[idx] - z[j])**2)
22
- end
23
- sum
24
- end
25
-
35
+ ##
36
+ # Values of the +field+ queried from the +scope+
37
+ #
38
+ # @return [Array]
26
39
  def x
27
40
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
28
41
  .standardize
29
42
  end
30
43
  alias z x
31
44
 
32
- def mc(permutations = 99, seed = nil)
33
- # For local tests, we need to shuffle the values
34
- # but for each item, hold its value in place and shuffle
35
- # its neighbors. Then we will only test for that item instead
36
- # of the entire set. This will be done for each item.
37
- rng = gen_rng(seed)
38
- shuffles = crand(x, permutations, rng)
39
-
40
- # r is the number of equal to or more extreme samples
41
- i_orig = i
42
- rs = [0] * i_orig.size
43
-
44
- shuffles.each_with_index do |perms, idx|
45
- ii_orig = i_orig[idx]
46
- perms.each do |perm|
47
- stat = self.class.new(scope, field, weights)
48
- stat.x = perm
49
- ii_new = stat.i_i(idx)
50
-
51
- # https://geodacenter.github.io/glossary.html#ppvalue
52
- # NOTE: this is inconsistent with the output from GeoDa
53
- # for local permutation tests, they seem to use greater than
54
- # not greater than or equal to. I'm going to go by the definition
55
- # in the glossary for now.
56
- if ii_orig.positive?
57
- rs[idx] += 1 if ii_new >= ii_orig
58
- else
59
- rs[idx] += 1 if ii_new <= ii_orig
60
- end
61
- end
62
- end
45
+ private
63
46
 
64
- rs.map do |ri|
65
- (ri + 1.0) / (permutations + 1.0)
66
- end
47
+ def stat_i(idx)
48
+ zs = Numo::DFloat.cast(z)
49
+ zi = (z[idx] - zs)**2
50
+ (w[idx, true] * zi).sum
67
51
  end
68
52
 
69
- private
53
+ def mc_i(wi, perms, idx)
54
+ zi = (z[idx] - perms)**2
55
+ (wi * zi).sum(1)
56
+ end
70
57
 
71
58
  def w
72
59
  @w ||= weights.full.row_standardized
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SpatialStats
4
+ module Local
5
+ ##
6
+ # GetisOrd's G and G* statistics compute the spatial autocorrelation of a
7
+ # variable, x. G computes the ratio of spatially lagged x to the sum of all
8
+ # other x's except xi for every entry. G* does the same calculation but
9
+ # includes xi in the spatial lag and denominator.
10
+ class GetisOrd < Stat
11
+ ##
12
+ # A new instance of GetisOrd
13
+ #
14
+ # @param [ActiveRecord::Relation] scope
15
+ # @param [Symbol, String] field to query from scope
16
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
17
+ #
18
+ # @return [GetisOrd]
19
+ def initialize(scope, field, weights, star = nil)
20
+ super(scope, field, weights)
21
+ @star = star
22
+ end
23
+ attr_accessor :star
24
+
25
+ ##
26
+ # Computes the G or G* statistic for every observation in x.
27
+ #
28
+ # @return [Array] of autocorrelations for each observation.
29
+ def stat
30
+ x.each_with_index.map do |_x_val, idx|
31
+ stat_i(idx)
32
+ end
33
+ end
34
+ alias g stat
35
+
36
+ ##
37
+ # Values of the +field+ queried from the +scope+
38
+ #
39
+ # @return [Array]
40
+ def x
41
+ @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
42
+ end
43
+ alias z x
44
+
45
+ ##
46
+ # True if G* is being used, false if G is being used.
47
+ # If no value is passed in the constructor, it will be determined
48
+ # based off of the trace of the weights.
49
+ #
50
+ # @return [Boolean] of star
51
+ def star?
52
+ if @star.nil?
53
+ @star = weights.full.trace.positive?
54
+ else
55
+ @star
56
+ end
57
+ end
58
+
59
+ private
60
+
61
+ def stat_i(idx)
62
+ x_lag[idx] / denominators[idx]
63
+ end
64
+
65
+ def mc_i(wi, perms, idx)
66
+ x_lag_i = (wi * perms).sum(1)
67
+ x_lag_i / denominators[idx]
68
+ end
69
+
70
+ def w
71
+ @w ||= begin
72
+ if star?
73
+ weights.full.windowed.row_standardized
74
+ else
75
+ weights.standardized
76
+ end
77
+ end
78
+ end
79
+
80
+ def z_lag
81
+ # window if star is true
82
+ @z_lag ||= begin
83
+ if star?
84
+ SpatialStats::Utils::Lag.window_sum(w, x)
85
+ else
86
+ SpatialStats::Utils::Lag.neighbor_sum(w, x)
87
+ end
88
+ end
89
+ end
90
+ alias x_lag z_lag
91
+
92
+ def denominators
93
+ @denominators ||= begin
94
+ n = w.shape[0]
95
+ if star?
96
+ [x.sum] * n
97
+ else
98
+ # add everything but i
99
+ (0..n - 1).each.map do |idx|
100
+ terms = x.dup
101
+ terms.delete_at(idx)
102
+ terms.sum
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -1,36 +1,44 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
4
- # For now, instead of doing neighbor's variance (Si**2), I'm going to use
5
- # the total sample variance. This is how GeoDa does it, but is different
6
- # than arcgis. This shouldn't affect the expectation and variance of I.
7
3
  module SpatialStats
8
4
  module Local
5
+ ##
6
+ # Moran's I statistic computes the spatial autocorrelation of variable x.
7
+ # It does this by computing a spatially lagged version of itself and
8
+ # comparing that with each observation based on the weights matrix.
9
+ # The local version returns the spatial autocorrelation for each
10
+ # observation in the dataset.
9
11
  class Moran < Stat
12
+ ##
13
+ # A new instance of Moran
14
+ #
15
+ # @param [ActiveRecord::Relation] scope
16
+ # @param [Symbol, String] field to query from scope
17
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
18
+ #
19
+ # @return [Moran]
10
20
  def initialize(scope, field, weights)
11
21
  super(scope, field, weights)
12
- @scope = scope
13
- @field = field
14
- @weights = weights
15
22
  end
16
- attr_writer :x, :z_lag
17
23
 
18
- def i
24
+ ##
25
+ # Computes the local indicator of spatial autocorrelation (lisa) for
26
+ # x against lagged x.
27
+ #
28
+ # @return [Array] of autocorrelations for each observation.
29
+ def stat
19
30
  z.each_with_index.map do |_z_val, idx|
20
- i_i(idx)
31
+ stat_i(idx)
21
32
  end
22
33
  end
23
-
24
- def i_i(idx)
25
- # method to compute i at a single index.
26
- # this is important for permutation testing
27
- # because for each test we only want the result from
28
- # 1 index not the entire set, so this will save lots of
29
- # computations.
30
- sum_term = z_lag[idx]
31
- (z[idx] / si2) * sum_term
32
- end
33
-
34
+ alias i stat
35
+
36
+ ##
37
+ # Expected value of I for each observation. Since the weights matrix
38
+ # is standardized during the calculation, the expectation is the same for
39
+ # each observation.
40
+ #
41
+ # @return [Float]
34
42
  def expectation
35
43
  # since we are using row standardized weights, the expectation
36
44
  # will just be -1/(n-1) for all items. Otherwise, it would be
@@ -39,6 +47,12 @@ module SpatialStats
39
47
  -1.0 / (@weights.n - 1)
40
48
  end
41
49
 
50
+ ##
51
+ # Variance of I for each observation.
52
+ #
53
+ # @see https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
54
+ #
55
+ # @return [Array] of variances for each observation
42
56
  def variance
43
57
  # formula is A - B - (E[I])**2
44
58
  wt = w.row_standardized
@@ -54,14 +68,21 @@ module SpatialStats
54
68
  vars
55
69
  end
56
70
 
71
+ ##
72
+ # Values of the +field+ queried from the +scope+
73
+ #
74
+ # @return [Array]
57
75
  def x
58
76
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
59
77
  .standardize
60
78
  end
61
79
  alias z x
62
80
 
81
+ ##
82
+ # Spatially lagged x variable at each observation.
83
+ #
84
+ # @return [Array]
63
85
  def z_lag
64
- # can't memoize yet because of mc testing
65
86
  # w is already row_standardized, so we are using
66
87
  # neighbor sum instead of neighbor_average to save cost
67
88
  @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
@@ -69,6 +90,18 @@ module SpatialStats
69
90
 
70
91
  private
71
92
 
93
+ def stat_i(idx)
94
+ sum_term = z_lag[idx]
95
+ (z[idx] / si2) * sum_term
96
+ end
97
+
98
+ def mc_i(wi, perms, idx)
99
+ # compute i for a single index given DFloat of neighbor weights
100
+ # and DFloat of neighbor z perms
101
+ z_lag_i = (wi * perms).sum(1)
102
+ z[idx] * z_lag_i
103
+ end
104
+
72
105
  def si2
73
106
  # @si2 ||= z.sample_variance
74
107
  # we standardize so sample_variance is 1