spatial_stats 0.1.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,8 +2,12 @@
2
2
 
3
3
  module SpatialStats
4
4
  module Global
5
+ ##
6
+ # Stat is the abstract base class for global stats.
7
+ # It defines the methods that are common between all classes
8
+ # and will raise a NotImplementedError on those that are specific
9
+ # for each type of statistic.
5
10
  class Stat
6
- # Base class for global stats
7
11
  def initialize(scope, field, weights)
8
12
  @scope = scope
9
13
  @field = field
@@ -11,10 +15,14 @@ module SpatialStats
11
15
  end
12
16
  attr_accessor :scope, :field, :weights
13
17
 
14
- def i
15
- raise NotImplementedError, 'method i not defined'
18
+ def stat
19
+ raise NotImplementedError, 'method stat not defined'
16
20
  end
17
21
 
22
+ ##
23
+ # The expected value of +#stat+
24
+ #
25
+ # @return [Float]
18
26
  def expectation
19
27
  raise NotImplementedError, 'method expectation not implemented'
20
28
  end
@@ -23,8 +31,12 @@ module SpatialStats
23
31
  raise NotImplementedError, 'method variance not implemented'
24
32
  end
25
33
 
34
+ ##
35
+ # Z-score of the statistic.
36
+ #
37
+ # @return [Float] the number of deviations from the mean
26
38
  def z_score
27
- (i - expectation) / Math.sqrt(variance)
39
+ (stat - expectation) / Math.sqrt(variance)
28
40
  end
29
41
 
30
42
  def mc(permutations, seed)
@@ -35,17 +47,17 @@ module SpatialStats
35
47
  end
36
48
  # r is the number of equal to or more extreme samples
37
49
  # one sided
38
- i_orig = i
50
+ stat_orig = stat
39
51
  r = 0
40
52
  shuffles.each do |shuffle|
41
- stat = self.class.new(@scope, @field, @weights)
42
- stat.x = shuffle
53
+ klass = self.class.new(@scope, @field, @weights)
54
+ klass.x = shuffle
43
55
 
44
56
  # https://geodacenter.github.io/glossary.html#ppvalue
45
- if i_orig.positive?
46
- r += 1 if stat.i >= i_orig
57
+ if stat_orig.positive?
58
+ r += 1 if klass.stat >= stat_orig
47
59
  else
48
- r += 1 if stat.i <= i_orig
60
+ r += 1 if klass.stat <= stat_orig
49
61
  end
50
62
  end
51
63
 
@@ -61,17 +73,17 @@ module SpatialStats
61
73
  end
62
74
 
63
75
  # r is the number of equal to or more extreme samples
64
- i_orig = i
76
+ stat_orig = stat
65
77
  r = 0
66
78
  shuffles.each do |shuffle|
67
- stat = self.class.new(@scope, @x_field, @y_field, @weights)
68
- stat.x = x
69
- stat.y = shuffle
79
+ klass = self.class.new(@scope, @x_field, @y_field, @weights)
80
+ klass.x = x
81
+ klass.y = shuffle
70
82
 
71
- if i_orig.positive?
72
- r += 1 if stat.i >= i_orig
83
+ if stat_orig.positive?
84
+ r += 1 if klass.stat >= stat_orig
73
85
  else
74
- r += 1 if stat.i <= i_orig
86
+ r += 1 if klass.stat <= stat_orig
75
87
  end
76
88
  end
77
89
 
@@ -2,7 +2,22 @@
2
2
 
3
3
  require 'spatial_stats/local/stat'
4
4
  require 'spatial_stats/local/bivariate_moran'
5
- require 'spatial_stats/local/g'
6
5
  require 'spatial_stats/local/geary'
6
+ require 'spatial_stats/local/getis_ord'
7
7
  require 'spatial_stats/local/moran'
8
8
  require 'spatial_stats/local/multivariate_geary'
9
+
10
+ module SpatialStats
11
+ ##
12
+ # The Local module provides functionality for local spatial statistics.
13
+ # Local spatial statistics describe each location in the dataset with a value,
14
+ # like how similar or dissimilar each area is to its neighbors.
15
+ #
16
+ # All local classes define a +stat+ method that returns the described
17
+ # statistic and an +mc+ method that runs a permutation test to determine a
18
+ # pseudo p-value for the statistic. Some also define +variance+ and
19
+ # +z_score+ methods that can be used to calculate p-values if the
20
+ # distribution is known.
21
+ module Local
22
+ end
23
+ end
@@ -2,7 +2,19 @@
2
2
 
3
3
  module SpatialStats
4
4
  module Local
5
+ ##
6
+ # BivariateMoran computes the local correlation between a variable x and
7
+ # spatially lagged variable y.
5
8
  class BivariateMoran < Stat
9
+ ##
10
+ # A new instance of BivariateMoran
11
+ #
12
+ # @param [ActiveRecord::Relation] scope
13
+ # @param [Symbol, String] x_field to query from scope
14
+ # @param [Symbol, String] y_field to query from scope
15
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
16
+ #
17
+ # @return [BivariateMoran]
6
18
  def initialize(scope, x_field, y_field, weights)
7
19
  @scope = scope
8
20
  @x_field = x_field
@@ -10,18 +22,42 @@ module SpatialStats
10
22
  @weights = weights
11
23
  end
12
24
  attr_accessor :scope, :x_field, :y_field, :weights
13
- attr_writer :x, :y
14
25
 
15
- def i
26
+ ##
27
+ # Computes the local indicator of spatial correlation for
28
+ # x against lagged y.
29
+ #
30
+ # @return [Array] of correlations for each observation.
31
+ def stat
16
32
  x.each_with_index.map do |_xi, idx|
17
- i_i(idx)
33
+ stat_i(idx)
18
34
  end
19
35
  end
36
+ alias i stat
20
37
 
21
- def i_i(idx)
38
+ ##
39
+ # Computes Bivariate Moran's I at a single index. Multiplies x at
40
+ # this index by the lagged y value at this index.
41
+ #
42
+ # @param [Integer] idx to perform the calculation on
43
+ #
44
+ # @return [Float] correlation at idx
45
+ def stat_i(idx)
22
46
  x[idx] * y_lag[idx]
23
47
  end
24
48
 
49
+ ##
50
+ # Permutation test to determine pseudo p-values of the +#stat+ method.
51
+ # Shuffles y values, holds x values, recomputes +#stat+ for each variation,
52
+ # then compares to the computed one. The ratio of more extreme values to
53
+ # permutations is returned for each observation.
54
+ #
55
+ # @see https://geodacenter.github.io/glossary.html#perm
56
+ #
57
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
58
+ # @param [Integer] seed used in random number generator for shuffles.
59
+ #
60
+ # @return [Array] of p-values
25
61
  def mc(permutations = 99, seed = nil)
26
62
  mc_bv(permutations, seed)
27
63
  end
@@ -38,6 +74,11 @@ module SpatialStats
38
74
 
39
75
  private
40
76
 
77
+ def mc_i(wi, perms, idx)
78
+ y_lag_i = (wi * perms).sum(1)
79
+ x[idx] * y_lag_i
80
+ end
81
+
41
82
  def y_lag
42
83
  @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
43
84
  end
@@ -2,71 +2,58 @@
2
2
 
3
3
  module SpatialStats
4
4
  module Local
5
+ ##
6
+ # Geary's C statistic computes the spatial lag of the difference between
7
+ # variable zi and its neighbors squared, in the set z. The local version
8
+ # returns a value for each entry.
5
9
  class Geary < Stat
10
+ ##
11
+ # A new instance of Geary
12
+ #
13
+ # @param [ActiveRecord::Relation] scope
14
+ # @param [Symbol, String] field to query from scope
15
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
16
+ #
17
+ # @return [Geary]
6
18
  def initialize(scope, field, weights)
7
19
  super(scope, field, weights)
8
20
  end
9
- attr_writer :x
10
21
 
11
- def i
22
+ ##
23
+ # Computes Geary's C for every observation in the +scope+.
24
+ # Geary's C is defined as the square distance between
25
+ # an observation and its neighbors, factored to their weights.
26
+ #
27
+ # @return [Array] the C value for each observation
28
+ def stat
12
29
  z.each_with_index.map do |_zi, idx|
13
- i_i(idx)
30
+ stat_i(idx)
14
31
  end
15
32
  end
33
+ alias c stat
16
34
 
17
- def i_i(idx)
18
- n = w.shape[0]
19
- sum = 0
20
- (0..n - 1).each do |j|
21
- sum += w[idx, j] * ((z[idx] - z[j])**2)
22
- end
23
- sum
24
- end
25
-
35
+ ##
36
+ # Values of the +field+ queried from the +scope+
37
+ #
38
+ # @return [Array]
26
39
  def x
27
40
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
28
41
  .standardize
29
42
  end
30
43
  alias z x
31
44
 
32
- def mc(permutations = 99, seed = nil)
33
- # For local tests, we need to shuffle the values
34
- # but for each item, hold its value in place and shuffle
35
- # its neighbors. Then we will only test for that item instead
36
- # of the entire set. This will be done for each item.
37
- rng = gen_rng(seed)
38
- shuffles = crand(x, permutations, rng)
39
-
40
- # r is the number of equal to or more extreme samples
41
- i_orig = i
42
- rs = [0] * i_orig.size
43
-
44
- shuffles.each_with_index do |perms, idx|
45
- ii_orig = i_orig[idx]
46
- perms.each do |perm|
47
- stat = self.class.new(scope, field, weights)
48
- stat.x = perm
49
- ii_new = stat.i_i(idx)
50
-
51
- # https://geodacenter.github.io/glossary.html#ppvalue
52
- # NOTE: this is inconsistent with the output from GeoDa
53
- # for local permutation tests, they seem to use greater than
54
- # not greater than or equal to. I'm going to go by the definition
55
- # in the glossary for now.
56
- if ii_orig.positive?
57
- rs[idx] += 1 if ii_new >= ii_orig
58
- else
59
- rs[idx] += 1 if ii_new <= ii_orig
60
- end
61
- end
62
- end
45
+ private
63
46
 
64
- rs.map do |ri|
65
- (ri + 1.0) / (permutations + 1.0)
66
- end
47
+ def stat_i(idx)
48
+ zs = Numo::DFloat.cast(z)
49
+ zi = (z[idx] - zs)**2
50
+ (w[idx, true] * zi).sum
67
51
  end
68
52
 
69
- private
53
+ def mc_i(wi, perms, idx)
54
+ zi = (z[idx] - perms)**2
55
+ (wi * zi).sum(1)
56
+ end
70
57
 
71
58
  def w
72
59
  @w ||= weights.full.row_standardized
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SpatialStats
4
+ module Local
5
+ ##
6
+ # GetisOrd's G and G* statistics compute the spatial autocorrelation of a
7
+ # variable, x. G computes the ratio of spatially lagged x to the sum of all
8
+ # other x's except xi for every entry. G* does the same calculation but
9
+ # includes xi in the spatial lag and denominator.
10
+ class GetisOrd < Stat
11
+ ##
12
+ # A new instance of GetisOrd
13
+ #
14
+ # @param [ActiveRecord::Relation] scope
15
+ # @param [Symbol, String] field to query from scope
16
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
17
+ #
18
+ # @return [GetisOrd]
19
+ def initialize(scope, field, weights, star = nil)
20
+ super(scope, field, weights)
21
+ @star = star
22
+ end
23
+ attr_accessor :star
24
+
25
+ ##
26
+ # Computes the G or G* statistic for every observation in x.
27
+ #
28
+ # @return [Array] of autocorrelations for each observation.
29
+ def stat
30
+ x.each_with_index.map do |_x_val, idx|
31
+ stat_i(idx)
32
+ end
33
+ end
34
+ alias g stat
35
+
36
+ ##
37
+ # Values of the +field+ queried from the +scope+
38
+ #
39
+ # @return [Array]
40
+ def x
41
+ @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
42
+ end
43
+ alias z x
44
+
45
+ ##
46
+ # True if G* is being used, false if G is being used.
47
+ # If no value is passed in the constructor, it will be determined
48
+ # based off of the trace of the weights.
49
+ #
50
+ # @return [Boolean] of star
51
+ def star?
52
+ if @star.nil?
53
+ @star = weights.full.trace.positive?
54
+ else
55
+ @star
56
+ end
57
+ end
58
+
59
+ private
60
+
61
+ def stat_i(idx)
62
+ x_lag[idx] / denominators[idx]
63
+ end
64
+
65
+ def mc_i(wi, perms, idx)
66
+ x_lag_i = (wi * perms).sum(1)
67
+ x_lag_i / denominators[idx]
68
+ end
69
+
70
+ def w
71
+ @w ||= begin
72
+ if star?
73
+ weights.full.windowed.row_standardized
74
+ else
75
+ weights.standardized
76
+ end
77
+ end
78
+ end
79
+
80
+ def z_lag
81
+ # window if star is true
82
+ @z_lag ||= begin
83
+ if star?
84
+ SpatialStats::Utils::Lag.window_sum(w, x)
85
+ else
86
+ SpatialStats::Utils::Lag.neighbor_sum(w, x)
87
+ end
88
+ end
89
+ end
90
+ alias x_lag z_lag
91
+
92
+ def denominators
93
+ @denominators ||= begin
94
+ n = w.shape[0]
95
+ if star?
96
+ [x.sum] * n
97
+ else
98
+ # add everything but i
99
+ (0..n - 1).each.map do |idx|
100
+ terms = x.dup
101
+ terms.delete_at(idx)
102
+ terms.sum
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -1,36 +1,44 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
4
- # For now, instead of doing neighbor's variance (Si**2), I'm going to use
5
- # the total sample variance. This is how GeoDa does it, but is different
6
- # than arcgis. This shouldn't affect the expectation and variance of I.
7
3
  module SpatialStats
8
4
  module Local
5
+ ##
6
+ # Moran's I statistic computes the spatial autocorrelation of variable x.
7
+ # It does this by computing a spatially lagged version of itself and
8
+ # comparing that with each observation based on the weights matrix.
9
+ # The local version returns the spatial autocorrelation for each
10
+ # observation in the dataset.
9
11
  class Moran < Stat
12
+ ##
13
+ # A new instance of Moran
14
+ #
15
+ # @param [ActiveRecord::Relation] scope
16
+ # @param [Symbol, String] field to query from scope
17
+ # @param [WeightsMatrix] weights to define relationship between observations in scope
18
+ #
19
+ # @return [Moran]
10
20
  def initialize(scope, field, weights)
11
21
  super(scope, field, weights)
12
- @scope = scope
13
- @field = field
14
- @weights = weights
15
22
  end
16
- attr_writer :x, :z_lag
17
23
 
18
- def i
24
+ ##
25
+ # Computes the local indicator of spatial autocorrelation (lisa) for
26
+ # x against lagged x.
27
+ #
28
+ # @return [Array] of autocorrelations for each observation.
29
+ def stat
19
30
  z.each_with_index.map do |_z_val, idx|
20
- i_i(idx)
31
+ stat_i(idx)
21
32
  end
22
33
  end
23
-
24
- def i_i(idx)
25
- # method to compute i at a single index.
26
- # this is important for permutation testing
27
- # because for each test we only want the result from
28
- # 1 index not the entire set, so this will save lots of
29
- # computations.
30
- sum_term = z_lag[idx]
31
- (z[idx] / si2) * sum_term
32
- end
33
-
34
+ alias i stat
35
+
36
+ ##
37
+ # Expected value of I for each observation. Since the weights matrix
38
+ # is standardized during the calculation, the expectation is the same for
39
+ # each observation.
40
+ #
41
+ # @return [Float]
34
42
  def expectation
35
43
  # since we are using row standardized weights, the expectation
36
44
  # will just be -1/(n-1) for all items. Otherwise, it would be
@@ -39,6 +47,12 @@ module SpatialStats
39
47
  -1.0 / (@weights.n - 1)
40
48
  end
41
49
 
50
+ ##
51
+ # Variance of I for each observation.
52
+ #
53
+ # @see https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
54
+ #
55
+ # @return [Array] of variances for each observation
42
56
  def variance
43
57
  # formula is A - B - (E[I])**2
44
58
  wt = w.row_standardized
@@ -54,14 +68,21 @@ module SpatialStats
54
68
  vars
55
69
  end
56
70
 
71
+ ##
72
+ # Values of the +field+ queried from the +scope+
73
+ #
74
+ # @return [Array]
57
75
  def x
58
76
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
59
77
  .standardize
60
78
  end
61
79
  alias z x
62
80
 
81
+ ##
82
+ # Spatially lagged x variable at each observation.
83
+ #
84
+ # @return [Array]
63
85
  def z_lag
64
- # can't memoize yet because of mc testing
65
86
  # w is already row_standardized, so we are using
66
87
  # neighbor sum instead of neighbor_average to save cost
67
88
  @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
@@ -69,6 +90,18 @@ module SpatialStats
69
90
 
70
91
  private
71
92
 
93
+ def stat_i(idx)
94
+ sum_term = z_lag[idx]
95
+ (z[idx] / si2) * sum_term
96
+ end
97
+
98
+ def mc_i(wi, perms, idx)
99
+ # compute i for a single index given DFloat of neighbor weights
100
+ # and DFloat of neighbor z perms
101
+ z_lag_i = (wi * perms).sum(1)
102
+ z[idx] * z_lag_i
103
+ end
104
+
72
105
  def si2
73
106
  # @si2 ||= z.sample_variance
74
107
  # we standardize so sample_variance is 1