spatial_stats 0.2.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,7 +19,7 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
23
  end
24
24
  attr_writer :x, :y
25
25
 
@@ -29,8 +29,7 @@ module SpatialStats
29
29
  #
30
30
  # @return [Float]
31
31
  def stat
32
- w = @weights.standardized
33
- y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
32
+ y_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, y)
34
33
  numerator = 0
35
34
  x.each_with_index do |xi, idx|
36
35
  numerator += xi * y_lag[idx]
@@ -55,19 +54,20 @@ module SpatialStats
55
54
  #
56
55
  # @return [Float]
57
56
  def variance
58
- n = @weights.n
59
- wij = @weights.full
60
- w = wij.sum
57
+ n = weights.n
58
+ w_sum = n.to_f
61
59
  e = expectation
62
60
 
63
- s1 = s1_calc(n, wij)
64
- s2 = s2_calc(n, wij)
61
+ wij = weights.sparse.coordinates
62
+
63
+ s1 = s1_calc(wij)
64
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
65
65
  s3 = s3_calc(n, x)
66
66
 
67
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
68
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
67
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
68
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
69
69
 
70
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
70
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
71
71
  var_right = e**2
72
72
  var_left - var_right
73
73
  end
@@ -88,6 +88,19 @@ module SpatialStats
88
88
  mc_bv(permutations, seed)
89
89
  end
90
90
 
91
+ ##
92
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
93
+ # in a hash.
94
+ #
95
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
96
+ # @param [Integer] seed used in random number generator for shuffles.
97
+ #
98
+ # @return [Hash]
99
+ def summary(permutations = 99, seed = nil)
100
+ p_val = mc(permutations, seed)
101
+ { stat: stat, p: p_val }
102
+ end
103
+
91
104
  ##
92
105
  # Standardized variables queried from +x_field+.
93
106
  #
@@ -108,32 +121,41 @@ module SpatialStats
108
121
 
109
122
  private
110
123
 
124
+ def stat_mc(perms)
125
+ x_arr = Numo::DFloat.cast(x)
126
+ lag = w.dot(perms.transpose)
127
+ x_arr.dot(lag) / (x_arr**2).sum
128
+ end
129
+
111
130
  def s3_calc(n, zs)
112
131
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
113
132
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
114
133
  numerator / denominator
115
134
  end
116
135
 
117
- def s2_calc(n, wij)
136
+ def s2_calc(n, wij, row_index)
118
137
  s2 = 0
119
- (0..n - 1).each do |i|
138
+ wij_arr = wij.to_a # for row slicing
139
+ (0..n - 1).each do |idx|
140
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
120
141
  left_term = 0
121
142
  right_term = 0
122
- (0..n - 1).each do |j|
123
- left_term += wij[i, j]
124
- right_term += wij[j, i]
143
+
144
+ row.each do |coords, val|
145
+ left_term += val
146
+ right_term += wij[coords.reverse] || 0
125
147
  end
126
148
  s2 += (left_term + right_term)**2
127
149
  end
128
150
  s2
129
151
  end
130
152
 
131
- def s1_calc(n, wij)
153
+ def s1_calc(wij)
132
154
  s1 = 0
133
- (0..n - 1).each do |i|
134
- (0..n - 1).each do |j|
135
- s1 += (wij[i, j] + wij[j, i])**2
136
- end
155
+ wij.each do |coords, val|
156
+ # (wij + wji)**2
157
+ wji = wij[coords.reverse] || 0
158
+ s1 += (val + wji)**2
137
159
  end
138
160
  s1 / 2
139
161
  end
@@ -29,8 +29,7 @@ module SpatialStats
29
29
  # computes Moran's I. numerator is sum of zi * spatial lag of zi
30
30
  # denominator is sum of zi**2.
31
31
  # have to use row-standardized weights
32
- w = @weights.standardized
33
- z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
32
+ z_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, z)
34
33
  numerator = 0
35
34
  z.each_with_index do |zi, j|
36
35
  row_sum = zi * z_lag[j]
@@ -49,7 +48,7 @@ module SpatialStats
49
48
  # @return [Float]
50
49
  def expectation
51
50
  # -1/(n-1)
52
- -1.0 / (@weights.n - 1)
51
+ -1.0 / (weights.n - 1)
53
52
  end
54
53
 
55
54
  ##
@@ -58,19 +57,20 @@ module SpatialStats
58
57
  #
59
58
  # @return [Float]
60
59
  def variance
61
- n = @weights.n
62
- wij = @weights.full
63
- w = wij.sum
60
+ n = weights.n
61
+ w_sum = n # standardized weights
64
62
  e = expectation
65
63
 
66
- s1 = s1_calc(n, wij)
67
- s2 = s2_calc(n, wij)
64
+ wij = weights.sparse.coordinates
65
+
66
+ s1 = s1_calc(wij)
67
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
68
68
  s3 = s3_calc(n, z)
69
69
 
70
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
71
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
70
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
71
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
72
72
 
73
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
73
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
74
74
  var_right = e**2
75
75
  var_left - var_right
76
76
  end
@@ -92,58 +92,66 @@ module SpatialStats
92
92
  end
93
93
 
94
94
  ##
95
- # Values of the +field+ queried from the +scope+
95
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
96
+ # in a hash.
96
97
  #
97
- # @return [Array]
98
- def x
99
- @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
100
- end
101
-
102
- # TODO: remove these last 2 methods and just standardize x.
103
- ##
104
- # Mean of x
98
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
99
+ # @param [Integer] seed used in random number generator for shuffles.
105
100
  #
106
- # @return [Float]
107
- def zbar
108
- x.sum / x.size
101
+ # @return [Hash]
102
+ def summary(permutations = 99, seed = nil)
103
+ p_val = mc(permutations, seed)
104
+ { stat: stat, p: p_val }
109
105
  end
110
106
 
111
107
  ##
112
- # Array of xi - zbar for i: [0:n-1]
108
+ # Values of the +field+ queried from the +scope+
113
109
  #
114
110
  # @return [Array]
115
- def z
116
- x.map { |val| val - zbar }
111
+ def x
112
+ @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
113
+ .standardize
117
114
  end
115
+ alias z x
118
116
 
119
117
  private
120
118
 
119
+ def stat_mc(perms)
120
+ z_arr = Numo::DFloat.cast(z)
121
+ lag = w.dot(perms.transpose)
122
+ z_arr.dot(lag) / (z_arr**2).sum
123
+ end
124
+
121
125
  def s3_calc(n, zs)
122
126
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
123
127
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
124
128
  numerator / denominator
125
129
  end
126
130
 
127
- def s2_calc(n, wij)
131
+ # use row_index to take slices of wij
132
+ def s2_calc(n, wij, row_index)
128
133
  s2 = 0
134
+ wij_arr = wij.to_a # for row slicing
129
135
  (0..n - 1).each do |idx|
136
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
130
137
  left_term = 0
131
138
  right_term = 0
132
- (0..n - 1).each do |j|
133
- left_term += wij[idx, j]
134
- right_term += wij[j, idx]
139
+
140
+ row.each do |coords, val|
141
+ left_term += val
142
+ right_term += wij[coords.reverse] || 0
135
143
  end
136
144
  s2 += (left_term + right_term)**2
137
145
  end
138
146
  s2
139
147
  end
140
148
 
141
- def s1_calc(n, wij)
149
+ def s1_calc(wij)
142
150
  s1 = 0
143
- (0..n - 1).each do |idx|
144
- (0..n - 1).each do |j|
145
- s1 += (wij[idx, j] + wij[j, idx])**2
146
- end
151
+ wij.each do |coords, val|
152
+ # (wij + wji)**2
153
+ wji = wij[coords.reverse] || 0
154
+ s1 += (val + wji)**2
147
155
  end
148
156
  s1 / 2
149
157
  end
@@ -11,7 +11,7 @@ module SpatialStats
11
11
  def initialize(scope, field, weights)
12
12
  @scope = scope
13
13
  @field = field
14
- @weights = weights
14
+ @weights = weights.standardize
15
15
  end
16
16
  attr_accessor :scope, :field, :weights
17
17
 
@@ -45,21 +45,21 @@ module SpatialStats
45
45
  permutations.times do
46
46
  shuffles << x.shuffle(random: rng)
47
47
  end
48
+ shuffles = Numo::DFloat.cast(shuffles)
49
+
48
50
  # r is the number of equal to or more extreme samples
49
51
  # one sided
50
- stat_orig = stat
51
- r = 0
52
- shuffles.each do |shuffle|
53
- klass = self.class.new(@scope, @field, @weights)
54
- klass.x = shuffle
55
-
56
- # https://geodacenter.github.io/glossary.html#ppvalue
57
- if stat_orig.positive?
58
- r += 1 if klass.stat >= stat_orig
59
- else
60
- r += 1 if klass.stat <= stat_orig
61
- end
62
- end
52
+ stat_orig = stat.round(5)
53
+ # r = 0
54
+
55
+ # compute new stat values
56
+ stat_new = stat_mc(shuffles)
57
+
58
+ r = if stat_orig.positive?
59
+ (stat_new >= stat_orig).count
60
+ else
61
+ (stat_new <= stat_orig).count
62
+ end
63
63
 
64
64
  (r + 1.0) / (permutations + 1.0)
65
65
  end
@@ -71,27 +71,31 @@ module SpatialStats
71
71
  permutations.times do
72
72
  shuffles << y.shuffle(random: rng)
73
73
  end
74
+ shuffles = Numo::DFloat.cast(shuffles)
74
75
 
75
76
  # r is the number of equal to or more extreme samples
76
- stat_orig = stat
77
- r = 0
78
- shuffles.each do |shuffle|
79
- klass = self.class.new(@scope, @x_field, @y_field, @weights)
80
- klass.x = x
81
- klass.y = shuffle
82
-
83
- if stat_orig.positive?
84
- r += 1 if klass.stat >= stat_orig
85
- else
86
- r += 1 if klass.stat <= stat_orig
87
- end
88
- end
77
+ stat_orig = stat.round(5)
78
+ stat_new = stat_mc(shuffles)
79
+
80
+ r = if stat_orig.positive?
81
+ (stat_new >= stat_orig).count
82
+ else
83
+ (stat_new <= stat_orig).count
84
+ end
89
85
 
90
86
  (r + 1.0) / (permutations + 1.0)
91
87
  end
92
88
 
93
89
  private
94
90
 
91
+ def stat_mc(_shuffles)
92
+ raise NotImplementedError, 'private method stat_mc not defined'
93
+ end
94
+
95
+ def w
96
+ @w ||= weights.dense
97
+ end
98
+
95
99
  def gen_rng(seed)
96
100
  if seed
97
101
  Random.new(seed)
@@ -19,9 +19,10 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
23
  end
24
24
  attr_accessor :scope, :x_field, :y_field, :weights
25
+ attr_writer :x, :y
25
26
 
26
27
  ##
27
28
  # Computes the local indicator of spatial correlation for
@@ -62,6 +63,61 @@ module SpatialStats
62
63
  mc_bv(permutations, seed)
63
64
  end
64
65
 
66
+ ##
67
+ # Determines what quadrant an observation is in. Based on its value
68
+ # compared to its neighbors. This does not work for all stats, since
69
+ # it requires that values can be negative.
70
+ #
71
+ # In a standardized array of z, high values are values greater than 0
72
+ # and its neighbors are determined by the spatial lag and if that is
73
+ # positive then its neighbors would be high, low otherwise.
74
+ #
75
+ # Quadrants are:
76
+ # [HH] a high value surrounded by other high values
77
+ # [LH] a low value surrounded by high values
78
+ # [LL] a low value surrounded by low values
79
+ # [HL] a high value surrounded by low values
80
+ #
81
+ # @return [Array] of labels
82
+ def quads
83
+ # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
84
+ z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, y)
85
+ zp = x.map(&:positive?)
86
+ lp = z_lag.map(&:positive?)
87
+
88
+ # hh = zp & lp
89
+ # lh = zp ^ true & lp
90
+ # ll = zp ^ true & lp ^ true
91
+ # hl = zp & lp ^ true
92
+ hh = zp.each_with_index.map { |v, idx| v & lp[idx] }
93
+ lh = zp.each_with_index.map { |v, idx| (v ^ true) & lp[idx] }
94
+ ll = zp.each_with_index.map { |v, idx| (v ^ true) & (lp[idx] ^ true) }
95
+ hl = zp.each_with_index.map { |v, idx| v & (lp[idx] ^ true) }
96
+
97
+ # now zip lists and map them to proper terms
98
+ quad_terms = %w[HH LH LL HL]
99
+ hh.zip(lh, ll, hl).map do |feature|
100
+ quad_terms[feature.index(true)]
101
+ end
102
+ end
103
+ alias groups quads
104
+
105
+ ##
106
+ # Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
107
+ # in an array of hashes.
108
+ #
109
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
110
+ # @param [Integer] seed used in random number generator for shuffles.
111
+ #
112
+ # @return [Array]
113
+ def summary(permutations = 99, seed = nil)
114
+ p_vals = mc(permutations, seed)
115
+ data = weights.keys.zip(stat, p_vals, groups)
116
+ data.map do |row|
117
+ { key: row[0], stat: row[1], p: row[2], group: row[3] }
118
+ end
119
+ end
120
+
65
121
  def x
66
122
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @x_field)
67
123
  .standardize
@@ -79,8 +135,17 @@ module SpatialStats
79
135
  x[idx] * y_lag_i
80
136
  end
81
137
 
138
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
139
+ # Since moran can be positive or negative, go by this definition
140
+ if stat_i_orig.positive?
141
+ (stat_i_new >= stat_i_orig).count
142
+ else
143
+ (stat_i_new <= stat_i_orig).count
144
+ end
145
+ end
146
+
82
147
  def y_lag
83
- @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
148
+ @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, y)
84
149
  end
85
150
  end
86
151
  end
@@ -18,6 +18,7 @@ module SpatialStats
18
18
  def initialize(scope, field, weights)
19
19
  super(scope, field, weights)
20
20
  end
21
+ attr_writer :x
21
22
 
22
23
  ##
23
24
  # Computes Geary's C for every observation in the +scope+.
@@ -32,6 +33,25 @@ module SpatialStats
32
33
  end
33
34
  alias c stat
34
35
 
36
+ ##
37
+ # Computes the groups each observation belongs to.
38
+ # Potential groups for Geary's C are:
39
+ # [HH] High-High
40
+ # [LL] Low-Low
41
+ # [N] Negative - Group traditionally for HL and LH, but since the difference is squared they are in the same group.
42
+ #
43
+ #
44
+ # @return [Array] groups for each observation
45
+ def groups
46
+ quads.map do |quad|
47
+ if %w[HL LH].include?(quad)
48
+ 'N'
49
+ else
50
+ quad
51
+ end
52
+ end
53
+ end
54
+
35
55
  ##
36
56
  # Values of the +field+ queried from the +scope+
37
57
  #
@@ -45,9 +65,11 @@ module SpatialStats
45
65
  private
46
66
 
47
67
  def stat_i(idx)
48
- zs = Numo::DFloat.cast(z)
49
- zi = (z[idx] - zs)**2
50
- (w[idx, true] * zi).sum
68
+ # TODO: maybe don't even use stat_i
69
+ # just form all of the modified zs and then
70
+ # pass it to a loop of mulvec all implemented in c ext
71
+ zi = z.map { |val| (z[idx] - val)**2 }
72
+ weights.sparse.dot_row(zi, idx)
51
73
  end
52
74
 
53
75
  def mc_i(wi, perms, idx)
@@ -55,8 +77,17 @@ module SpatialStats
55
77
  (wi * zi).sum(1)
56
78
  end
57
79
 
58
- def w
59
- @w ||= weights.full.row_standardized
80
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
81
+ # Geary cannot be negative, so we have to use this technique from
82
+ # GeoDa to determine p values. Note I slightly modified it to be inclusive
83
+ # on both tails not just the lower tail.
84
+ # https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
85
+ mean = stat_i_new.mean
86
+ if stat_i_orig <= mean
87
+ (stat_i_new <= stat_i_orig).count
88
+ else
89
+ (stat_i_new >= stat_i_orig).count
90
+ end
60
91
  end
61
92
  end
62
93
  end
@@ -14,13 +14,18 @@ module SpatialStats
14
14
  # @param [ActiveRecord::Relation] scope
15
15
  # @param [Symbol, String] field to query from scope
16
16
  # @param [WeightsMatrix] weights to define relationship between observations in scope
17
+ # @param [Boolean] star to preset if star will be true or false. Will be calculated otherwise.
17
18
  #
18
19
  # @return [GetisOrd]
19
20
  def initialize(scope, field, weights, star = nil)
20
- super(scope, field, weights)
21
+ @scope = scope
22
+ @field = field
23
+ @weights = weights
21
24
  @star = star
25
+ calc_weights
22
26
  end
23
27
  attr_accessor :star
28
+ attr_writer :x
24
29
 
25
30
  ##
26
31
  # Computes the G or G* statistic for every observation in x.
@@ -33,6 +38,25 @@ module SpatialStats
33
38
  end
34
39
  alias g stat
35
40
 
41
+ ##
42
+ # Computes the groups each observation belongs to.
43
+ # Potential groups for G are:
44
+ # [H] High
45
+ # [L] Low
46
+ #
47
+ # Group is high when standardized z is positive, low otherwise.
48
+ #
49
+ # @return [Array] groups for each observation
50
+ def groups
51
+ z.standardize.map do |val|
52
+ if val.positive?
53
+ 'H'
54
+ else
55
+ 'L'
56
+ end
57
+ end
58
+ end
59
+
36
60
  ##
37
61
  # Values of the +field+ queried from the +scope+
38
62
  #
@@ -50,7 +74,7 @@ module SpatialStats
50
74
  # @return [Boolean] of star
51
75
  def star?
52
76
  if @star.nil?
53
- @star = weights.full.trace.positive?
77
+ @star = weights.dense.trace.positive?
54
78
  else
55
79
  @star
56
80
  end
@@ -67,25 +91,29 @@ module SpatialStats
67
91
  x_lag_i / denominators[idx]
68
92
  end
69
93
 
70
- def w
71
- @w ||= begin
72
- if star?
73
- weights.full.windowed.row_standardized
74
- else
75
- weights.standardized
76
- end
94
+ def mc_observation_calc(stat_i_orig, stat_i_new, permutations)
95
+ # GetisOrd cannot be negative, so we have to use this technique from
96
+ # ESDA to determine if we should select p or 1-p.
97
+ # https://github.com/pysal/esda/blob/master/esda/getisord.py#L388
98
+ num_larger = (stat_i_new >= stat_i_orig).count
99
+ is_low = (permutations - num_larger) < num_larger
100
+ if is_low
101
+ permutations - num_larger
102
+ else
103
+ num_larger
77
104
  end
78
105
  end
79
106
 
107
+ def calc_weights
108
+ @weights = if star?
109
+ weights.window.standardize
110
+ else
111
+ weights.standardize
112
+ end
113
+ end
114
+
80
115
  def z_lag
81
- # window if star is true
82
- @z_lag ||= begin
83
- if star?
84
- SpatialStats::Utils::Lag.window_sum(w, x)
85
- else
86
- SpatialStats::Utils::Lag.neighbor_sum(w, x)
87
- end
88
- end
116
+ @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, x)
89
117
  end
90
118
  alias x_lag z_lag
91
119