spatial_stats 0.2.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
23
  end
24
24
  attr_writer :x, :y
25
25
 
@@ -29,8 +29,7 @@ module SpatialStats
29
29
  #
30
30
  # @return [Float]
31
31
  def stat
32
- w = @weights.standardized
33
- y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
32
+ y_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, y)
34
33
  numerator = 0
35
34
  x.each_with_index do |xi, idx|
36
35
  numerator += xi * y_lag[idx]
@@ -55,19 +54,20 @@ module SpatialStats
55
54
  #
56
55
  # @return [Float]
57
56
  def variance
58
- n = @weights.n
59
- wij = @weights.full
60
- w = wij.sum
57
+ n = weights.n
58
+ w_sum = n.to_f
61
59
  e = expectation
62
60
 
63
- s1 = s1_calc(n, wij)
64
- s2 = s2_calc(n, wij)
61
+ wij = weights.sparse.coordinates
62
+
63
+ s1 = s1_calc(wij)
64
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
65
65
  s3 = s3_calc(n, x)
66
66
 
67
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
68
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
67
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
68
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
69
69
 
70
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
70
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
71
71
  var_right = e**2
72
72
  var_left - var_right
73
73
  end
@@ -88,6 +88,19 @@ module SpatialStats
88
88
  mc_bv(permutations, seed)
89
89
  end
90
90
 
91
+ ##
92
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
93
+ # in a hash.
94
+ #
95
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
96
+ # @param [Integer] seed used in random number generator for shuffles.
97
+ #
98
+ # @return [Hash]
99
+ def summary(permutations = 99, seed = nil)
100
+ p_val = mc(permutations, seed)
101
+ { stat: stat, p: p_val }
102
+ end
103
+
91
104
  ##
92
105
  # Standardized variables queried from +x_field+.
93
106
  #
@@ -108,32 +121,41 @@ module SpatialStats
108
121
 
109
122
  private
110
123
 
124
+ def stat_mc(perms)
125
+ x_arr = Numo::DFloat.cast(x)
126
+ lag = w.dot(perms.transpose)
127
+ x_arr.dot(lag) / (x_arr**2).sum
128
+ end
129
+
111
130
  def s3_calc(n, zs)
112
131
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
113
132
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
114
133
  numerator / denominator
115
134
  end
116
135
 
117
- def s2_calc(n, wij)
136
+ def s2_calc(n, wij, row_index)
118
137
  s2 = 0
119
- (0..n - 1).each do |i|
138
+ wij_arr = wij.to_a # for row slicing
139
+ (0..n - 1).each do |idx|
140
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
120
141
  left_term = 0
121
142
  right_term = 0
122
- (0..n - 1).each do |j|
123
- left_term += wij[i, j]
124
- right_term += wij[j, i]
143
+
144
+ row.each do |coords, val|
145
+ left_term += val
146
+ right_term += wij[coords.reverse] || 0
125
147
  end
126
148
  s2 += (left_term + right_term)**2
127
149
  end
128
150
  s2
129
151
  end
130
152
 
131
- def s1_calc(n, wij)
153
+ def s1_calc(wij)
132
154
  s1 = 0
133
- (0..n - 1).each do |i|
134
- (0..n - 1).each do |j|
135
- s1 += (wij[i, j] + wij[j, i])**2
136
- end
155
+ wij.each do |coords, val|
156
+ # (wij + wji)**2
157
+ wji = wij[coords.reverse] || 0
158
+ s1 += (val + wji)**2
137
159
  end
138
160
  s1 / 2
139
161
  end
@@ -29,8 +29,7 @@ module SpatialStats
29
29
  # computes Moran's I. numerator is sum of zi * spatial lag of zi
30
30
  # denominator is sum of zi**2.
31
31
  # have to use row-standardized weights
32
- w = @weights.standardized
33
- z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
32
+ z_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, z)
34
33
  numerator = 0
35
34
  z.each_with_index do |zi, j|
36
35
  row_sum = zi * z_lag[j]
@@ -49,7 +48,7 @@ module SpatialStats
49
48
  # @return [Float]
50
49
  def expectation
51
50
  # -1/(n-1)
52
- -1.0 / (@weights.n - 1)
51
+ -1.0 / (weights.n - 1)
53
52
  end
54
53
 
55
54
  ##
@@ -58,19 +57,20 @@ module SpatialStats
58
57
  #
59
58
  # @return [Float]
60
59
  def variance
61
- n = @weights.n
62
- wij = @weights.full
63
- w = wij.sum
60
+ n = weights.n
61
+ w_sum = n # standardized weights
64
62
  e = expectation
65
63
 
66
- s1 = s1_calc(n, wij)
67
- s2 = s2_calc(n, wij)
64
+ wij = weights.sparse.coordinates
65
+
66
+ s1 = s1_calc(wij)
67
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
68
68
  s3 = s3_calc(n, z)
69
69
 
70
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
71
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
70
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
71
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
72
72
 
73
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
73
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
74
74
  var_right = e**2
75
75
  var_left - var_right
76
76
  end
@@ -92,58 +92,66 @@ module SpatialStats
92
92
  end
93
93
 
94
94
  ##
95
- # Values of the +field+ queried from the +scope+
95
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
96
+ # in a hash.
96
97
  #
97
- # @return [Array]
98
- def x
99
- @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
100
- end
101
-
102
- # TODO: remove these last 2 methods and just standardize x.
103
- ##
104
- # Mean of x
98
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
99
+ # @param [Integer] seed used in random number generator for shuffles.
105
100
  #
106
- # @return [Float]
107
- def zbar
108
- x.sum / x.size
101
+ # @return [Hash]
102
+ def summary(permutations = 99, seed = nil)
103
+ p_val = mc(permutations, seed)
104
+ { stat: stat, p: p_val }
109
105
  end
110
106
 
111
107
  ##
112
- # Array of xi - zbar for i: [0:n-1]
108
+ # Values of the +field+ queried from the +scope+
113
109
  #
114
110
  # @return [Array]
115
- def z
116
- x.map { |val| val - zbar }
111
+ def x
112
+ @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
113
+ .standardize
117
114
  end
115
+ alias z x
118
116
 
119
117
  private
120
118
 
119
+ def stat_mc(perms)
120
+ z_arr = Numo::DFloat.cast(z)
121
+ lag = w.dot(perms.transpose)
122
+ z_arr.dot(lag) / (z_arr**2).sum
123
+ end
124
+
121
125
  def s3_calc(n, zs)
122
126
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
123
127
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
124
128
  numerator / denominator
125
129
  end
126
130
 
127
- def s2_calc(n, wij)
131
+ # use row_index to take slices of wij
132
+ def s2_calc(n, wij, row_index)
128
133
  s2 = 0
134
+ wij_arr = wij.to_a # for row slicing
129
135
  (0..n - 1).each do |idx|
136
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
130
137
  left_term = 0
131
138
  right_term = 0
132
- (0..n - 1).each do |j|
133
- left_term += wij[idx, j]
134
- right_term += wij[j, idx]
139
+
140
+ row.each do |coords, val|
141
+ left_term += val
142
+ right_term += wij[coords.reverse] || 0
135
143
  end
136
144
  s2 += (left_term + right_term)**2
137
145
  end
138
146
  s2
139
147
  end
140
148
 
141
- def s1_calc(n, wij)
149
+ def s1_calc(wij)
142
150
  s1 = 0
143
- (0..n - 1).each do |idx|
144
- (0..n - 1).each do |j|
145
- s1 += (wij[idx, j] + wij[j, idx])**2
146
- end
151
+ wij.each do |coords, val|
152
+ # (wij + wji)**2
153
+ wji = wij[coords.reverse] || 0
154
+ s1 += (val + wji)**2
147
155
  end
148
156
  s1 / 2
149
157
  end
@@ -11,7 +11,7 @@ module SpatialStats
11
11
  def initialize(scope, field, weights)
12
12
  @scope = scope
13
13
  @field = field
14
- @weights = weights
14
+ @weights = weights.standardize
15
15
  end
16
16
  attr_accessor :scope, :field, :weights
17
17
 
@@ -45,21 +45,21 @@ module SpatialStats
45
45
  permutations.times do
46
46
  shuffles << x.shuffle(random: rng)
47
47
  end
48
+ shuffles = Numo::DFloat.cast(shuffles)
49
+
48
50
  # r is the number of equal to or more extreme samples
49
51
  # one sided
50
- stat_orig = stat
51
- r = 0
52
- shuffles.each do |shuffle|
53
- klass = self.class.new(@scope, @field, @weights)
54
- klass.x = shuffle
55
-
56
- # https://geodacenter.github.io/glossary.html#ppvalue
57
- if stat_orig.positive?
58
- r += 1 if klass.stat >= stat_orig
59
- else
60
- r += 1 if klass.stat <= stat_orig
61
- end
62
- end
52
+ stat_orig = stat.round(5)
53
+ # r = 0
54
+
55
+ # compute new stat values
56
+ stat_new = stat_mc(shuffles)
57
+
58
+ r = if stat_orig.positive?
59
+ (stat_new >= stat_orig).count
60
+ else
61
+ (stat_new <= stat_orig).count
62
+ end
63
63
 
64
64
  (r + 1.0) / (permutations + 1.0)
65
65
  end
@@ -71,27 +71,31 @@ module SpatialStats
71
71
  permutations.times do
72
72
  shuffles << y.shuffle(random: rng)
73
73
  end
74
+ shuffles = Numo::DFloat.cast(shuffles)
74
75
 
75
76
  # r is the number of equal to or more extreme samples
76
- stat_orig = stat
77
- r = 0
78
- shuffles.each do |shuffle|
79
- klass = self.class.new(@scope, @x_field, @y_field, @weights)
80
- klass.x = x
81
- klass.y = shuffle
82
-
83
- if stat_orig.positive?
84
- r += 1 if klass.stat >= stat_orig
85
- else
86
- r += 1 if klass.stat <= stat_orig
87
- end
88
- end
77
+ stat_orig = stat.round(5)
78
+ stat_new = stat_mc(shuffles)
79
+
80
+ r = if stat_orig.positive?
81
+ (stat_new >= stat_orig).count
82
+ else
83
+ (stat_new <= stat_orig).count
84
+ end
89
85
 
90
86
  (r + 1.0) / (permutations + 1.0)
91
87
  end
92
88
 
93
89
  private
94
90
 
91
+ def stat_mc(_shuffles)
92
+ raise NotImplementedError, 'private method stat_mc not defined'
93
+ end
94
+
95
+ def w
96
+ @w ||= weights.dense
97
+ end
98
+
95
99
  def gen_rng(seed)
96
100
  if seed
97
101
  Random.new(seed)
@@ -19,9 +19,10 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
23
  end
24
24
  attr_accessor :scope, :x_field, :y_field, :weights
25
+ attr_writer :x, :y
25
26
 
26
27
  ##
27
28
  # Computes the local indicator of spatial correlation for
@@ -62,6 +63,61 @@ module SpatialStats
62
63
  mc_bv(permutations, seed)
63
64
  end
64
65
 
66
+ ##
67
+ # Determines what quadrant an observation is in. Based on its value
68
+ # compared to its neighbors. This does not work for all stats, since
69
+ # it requires that values can be negative.
70
+ #
71
+ # In a standardized array of z, high values are values greater than 0
72
+ # and its neighbors are determined by the spatial lag and if that is
73
+ # positive then its neighbors would be high, low otherwise.
74
+ #
75
+ # Quadrants are:
76
+ # [HH] a high value surrounded by other high values
77
+ # [LH] a low value surrounded by high values
78
+ # [LL] a low value surrounded by low values
79
+ # [HL] a high value surrounded by low values
80
+ #
81
+ # @return [Array] of labels
82
+ def quads
83
+ # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
84
+ z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, y)
85
+ zp = x.map(&:positive?)
86
+ lp = z_lag.map(&:positive?)
87
+
88
+ # hh = zp & lp
89
+ # lh = zp ^ true & lp
90
+ # ll = zp ^ true & lp ^ true
91
+ # hl = zp & lp ^ true
92
+ hh = zp.each_with_index.map { |v, idx| v & lp[idx] }
93
+ lh = zp.each_with_index.map { |v, idx| (v ^ true) & lp[idx] }
94
+ ll = zp.each_with_index.map { |v, idx| (v ^ true) & (lp[idx] ^ true) }
95
+ hl = zp.each_with_index.map { |v, idx| v & (lp[idx] ^ true) }
96
+
97
+ # now zip lists and map them to proper terms
98
+ quad_terms = %w[HH LH LL HL]
99
+ hh.zip(lh, ll, hl).map do |feature|
100
+ quad_terms[feature.index(true)]
101
+ end
102
+ end
103
+ alias groups quads
104
+
105
+ ##
106
+ # Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
107
+ # in a hash array.
108
+ #
109
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
110
+ # @param [Integer] seed used in random number generator for shuffles.
111
+ #
112
+ # @return [Array]
113
+ def summary(permutations = 99, seed = nil)
114
+ p_vals = mc(permutations, seed)
115
+ data = weights.keys.zip(stat, p_vals, groups)
116
+ data.map do |row|
117
+ { key: row[0], stat: row[1], p: row[2], group: row[3] }
118
+ end
119
+ end
120
+
65
121
  def x
66
122
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @x_field)
67
123
  .standardize
@@ -79,8 +135,17 @@ module SpatialStats
79
135
  x[idx] * y_lag_i
80
136
  end
81
137
 
138
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
139
+ # Since moran can be positive or negative, go by this definition
140
+ if stat_i_orig.positive?
141
+ (stat_i_new >= stat_i_orig).count
142
+ else
143
+ (stat_i_new <= stat_i_orig).count
144
+ end
145
+ end
146
+
82
147
  def y_lag
83
- @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
148
+ @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, y)
84
149
  end
85
150
  end
86
151
  end
@@ -18,6 +18,7 @@ module SpatialStats
18
18
  def initialize(scope, field, weights)
19
19
  super(scope, field, weights)
20
20
  end
21
+ attr_writer :x
21
22
 
22
23
  ##
23
24
  # Computes Geary's C for every observation in the +scope+.
@@ -32,6 +33,25 @@ module SpatialStats
32
33
  end
33
34
  alias c stat
34
35
 
36
+ ##
37
+ # Computes the groups each observation belongs to.
38
+ # Potential groups for Geary's C are:
39
+ # [HH] High-High
40
+ # [LL] Low-Low
41
+ # [N] Negative - Group traditionally for HL and LH, but since the difference is squared they are in the same group.
42
+ #
43
+ #
44
+ # @return [Array] groups for each observation
45
+ def groups
46
+ quads.map do |quad|
47
+ if %w[HL LH].include?(quad)
48
+ 'N'
49
+ else
50
+ quad
51
+ end
52
+ end
53
+ end
54
+
35
55
  ##
36
56
  # Values of the +field+ queried from the +scope+
37
57
  #
@@ -45,9 +65,11 @@ module SpatialStats
45
65
  private
46
66
 
47
67
  def stat_i(idx)
48
- zs = Numo::DFloat.cast(z)
49
- zi = (z[idx] - zs)**2
50
- (w[idx, true] * zi).sum
68
+ # TODO: maybe don't even use stat_i
69
+ # just form all of the modified zs and then
70
+ # pass it to a loop of mulvec all implemented in c ext
71
+ zi = z.map { |val| (z[idx] - val)**2 }
72
+ weights.sparse.dot_row(zi, idx)
51
73
  end
52
74
 
53
75
  def mc_i(wi, perms, idx)
@@ -55,8 +77,17 @@ module SpatialStats
55
77
  (wi * zi).sum(1)
56
78
  end
57
79
 
58
- def w
59
- @w ||= weights.full.row_standardized
80
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
81
+ # Geary cannot be negative, so we have to use this technique from
82
+ # GeoDa to determine p values. Note I slightly modified it to be inclusive
83
+ # on both tails not just the lower tail.
84
+ # https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
85
+ mean = stat_i_new.mean
86
+ if stat_i_orig <= mean
87
+ (stat_i_new <= stat_i_orig).count
88
+ else
89
+ (stat_i_new >= stat_i_orig).count
90
+ end
60
91
  end
61
92
  end
62
93
  end
@@ -14,13 +14,18 @@ module SpatialStats
14
14
  # @param [ActiveRecord::Relation] scope
15
15
  # @param [Symbol, String] field to query from scope
16
16
  # @param [WeightsMatrix] weights to define relationship between observations in scope
17
+ # @param [Boolean] star to preset if star will be true or false. Will be calculated otherwise.
17
18
  #
18
19
  # @return [GetisOrd]
19
20
  def initialize(scope, field, weights, star = nil)
20
- super(scope, field, weights)
21
+ @scope = scope
22
+ @field = field
23
+ @weights = weights
21
24
  @star = star
25
+ calc_weights
22
26
  end
23
27
  attr_accessor :star
28
+ attr_writer :x
24
29
 
25
30
  ##
26
31
  # Computes the G or G* statistic for every observation in x.
@@ -33,6 +38,25 @@ module SpatialStats
33
38
  end
34
39
  alias g stat
35
40
 
41
+ ##
42
+ # Computes the groups each observation belongs to.
43
+ # Potential groups for G are:
44
+ # [H] High
45
+ # [L] Low
46
+ #
47
+ # Group is high when standardized z is positive, low otherwise.
48
+ #
49
+ # @return [Array] groups for each observation
50
+ def groups
51
+ z.standardize.map do |val|
52
+ if val.positive?
53
+ 'H'
54
+ else
55
+ 'L'
56
+ end
57
+ end
58
+ end
59
+
36
60
  ##
37
61
  # Values of the +field+ queried from the +scope+
38
62
  #
@@ -50,7 +74,7 @@ module SpatialStats
50
74
  # @return [Boolean] of star
51
75
  def star?
52
76
  if @star.nil?
53
- @star = weights.full.trace.positive?
77
+ @star = weights.dense.trace.positive?
54
78
  else
55
79
  @star
56
80
  end
@@ -67,25 +91,29 @@ module SpatialStats
67
91
  x_lag_i / denominators[idx]
68
92
  end
69
93
 
70
- def w
71
- @w ||= begin
72
- if star?
73
- weights.full.windowed.row_standardized
74
- else
75
- weights.standardized
76
- end
94
+ def mc_observation_calc(stat_i_orig, stat_i_new, permutations)
95
+ # GetisOrd cannot be negative, so we have to use this technique from
96
+ # ESDA to determine if we should select p or 1-p.
97
+ # https://github.com/pysal/esda/blob/master/esda/getisord.py#L388
98
+ num_larger = (stat_i_new >= stat_i_orig).count
99
+ is_low = (permutations - num_larger) < num_larger
100
+ if is_low
101
+ permutations - num_larger
102
+ else
103
+ num_larger
77
104
  end
78
105
  end
79
106
 
107
+ def calc_weights
108
+ @weights = if star?
109
+ weights.window.standardize
110
+ else
111
+ weights.standardize
112
+ end
113
+ end
114
+
80
115
  def z_lag
81
- # window if star is true
82
- @z_lag ||= begin
83
- if star?
84
- SpatialStats::Utils::Lag.window_sum(w, x)
85
- else
86
- SpatialStats::Utils::Lag.neighbor_sum(w, x)
87
- end
88
- end
116
+ @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, x)
89
117
  end
90
118
  alias x_lag z_lag
91
119