spatial_stats 0.2.2 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,10 +12,25 @@ module SpatialStats
12
12
  def initialize(scope, field, weights)
13
13
  @scope = scope
14
14
  @field = field
15
- @weights = weights
15
+ @weights = weights.standardize
16
16
  end
17
17
  attr_accessor :scope, :field, :weights
18
18
 
19
+ ##
20
+ # A new instance of Stat, from vector and weights.
21
+ #
22
+ # @param [Array] x observations of dataset
23
+ # @param [WeightsMatrix] weights to define relationships between observations
24
+ #
25
+ # @return [Stat]
26
+ def self.from_observations(x, weights)
27
+ raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
28
+
29
+ instance = new(nil, nil, weights.standardize)
30
+ instance.x = x
31
+ instance
32
+ end
33
+
19
34
  def stat
20
35
  raise NotImplementedError, 'method stat not defined'
21
36
  end
@@ -28,6 +43,15 @@ module SpatialStats
28
43
  raise NotImplementedError, 'method variance not implemented'
29
44
  end
30
45
 
46
+ def x=(values)
47
+ @x = values.standardize
48
+ end
49
+ alias z= x=
50
+
51
+ def y=(values)
52
+ @y = values.standardize
53
+ end
54
+
31
55
  ##
32
56
  # Z-score for each observation of the statistic.
33
57
  #
@@ -42,12 +66,12 @@ module SpatialStats
42
66
 
43
67
  ##
44
68
  # Conditional randomization algorithm used in permutation testing.
45
- # Outputs an array of length n of Numo::DFloat matrices of
46
- # size m x num_neighbors. Where m is the number of permutations and
47
- # num_neighbors is the number of neighbors for that observation.
69
+ # Returns a matrix with permuted index values that will be used for
70
+ # selecting values from the original data set.
48
71
  #
49
- # The values are randomly permutated values from arr that will act
50
- # as its neighbors for that permutation.
72
+ # The width of the matrix is the max number of neighbors + 1
73
+ # which is way less than it would be if the original vector
74
+ # was shuffled in full.
51
75
  #
52
76
  # This is super important because most weight matrices are very
53
77
  # sparse so the amount of shuffling/multiplication that is done
@@ -55,42 +79,26 @@ module SpatialStats
55
79
  #
56
80
  # @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
57
81
  #
58
- # @return [Array] of Numo::Narray matrices
82
+ # @return [Numo::Int32] matrix of shape perms x wc_max + 1
59
83
  #
60
- def crand(arr, permutations, rng)
84
+ def crand(permutations, rng)
61
85
  # basing this off the ESDA method
62
86
  # need to get k for max_neighbors
63
87
  # and wc for cardinalities of each item
64
88
  # this returns an array of length n with
65
- # (permutations x neighborz) Numo Arrays.
89
+ # (permutations x neighbors) Numo Arrays.
66
90
  # This helps reduce computation time because
67
91
  # we are only dealing with neighbors for each
68
92
  # entry not the entire list of permutations for each entry.
69
93
  n_1 = weights.n - 1
70
94
 
71
95
  # weight counts
72
- wc = [0] * weights.n
73
- k = 0
74
- (0..n_1).each do |idx|
75
- wc[idx] = (w[idx, true] > 0).count
76
- end
77
-
96
+ wc = weights.wc
78
97
  k = wc.max + 1
79
98
  prange = (0..permutations - 1).to_a
80
99
 
81
- arr = Numo::DFloat.cast(arr)
82
-
83
- ids = (0..n_1).to_a
84
100
  ids_perm = (0..n_1 - 1).to_a
85
- rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
86
-
87
- (0..n_1).map do |idx|
88
- idsi = ids.dup
89
- idsi.delete_at(idx)
90
- idsi.shuffle!(random: rng)
91
- idsi = Numo::Int32.cast(idsi)
92
- arr[idsi[rids[true, 0..wc[idx] - 1]]]
93
- end
101
+ Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
94
102
  end
95
103
 
96
104
  ##
@@ -111,31 +119,40 @@ module SpatialStats
111
119
  # its neighbors. Then we will only test for that item instead
112
120
  # of the entire set. This will be done for each item.
113
121
  rng = gen_rng(seed)
114
- shuffles = crand(x, permutations, rng)
115
- n = weights.n
116
- # r is the number of equal to or more extreme samples
117
- stat_orig = stat
118
- rs = [0] * n
119
-
120
- ws = neighbor_weights
122
+ rids = crand(permutations, rng)
121
123
 
122
- idx = 0
123
- while idx < n
124
- stat_i_orig = stat_orig[idx]
124
+ n_1 = weights.n - 1
125
+ sparse = weights.sparse
126
+ row_index = sparse.row_index
127
+ ws = sparse.values
128
+ wc = weights.wc
129
+ stat_orig = stat
125
130
 
126
- wi = Numo::DFloat.cast(ws[idx])
127
- stat_i_new = mc_i(wi, shuffles[idx], idx)
131
+ arr = Numo::DFloat.cast(x)
132
+ ids = (0..n_1).to_a
133
+ observations = Array.new(weights.n)
134
+ (0..n_1).each do |idx|
135
+ idsi = ids.dup
136
+ idsi.delete_at(idx)
137
+ idsi.shuffle!(random: rng)
138
+ idsi = Numo::Int32.cast(idsi)
139
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
128
140
 
129
- rs[idx] = if stat_i_orig.positive?
130
- (stat_i_new >= stat_i_orig).count
131
- else
132
- (stat_i_new <= stat_i_orig).count
133
- end
141
+ # account for case where there are no neighbors
142
+ row_range = row_index[idx]..(row_index[idx + 1] - 1)
143
+ if row_range.size.zero?
144
+ observations[idx] = permutations
145
+ next
146
+ end
134
147
 
135
- idx += 1
148
+ wi = Numo::DFloat.cast(ws[row_range])
149
+ stat_i_new = mc_i(wi, sample, idx)
150
+ stat_i_orig = stat_orig[idx]
151
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
152
+ permutations)
136
153
  end
137
154
 
138
- rs.map do |ri|
155
+ observations.map do |ri|
139
156
  (ri + 1.0) / (permutations + 1.0)
140
157
  end
141
158
  end
@@ -154,30 +171,40 @@ module SpatialStats
154
171
  # @return [Array] of p-values
155
172
  def mc_bv(permutations, seed)
156
173
  rng = gen_rng(seed)
157
- shuffles = crand(y, permutations, rng)
158
- n = weights.n
174
+ rids = crand(permutations, rng)
159
175
 
176
+ n_1 = weights.n - 1
177
+ sparse = weights.sparse
178
+ row_index = sparse.row_index
179
+ ws = sparse.values
180
+ wc = weights.wc
160
181
  stat_orig = stat
161
- rs = [0] * n
162
-
163
- ws = neighbor_weights
164
182
 
165
- idx = 0
166
- while idx < n
167
- stat_i_orig = stat_orig[idx]
168
- wi = Numo::DFloat.cast(ws[idx])
169
- stat_i_new = mc_i(wi, shuffles[idx], idx)
183
+ arr = Numo::DFloat.cast(y)
184
+ ids = (0..n_1).to_a
185
+ observations = Array.new(weights.n)
186
+ (0..n_1).each do |idx|
187
+ idsi = ids.dup
188
+ idsi.delete_at(idx)
189
+ idsi.shuffle!(random: rng)
190
+ idsi = Numo::Int32.cast(idsi)
191
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
170
192
 
171
- rs[idx] = if stat_i_orig.positive?
172
- (stat_i_new >= stat_i_orig).count
173
- else
174
- (stat_i_new <= stat_i_orig).count
175
- end
193
+ # account for case where there are no neighbors
194
+ row_range = row_index[idx]..(row_index[idx + 1] - 1)
195
+ if row_range.size.zero?
196
+ observations[idx] = permutations
197
+ next
198
+ end
176
199
 
177
- idx += 1
200
+ wi = Numo::DFloat.cast(ws[row_range])
201
+ stat_i_new = mc_i(wi, sample, idx)
202
+ stat_i_orig = stat_orig[idx]
203
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
204
+ permutations)
178
205
  end
179
206
 
180
- rs.map do |ri|
207
+ observations.map do |ri|
181
208
  (ri + 1.0) / (permutations + 1.0)
182
209
  end
183
210
  end
@@ -200,8 +227,7 @@ module SpatialStats
200
227
  # @return [Array] of labels
201
228
  def quads
202
229
  # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
203
- w = @weights.full
204
- z_lag = SpatialStats::Utils::Lag.neighbor_average(w, z)
230
+ z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, z)
205
231
  zp = z.map(&:positive?)
206
232
  lp = z_lag.map(&:positive?)
207
233
 
@@ -221,6 +247,22 @@ module SpatialStats
221
247
  end
222
248
  end
223
249
 
250
+ ##
251
+ # Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
252
+ # in an array of hashes.
253
+ #
254
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
255
+ # @param [Integer] seed used in random number generator for shuffles.
256
+ #
257
+ # @return [Array]
258
+ def summary(permutations = 99, seed = nil)
259
+ p_vals = mc(permutations, seed)
260
+ data = weights.keys.zip(stat, p_vals, groups)
261
+ data.map do |row|
262
+ { key: row[0], stat: row[1], p: row[2], group: row[3] }
263
+ end
264
+ end
265
+
224
266
  private
225
267
 
226
268
  def stat_i
@@ -231,8 +273,12 @@ module SpatialStats
231
273
  raise NotImplementedError, 'method mc_i not defined'
232
274
  end
233
275
 
276
+ def mc_observation_calc(_stat_i_orig, _stat_i_new, _permutations)
277
+ raise NotImplementedError, 'method mc_observation_calc not defined'
278
+ end
279
+
234
280
  def w
235
- weights.standardized
281
+ @w ||= weights.dense
236
282
  end
237
283
 
238
284
  def gen_rng(seed = nil)
@@ -242,20 +288,6 @@ module SpatialStats
242
288
  Random.new
243
289
  end
244
290
  end
245
-
246
- def neighbor_weights
247
- # record the non-zero weights in variable length arrays for each
248
- # row in the weights table
249
- ws = [[]] * weights.n
250
- (0..weights.n - 1).each do |idx|
251
- neighbors = []
252
- w[idx, true].each do |wij|
253
- neighbors << wij if wij != 0
254
- end
255
- ws[idx] = neighbors
256
- end
257
- ws
258
- end
259
291
  end
260
292
  end
261
293
  end
@@ -12,11 +12,11 @@ module Numo
12
12
  #
13
13
  # @example
14
14
  #
15
- # Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardized
15
+ # Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardize
16
16
  # Numo::DFloat [[0, 0.5, 0.5], [0.33333, 0.33333, 0.33333]]
17
17
  #
18
18
  # @return [Numo::NArray]
19
- def row_standardized
19
+ def row_standardize
20
20
  # every row will sum up to 1, or if they are all 0, do nothing
21
21
  standardized = each_over_axis.map do |row|
22
22
  sum = row.sum
@@ -38,16 +38,16 @@ module Numo
38
38
  #
39
39
  # @example
40
40
  #
41
- # Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].windowed
41
+ # Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].window
42
42
  # Numo::DFloat [[1, 1, 0], [1, 1, 1], [0, 1, 1]]
43
43
  #
44
44
  # @example
45
45
  # # Input will be equivalent to output in this case
46
- # Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].windowed
46
+ # Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].window
47
47
  # Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]]
48
48
  #
49
49
  # @return [Numo::NArray]
50
- def windowed
50
+ def window
51
51
  # in windowed calculations, the diagonal is set to 1
52
52
  # if trace (sum of diag) is 0, add it, else return input
53
53
  if trace.zero?
@@ -6,5 +6,30 @@ module SpatialStats
6
6
  ##
7
7
  # The Utils module contains various utilities used in the gem.
8
8
  module Utils
9
+ ##
10
+ # Compute the false discovery rate (FDR) of a set of p-values given
11
+ # an alpha value.
12
+ #
13
+ # If there is no FDR available in the dataset, the Bonferroni Bound is
14
+ # returned instead.
15
+ #
16
+ # @param [Array] pvals from an mc test
17
+ # @param [Float] alpha value for the fdr
18
+ #
19
+ # @return [Float] either the FDR or Bonferroni Bound
20
+ def self.fdr(pvals, alpha)
21
+ n = pvals.size
22
+ b_bound = alpha / n
23
+ pvals.sort!
24
+
25
+ p_val = b_bound
26
+ (0..n - 1).each do |i|
27
+ p_fdr = (i + 1) * b_bound
28
+ break unless pvals[i] <= p_fdr
29
+
30
+ p_val = p_fdr
31
+ end
32
+ p_val
33
+ end
9
34
  end
10
35
  end
@@ -11,36 +11,36 @@ module SpatialStats
11
11
  # Dot product of the row_standardized input matrix
12
12
  # by the input vector, variables.
13
13
  #
14
- # @param [Numo::NArray] matrix 2-D square matrix.
14
+ # @param [WeightsMatrix] matrix holding target weights.
15
15
  # @param [Array] variables vector multiplying the matrix
16
16
  #
17
17
  # @return [Array] resultant vector
18
18
  def self.neighbor_average(matrix, variables)
19
- matrix = matrix.row_standardized
19
+ matrix = matrix.standardize
20
20
  neighbor_sum(matrix, variables)
21
21
  end
22
22
 
23
23
  ##
24
24
  # Dot product of the input matrix by the input vector, variables.
25
25
  #
26
- # @param [Numo::NArray] matrix 2-D square matrix.
26
+ # @param [WeightsMatrix] matrix holding target weights.
27
27
  # @param [Array] variables vector multiplying the matrix
28
28
  #
29
29
  # @return [Array] resultant vector
30
30
  def self.neighbor_sum(matrix, variables)
31
- matrix.dot(variables).to_a
31
+ matrix.sparse.mulvec(variables)
32
32
  end
33
33
 
34
34
  ##
35
- # Dot product of the input windowed, row standardizd matrix by
35
+ # Dot product of the input windowed, row standardized matrix by
36
36
  # the input vector, variables.
37
37
  #
38
- # @param [Numo::NArray] matrix 2-D square matrix.
38
+ # @param [WeightsMatrix] matrix holding target weights.
39
39
  # @param [Array] variables vector multiplying the matrix
40
40
  #
41
41
  # @return [Array] resultant vector
42
42
  def self.window_average(matrix, variables)
43
- matrix = matrix.windowed.row_standardized
43
+ matrix = matrix.window.standardize
44
44
  window_sum(matrix, variables)
45
45
  end
46
46
 
@@ -48,13 +48,13 @@ module SpatialStats
48
48
  # Dot product of the input windowed matrix by
49
49
  # the input vector, variables.
50
50
  #
51
- # @param [Numo::NArray] matrix 2-D square matrix.
51
+ # @param [WeightsMatrix] matrix holding target weights.
52
52
  # @param [Array] variables vector multiplying the matrix
53
53
  #
54
54
  # @return [Array] resultant vector
55
55
  def self.window_sum(matrix, variables)
56
- matrix = matrix.windowed
57
- matrix.dot(variables).to_a
56
+ matrix = matrix.window
57
+ matrix.sparse.mulvec(variables)
58
58
  end
59
59
  end
60
60
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpatialStats
4
- VERSION = '0.2.2'
4
+ VERSION = '1.0.4'
5
5
  end
@@ -15,21 +15,26 @@ module SpatialStats
15
15
  #
16
16
  # @return [WeightsMatrix]
17
17
  def self.rook(scope, field)
18
- p_key = scope.primary_key
19
- keys = scope.pluck(p_key).sort
20
-
21
18
  neighbors = SpatialStats::Queries::Weights
22
19
  .rook_contiguity_neighbors(scope, field)
23
20
 
21
+ # get keys to make sure we have consistent dimensions when
22
+ # some entries don't have neighbors.
23
+ # define a new hash that has all the keys from scope
24
+ keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
25
+
24
26
  neighbors = neighbors.group_by(&:i_id)
27
+ missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
28
+ neighbors = neighbors.merge(missing_neighbors)
29
+
25
30
  weights = neighbors.transform_values do |value|
26
31
  value.map do |neighbor|
27
- hash = neighbor.as_json(only: [:j_id]).symbolize_keys
32
+ hash = { id: neighbor[:j_id] }
28
33
  hash[:weight] = 1
29
34
  hash
30
35
  end
31
36
  end
32
- SpatialStats::Weights::WeightsMatrix.new(keys, weights)
37
+ SpatialStats::Weights::WeightsMatrix.new(weights)
33
38
  end
34
39
 
35
40
  ##
@@ -40,21 +45,26 @@ module SpatialStats
40
45
  #
41
46
  # @return [WeightsMatrix]
42
47
  def self.queen(scope, field)
43
- p_key = scope.primary_key
44
- keys = scope.pluck(p_key).sort
45
-
46
48
  neighbors = SpatialStats::Queries::Weights
47
49
  .queen_contiguity_neighbors(scope, field)
48
50
 
51
+ # get keys to make sure we have consistent dimensions when
52
+ # some entries don't have neighbors.
53
+ # define a new hash that has all the keys from scope
54
+ keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
55
+
49
56
  neighbors = neighbors.group_by(&:i_id)
57
+ missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
58
+ neighbors = neighbors.merge(missing_neighbors)
59
+
50
60
  weights = neighbors.transform_values do |value|
51
61
  value.map do |neighbor|
52
- hash = neighbor.as_json(only: [:j_id]).symbolize_keys
62
+ hash = { id: neighbor[:j_id] }
53
63
  hash[:weight] = 1
54
64
  hash
55
65
  end
56
66
  end
57
- SpatialStats::Weights::WeightsMatrix.new(keys, weights)
67
+ SpatialStats::Weights::WeightsMatrix.new(weights)
58
68
  end
59
69
  end
60
70
  end