spatial_stats 0.2.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,10 +12,25 @@ module SpatialStats
12
12
  def initialize(scope, field, weights)
13
13
  @scope = scope
14
14
  @field = field
15
- @weights = weights
15
+ @weights = weights.standardize
16
16
  end
17
17
  attr_accessor :scope, :field, :weights
18
18
 
19
+ ##
20
+ # A new instance of Stat, from vector and weights.
21
+ #
22
+ # @param [Array] x observations of dataset
23
+ # @param [WeightsMatrix] weights to define relationships between observations
24
+ #
25
+ # @return [Stat]
26
+ def self.from_observations(x, weights)
27
+ raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
28
+
29
+ instance = new(nil, nil, weights.standardize)
30
+ instance.x = x
31
+ instance
32
+ end
33
+
19
34
  def stat
20
35
  raise NotImplementedError, 'method stat not defined'
21
36
  end
@@ -28,6 +43,15 @@ module SpatialStats
28
43
  raise NotImplementedError, 'method variance not implemented'
29
44
  end
30
45
 
46
+ def x=(values)
47
+ @x = values.standardize
48
+ end
49
+ alias z= x=
50
+
51
+ def y=(values)
52
+ @y = values.standardize
53
+ end
54
+
31
55
  ##
32
56
  # Z-score for each observation of the statistic.
33
57
  #
@@ -42,12 +66,12 @@ module SpatialStats
42
66
 
43
67
  ##
44
68
  # Conditional randomization algorithm used in permutation testing.
45
- # Outputs an array of length n of Numo::DFloat matrices of
46
- # size m x num_neighbors. Where m is the number of permutations and
47
- # num_neighbors is the number of neighbors for that observation.
69
+ # Returns a matrix with permuted index values that will be used for
70
+ # selecting values from the original data set.
48
71
  #
49
- # The values are randomly permutated values from arr that will act
50
- # as its neighbors for that permutation.
72
+ # The width of the matrix is the max number of neighbors + 1
73
+ # which is way less than it would be if the original vector
74
+ # was shuffled in full.
51
75
  #
52
76
  # This is super important because most weight matrices are very
53
77
  # sparse so the amount of shuffling/multiplication that is done
@@ -55,42 +79,26 @@ module SpatialStats
55
79
  #
56
80
  # @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
57
81
  #
58
- # @return [Array] of Numo::Narray matrices
82
+ # @return [Numo::Int32] matrix of shape permutations x (wc.max + 1)
59
83
  #
60
- def crand(arr, permutations, rng)
84
+ def crand(permutations, rng)
61
85
  # basing this off the ESDA method
62
86
  # need to get k for max_neighbors
63
87
  # and wc for cardinalities of each item
64
88
  # this returns an array of length n with
65
- # (permutations x neighborz) Numo Arrays.
89
+ # (permutations x neighbors) Numo Arrays.
66
90
  # This helps reduce computation time because
67
91
  # we are only dealing with neighbors for each
68
92
  # entry not the entire list of permutations for each entry.
69
93
  n_1 = weights.n - 1
70
94
 
71
95
  # weight counts
72
- wc = [0] * weights.n
73
- k = 0
74
- (0..n_1).each do |idx|
75
- wc[idx] = (w[idx, true] > 0).count
76
- end
77
-
96
+ wc = weights.wc
78
97
  k = wc.max + 1
79
98
  prange = (0..permutations - 1).to_a
80
99
 
81
- arr = Numo::DFloat.cast(arr)
82
-
83
- ids = (0..n_1).to_a
84
100
  ids_perm = (0..n_1 - 1).to_a
85
- rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
86
-
87
- (0..n_1).map do |idx|
88
- idsi = ids.dup
89
- idsi.delete_at(idx)
90
- idsi.shuffle!(random: rng)
91
- idsi = Numo::Int32.cast(idsi)
92
- arr[idsi[rids[true, 0..wc[idx] - 1]]]
93
- end
101
+ Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
94
102
  end
95
103
 
96
104
  ##
@@ -111,31 +119,40 @@ module SpatialStats
111
119
  # its neighbors. Then we will only test for that item instead
112
120
  # of the entire set. This will be done for each item.
113
121
  rng = gen_rng(seed)
114
- shuffles = crand(x, permutations, rng)
115
- n = weights.n
116
- # r is the number of equal to or more extreme samples
117
- stat_orig = stat
118
- rs = [0] * n
119
-
120
- ws = neighbor_weights
122
+ rids = crand(permutations, rng)
121
123
 
122
- idx = 0
123
- while idx < n
124
- stat_i_orig = stat_orig[idx]
124
+ n_1 = weights.n - 1
125
+ sparse = weights.sparse
126
+ row_index = sparse.row_index
127
+ ws = sparse.values
128
+ wc = weights.wc
129
+ stat_orig = stat
125
130
 
126
- wi = Numo::DFloat.cast(ws[idx])
127
- stat_i_new = mc_i(wi, shuffles[idx], idx)
131
+ arr = Numo::DFloat.cast(x)
132
+ ids = (0..n_1).to_a
133
+ observations = Array.new(weights.n)
134
+ (0..n_1).each do |idx|
135
+ idsi = ids.dup
136
+ idsi.delete_at(idx)
137
+ idsi.shuffle!(random: rng)
138
+ idsi = Numo::Int32.cast(idsi)
139
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
128
140
 
129
- rs[idx] = if stat_i_orig.positive?
130
- (stat_i_new >= stat_i_orig).count
131
- else
132
- (stat_i_new <= stat_i_orig).count
133
- end
141
+ # account for case where there are no neighbors
142
+ row_range = row_index[idx]..(row_index[idx + 1] - 1)
143
+ if row_range.size.zero?
144
+ observations[idx] = permutations
145
+ next
146
+ end
134
147
 
135
- idx += 1
148
+ wi = Numo::DFloat.cast(ws[row_range])
149
+ stat_i_new = mc_i(wi, sample, idx)
150
+ stat_i_orig = stat_orig[idx]
151
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
152
+ permutations)
136
153
  end
137
154
 
138
- rs.map do |ri|
155
+ observations.map do |ri|
139
156
  (ri + 1.0) / (permutations + 1.0)
140
157
  end
141
158
  end
@@ -154,30 +171,40 @@ module SpatialStats
154
171
  # @return [Array] of p-values
155
172
  def mc_bv(permutations, seed)
156
173
  rng = gen_rng(seed)
157
- shuffles = crand(y, permutations, rng)
158
- n = weights.n
174
+ rids = crand(permutations, rng)
159
175
 
176
+ n_1 = weights.n - 1
177
+ sparse = weights.sparse
178
+ row_index = sparse.row_index
179
+ ws = sparse.values
180
+ wc = weights.wc
160
181
  stat_orig = stat
161
- rs = [0] * n
162
-
163
- ws = neighbor_weights
164
182
 
165
- idx = 0
166
- while idx < n
167
- stat_i_orig = stat_orig[idx]
168
- wi = Numo::DFloat.cast(ws[idx])
169
- stat_i_new = mc_i(wi, shuffles[idx], idx)
183
+ arr = Numo::DFloat.cast(y)
184
+ ids = (0..n_1).to_a
185
+ observations = Array.new(weights.n)
186
+ (0..n_1).each do |idx|
187
+ idsi = ids.dup
188
+ idsi.delete_at(idx)
189
+ idsi.shuffle!(random: rng)
190
+ idsi = Numo::Int32.cast(idsi)
191
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
170
192
 
171
- rs[idx] = if stat_i_orig.positive?
172
- (stat_i_new >= stat_i_orig).count
173
- else
174
- (stat_i_new <= stat_i_orig).count
175
- end
193
+ # account for case where there are no neighbors
194
+ row_range = row_index[idx]..(row_index[idx + 1] - 1)
195
+ if row_range.size.zero?
196
+ observations[idx] = permutations
197
+ next
198
+ end
176
199
 
177
- idx += 1
200
+ wi = Numo::DFloat.cast(ws[row_range])
201
+ stat_i_new = mc_i(wi, sample, idx)
202
+ stat_i_orig = stat_orig[idx]
203
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
204
+ permutations)
178
205
  end
179
206
 
180
- rs.map do |ri|
207
+ observations.map do |ri|
181
208
  (ri + 1.0) / (permutations + 1.0)
182
209
  end
183
210
  end
@@ -200,8 +227,7 @@ module SpatialStats
200
227
  # @return [Array] of labels
201
228
  def quads
202
229
  # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
203
- w = @weights.full
204
- z_lag = SpatialStats::Utils::Lag.neighbor_average(w, z)
230
+ z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, z)
205
231
  zp = z.map(&:positive?)
206
232
  lp = z_lag.map(&:positive?)
207
233
 
@@ -221,6 +247,22 @@ module SpatialStats
221
247
  end
222
248
  end
223
249
 
250
+ ##
251
+ # Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
252
+ # in an array of hashes.
253
+ #
254
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
255
+ # @param [Integer] seed used in random number generator for shuffles.
256
+ #
257
+ # @return [Array<Hash>] one hash per observation with :key, :stat, :p and :group
258
+ def summary(permutations = 99, seed = nil)
259
+ p_vals = mc(permutations, seed)
260
+ data = weights.keys.zip(stat, p_vals, groups)
261
+ data.map do |row|
262
+ { key: row[0], stat: row[1], p: row[2], group: row[3] }
263
+ end
264
+ end
265
+
224
266
  private
225
267
 
226
268
  def stat_i
@@ -231,8 +273,12 @@ module SpatialStats
231
273
  raise NotImplementedError, 'method mc_i not defined'
232
274
  end
233
275
 
276
+ def mc_observation_calc(_stat_i_orig, _stat_i_new, _permutations)
277
+ raise NotImplementedError, 'method mc_observation_calc not defined'
278
+ end
279
+
234
280
  def w
235
- weights.standardized
281
+ @w ||= weights.dense
236
282
  end
237
283
 
238
284
  def gen_rng(seed = nil)
@@ -242,20 +288,6 @@ module SpatialStats
242
288
  Random.new
243
289
  end
244
290
  end
245
-
246
- def neighbor_weights
247
- # record the non-zero weights in variable length arrays for each
248
- # row in the weights table
249
- ws = [[]] * weights.n
250
- (0..weights.n - 1).each do |idx|
251
- neighbors = []
252
- w[idx, true].each do |wij|
253
- neighbors << wij if wij != 0
254
- end
255
- ws[idx] = neighbors
256
- end
257
- ws
258
- end
259
291
  end
260
292
  end
261
293
  end
@@ -12,11 +12,11 @@ module Numo
12
12
  #
13
13
  # @example
14
14
  #
15
- # Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardized
15
+ # Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardize
16
16
  # Numo::DFloat [[0, 0.5, 0.5], [0.33333, 0.33333, 0.33333]]
17
17
  #
18
18
  # @return [Numo::NArray]
19
- def row_standardized
19
+ def row_standardize
20
20
  # every row will sum up to 1, or if they are all 0, do nothing
21
21
  standardized = each_over_axis.map do |row|
22
22
  sum = row.sum
@@ -38,16 +38,16 @@ module Numo
38
38
  #
39
39
  # @example
40
40
  #
41
- # Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].windowed
41
+ # Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].window
42
42
  # Numo::DFloat [[1, 1, 0], [1, 1, 1], [0, 1, 1]]
43
43
  #
44
44
  # @example
45
45
  # # Input will be equivalent to output in this case
46
- # Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].windowed
46
+ # Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].window
47
47
  # Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]]
48
48
  #
49
49
  # @return [Numo::NArray]
50
- def windowed
50
+ def window
51
51
  # in windowed calculations, the diagonal is set to 1
52
52
  # if trace (sum of diag) is 0, add it, else return input
53
53
  if trace.zero?
@@ -6,5 +6,30 @@ module SpatialStats
6
6
  ##
7
7
  # The Utils module contains various utilities used in the gem.
8
8
  module Utils
9
+ ##
10
+ # Compute the false discovery rate (FDR) of a set of p-values given
11
+ # an alpha value.
12
+ #
13
+ # If there is no FDR available in the dataset, the Bonferroni Bound is
14
+ # returned instead.
15
+ #
16
+ # @param [Array] pvals from an mc test (note: sorted in place)
17
+ # @param [Float] alpha value for the fdr
18
+ #
19
+ # @return [Float] either the FDR or Bonferroni Bound
20
+ def self.fdr(pvals, alpha)
21
+ n = pvals.size
22
+ b_bound = alpha / n
23
+ pvals.sort!
24
+
25
+ p_val = b_bound
26
+ (0..n - 1).each do |i|
27
+ p_fdr = (i + 1) * b_bound
28
+ break unless pvals[i] <= p_fdr
29
+
30
+ p_val = p_fdr
31
+ end
32
+ p_val
33
+ end
9
34
  end
10
35
  end
@@ -11,36 +11,36 @@ module SpatialStats
11
11
  # Dot product of the row_standardized input matrix
12
12
  # by the input vector, variables.
13
13
  #
14
- # @param [Numo::NArray] matrix 2-D square matrix.
14
+ # @param [WeightsMatrix] matrix holding target weights.
15
15
  # @param [Array] variables vector multiplying the matrix
16
16
  #
17
17
  # @return [Array] resultant vector
18
18
  def self.neighbor_average(matrix, variables)
19
- matrix = matrix.row_standardized
19
+ matrix = matrix.standardize
20
20
  neighbor_sum(matrix, variables)
21
21
  end
22
22
 
23
23
  ##
24
24
  # Dot product of the input matrix by the input vector, variables.
25
25
  #
26
- # @param [Numo::NArray] matrix 2-D square matrix.
26
+ # @param [WeightsMatrix] matrix holding target weights.
27
27
  # @param [Array] variables vector multiplying the matrix
28
28
  #
29
29
  # @return [Array] resultant vector
30
30
  def self.neighbor_sum(matrix, variables)
31
- matrix.dot(variables).to_a
31
+ matrix.sparse.mulvec(variables)
32
32
  end
33
33
 
34
34
  ##
35
- # Dot product of the input windowed, row standardizd matrix by
35
+ # Dot product of the input windowed, row standardized matrix by
36
36
  # the input vector, variables.
37
37
  #
38
- # @param [Numo::NArray] matrix 2-D square matrix.
38
+ # @param [WeightsMatrix] matrix holding target weights.
39
39
  # @param [Array] variables vector multiplying the matrix
40
40
  #
41
41
  # @return [Array] resultant vector
42
42
  def self.window_average(matrix, variables)
43
- matrix = matrix.windowed.row_standardized
43
+ matrix = matrix.window.standardize
44
44
  window_sum(matrix, variables)
45
45
  end
46
46
 
@@ -48,13 +48,13 @@ module SpatialStats
48
48
  # Dot product of the input windowed matrix by
49
49
  # the input vector, variables.
50
50
  #
51
- # @param [Numo::NArray] matrix 2-D square matrix.
51
+ # @param [WeightsMatrix] matrix holding target weights.
52
52
  # @param [Array] variables vector multiplying the matrix
53
53
  #
54
54
  # @return [Array] resultant vector
55
55
  def self.window_sum(matrix, variables)
56
- matrix = matrix.windowed
57
- matrix.dot(variables).to_a
56
+ matrix = matrix.window
57
+ matrix.sparse.mulvec(variables)
58
58
  end
59
59
  end
60
60
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpatialStats
4
- VERSION = '0.2.2'
4
+ VERSION = '1.0.4'
5
5
  end
@@ -15,21 +15,26 @@ module SpatialStats
15
15
  #
16
16
  # @return [WeightsMatrix]
17
17
  def self.rook(scope, field)
18
- p_key = scope.primary_key
19
- keys = scope.pluck(p_key).sort
20
-
21
18
  neighbors = SpatialStats::Queries::Weights
22
19
  .rook_contiguity_neighbors(scope, field)
23
20
 
21
+ # get keys to make sure we have consistent dimensions when
22
+ # some entries don't have neighbors.
23
+ # define a new hash that has all the keys from scope
24
+ keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
25
+
24
26
  neighbors = neighbors.group_by(&:i_id)
27
+ missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
28
+ neighbors = neighbors.merge(missing_neighbors)
29
+
25
30
  weights = neighbors.transform_values do |value|
26
31
  value.map do |neighbor|
27
- hash = neighbor.as_json(only: [:j_id]).symbolize_keys
32
+ hash = { id: neighbor[:j_id] }
28
33
  hash[:weight] = 1
29
34
  hash
30
35
  end
31
36
  end
32
- SpatialStats::Weights::WeightsMatrix.new(keys, weights)
37
+ SpatialStats::Weights::WeightsMatrix.new(weights)
33
38
  end
34
39
 
35
40
  ##
@@ -40,21 +45,26 @@ module SpatialStats
40
45
  #
41
46
  # @return [WeightsMatrix]
42
47
  def self.queen(scope, field)
43
- p_key = scope.primary_key
44
- keys = scope.pluck(p_key).sort
45
-
46
48
  neighbors = SpatialStats::Queries::Weights
47
49
  .queen_contiguity_neighbors(scope, field)
48
50
 
51
+ # get keys to make sure we have consistent dimensions when
52
+ # some entries don't have neighbors.
53
+ # define a new hash that has all the keys from scope
54
+ keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
55
+
49
56
  neighbors = neighbors.group_by(&:i_id)
57
+ missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
58
+ neighbors = neighbors.merge(missing_neighbors)
59
+
50
60
  weights = neighbors.transform_values do |value|
51
61
  value.map do |neighbor|
52
- hash = neighbor.as_json(only: [:j_id]).symbolize_keys
62
+ hash = { id: neighbor[:j_id] }
53
63
  hash[:weight] = 1
54
64
  hash
55
65
  end
56
66
  end
57
- SpatialStats::Weights::WeightsMatrix.new(keys, weights)
67
+ SpatialStats::Weights::WeightsMatrix.new(weights)
58
68
  end
59
69
  end
60
70
  end