spatial_stats 0.2.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +126 -55
- data/Rakefile +7 -0
- data/ext/spatial_stats/csr_matrix.c +380 -0
- data/ext/spatial_stats/csr_matrix.h +34 -0
- data/ext/spatial_stats/extconf.rb +6 -0
- data/ext/spatial_stats/spatial_stats.c +32 -0
- data/lib/spatial_stats.rb +1 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
- data/lib/spatial_stats/global/moran.rb +43 -36
- data/lib/spatial_stats/global/stat.rb +55 -27
- data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
- data/lib/spatial_stats/local/geary.rb +35 -5
- data/lib/spatial_stats/local/getis_ord.rb +45 -17
- data/lib/spatial_stats/local/moran.rb +39 -9
- data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
- data/lib/spatial_stats/local/stat.rb +112 -80
- data/lib/spatial_stats/narray_ext.rb +5 -5
- data/lib/spatial_stats/spatial_stats.so +0 -0
- data/lib/spatial_stats/utils.rb +25 -0
- data/lib/spatial_stats/utils/lag.rb +10 -10
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights/contiguous.rb +20 -10
- data/lib/spatial_stats/weights/distant.rb +38 -20
- data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
- metadata +33 -11
- data/MIT-LICENSE +0 -20
@@ -12,10 +12,25 @@ module SpatialStats
|
|
12
12
|
def initialize(scope, field, weights)
|
13
13
|
@scope = scope
|
14
14
|
@field = field
|
15
|
-
@weights = weights
|
15
|
+
@weights = weights.standardize
|
16
16
|
end
|
17
17
|
attr_accessor :scope, :field, :weights
|
18
18
|
|
19
|
+
##
|
20
|
+
# A new instance of Stat, from vector and weights.
|
21
|
+
#
|
22
|
+
# @param [Array] x observations of dataset
|
23
|
+
# @param [WeightsMatrix] weights to define relationships between observations
|
24
|
+
#
|
25
|
+
# @return [Stat]
|
26
|
+
def self.from_observations(x, weights)
|
27
|
+
raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
|
28
|
+
|
29
|
+
instance = new(nil, nil, weights.standardize)
|
30
|
+
instance.x = x
|
31
|
+
instance
|
32
|
+
end
|
33
|
+
|
19
34
|
def stat
|
20
35
|
raise NotImplementedError, 'method stat not defined'
|
21
36
|
end
|
@@ -28,6 +43,15 @@ module SpatialStats
|
|
28
43
|
raise NotImplementedError, 'method variance not implemented'
|
29
44
|
end
|
30
45
|
|
46
|
+
def x=(values)
|
47
|
+
@x = values.standardize
|
48
|
+
end
|
49
|
+
alias z= x=
|
50
|
+
|
51
|
+
def y=(values)
|
52
|
+
@y = values.standardize
|
53
|
+
end
|
54
|
+
|
31
55
|
##
|
32
56
|
# Z-score for each observation of the statistic.
|
33
57
|
#
|
@@ -42,12 +66,12 @@ module SpatialStats
|
|
42
66
|
|
43
67
|
##
|
44
68
|
# Conditional randomization algorithm used in permutation testing.
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# num_neighbors is the number of neighbors for that observation.
|
69
|
+
# Returns a matrix with permuted index values that will be used for
|
70
|
+
# selecting values from the original data set.
|
48
71
|
#
|
49
|
-
# The
|
50
|
-
#
|
72
|
+
# The width of the matrix is the max number of neighbors + 1
|
73
|
+
# which is way less than it would be if the original vector
|
74
|
+
# was shuffled in full.
|
51
75
|
#
|
52
76
|
# This is super important because most weight matrices are very
|
53
77
|
# sparse so the amount of shuffling/multiplication that is done
|
@@ -55,42 +79,26 @@ module SpatialStats
|
|
55
79
|
#
|
56
80
|
# @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
|
57
81
|
#
|
58
|
-
# @return [
|
82
|
+
# @return [Numo::Int32] matrix of shape perms x wc_max + 1
|
59
83
|
#
|
60
|
-
def crand(
|
84
|
+
def crand(permutations, rng)
|
61
85
|
# basing this off the ESDA method
|
62
86
|
# need to get k for max_neighbors
|
63
87
|
# and wc for cardinalities of each item
|
64
88
|
# this returns an array of length n with
|
65
|
-
# (permutations x
|
89
|
+
# (permutations x neighbors) Numo Arrays.
|
66
90
|
# This helps reduce computation time because
|
67
91
|
# we are only dealing with neighbors for each
|
68
92
|
# entry not the entire list of permutations for each entry.
|
69
93
|
n_1 = weights.n - 1
|
70
94
|
|
71
95
|
# weight counts
|
72
|
-
wc =
|
73
|
-
k = 0
|
74
|
-
(0..n_1).each do |idx|
|
75
|
-
wc[idx] = (w[idx, true] > 0).count
|
76
|
-
end
|
77
|
-
|
96
|
+
wc = weights.wc
|
78
97
|
k = wc.max + 1
|
79
98
|
prange = (0..permutations - 1).to_a
|
80
99
|
|
81
|
-
arr = Numo::DFloat.cast(arr)
|
82
|
-
|
83
|
-
ids = (0..n_1).to_a
|
84
100
|
ids_perm = (0..n_1 - 1).to_a
|
85
|
-
|
86
|
-
|
87
|
-
(0..n_1).map do |idx|
|
88
|
-
idsi = ids.dup
|
89
|
-
idsi.delete_at(idx)
|
90
|
-
idsi.shuffle!(random: rng)
|
91
|
-
idsi = Numo::Int32.cast(idsi)
|
92
|
-
arr[idsi[rids[true, 0..wc[idx] - 1]]]
|
93
|
-
end
|
101
|
+
Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
|
94
102
|
end
|
95
103
|
|
96
104
|
##
|
@@ -111,31 +119,40 @@ module SpatialStats
|
|
111
119
|
# its neighbors. Then we will only test for that item instead
|
112
120
|
# of the entire set. This will be done for each item.
|
113
121
|
rng = gen_rng(seed)
|
114
|
-
|
115
|
-
n = weights.n
|
116
|
-
# r is the number of equal to or more extreme samples
|
117
|
-
stat_orig = stat
|
118
|
-
rs = [0] * n
|
119
|
-
|
120
|
-
ws = neighbor_weights
|
122
|
+
rids = crand(permutations, rng)
|
121
123
|
|
122
|
-
|
123
|
-
|
124
|
-
|
124
|
+
n_1 = weights.n - 1
|
125
|
+
sparse = weights.sparse
|
126
|
+
row_index = sparse.row_index
|
127
|
+
ws = sparse.values
|
128
|
+
wc = weights.wc
|
129
|
+
stat_orig = stat
|
125
130
|
|
126
|
-
|
127
|
-
|
131
|
+
arr = Numo::DFloat.cast(x)
|
132
|
+
ids = (0..n_1).to_a
|
133
|
+
observations = Array.new(weights.n)
|
134
|
+
(0..n_1).each do |idx|
|
135
|
+
idsi = ids.dup
|
136
|
+
idsi.delete_at(idx)
|
137
|
+
idsi.shuffle!(random: rng)
|
138
|
+
idsi = Numo::Int32.cast(idsi)
|
139
|
+
sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
|
128
140
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
141
|
+
# account for case where there are no neighbors
|
142
|
+
row_range = row_index[idx]..(row_index[idx + 1] - 1)
|
143
|
+
if row_range.size.zero?
|
144
|
+
observations[idx] = permutations
|
145
|
+
next
|
146
|
+
end
|
134
147
|
|
135
|
-
|
148
|
+
wi = Numo::DFloat.cast(ws[row_range])
|
149
|
+
stat_i_new = mc_i(wi, sample, idx)
|
150
|
+
stat_i_orig = stat_orig[idx]
|
151
|
+
observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
|
152
|
+
permutations)
|
136
153
|
end
|
137
154
|
|
138
|
-
|
155
|
+
observations.map do |ri|
|
139
156
|
(ri + 1.0) / (permutations + 1.0)
|
140
157
|
end
|
141
158
|
end
|
@@ -154,30 +171,40 @@ module SpatialStats
|
|
154
171
|
# @return [Array] of p-values
|
155
172
|
def mc_bv(permutations, seed)
|
156
173
|
rng = gen_rng(seed)
|
157
|
-
|
158
|
-
n = weights.n
|
174
|
+
rids = crand(permutations, rng)
|
159
175
|
|
176
|
+
n_1 = weights.n - 1
|
177
|
+
sparse = weights.sparse
|
178
|
+
row_index = sparse.row_index
|
179
|
+
ws = sparse.values
|
180
|
+
wc = weights.wc
|
160
181
|
stat_orig = stat
|
161
|
-
rs = [0] * n
|
162
|
-
|
163
|
-
ws = neighbor_weights
|
164
182
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
183
|
+
arr = Numo::DFloat.cast(y)
|
184
|
+
ids = (0..n_1).to_a
|
185
|
+
observations = Array.new(weights.n)
|
186
|
+
(0..n_1).each do |idx|
|
187
|
+
idsi = ids.dup
|
188
|
+
idsi.delete_at(idx)
|
189
|
+
idsi.shuffle!(random: rng)
|
190
|
+
idsi = Numo::Int32.cast(idsi)
|
191
|
+
sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
|
170
192
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
193
|
+
# account for case where there are no neighbors
|
194
|
+
row_range = row_index[idx]..(row_index[idx + 1] - 1)
|
195
|
+
if row_range.size.zero?
|
196
|
+
observations[idx] = permutations
|
197
|
+
next
|
198
|
+
end
|
176
199
|
|
177
|
-
|
200
|
+
wi = Numo::DFloat.cast(ws[row_range])
|
201
|
+
stat_i_new = mc_i(wi, sample, idx)
|
202
|
+
stat_i_orig = stat_orig[idx]
|
203
|
+
observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
|
204
|
+
permutations)
|
178
205
|
end
|
179
206
|
|
180
|
-
|
207
|
+
observations.map do |ri|
|
181
208
|
(ri + 1.0) / (permutations + 1.0)
|
182
209
|
end
|
183
210
|
end
|
@@ -200,8 +227,7 @@ module SpatialStats
|
|
200
227
|
# @return [Array] of labels
|
201
228
|
def quads
|
202
229
|
# https://github.com/pysal/esda/blob/master/esda/moran.py#L925
|
203
|
-
|
204
|
-
z_lag = SpatialStats::Utils::Lag.neighbor_average(w, z)
|
230
|
+
z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, z)
|
205
231
|
zp = z.map(&:positive?)
|
206
232
|
lp = z_lag.map(&:positive?)
|
207
233
|
|
@@ -221,6 +247,22 @@ module SpatialStats
|
|
221
247
|
end
|
222
248
|
end
|
223
249
|
|
250
|
+
##
|
251
|
+
# Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
|
252
|
+
# in an array of hashes, one per observation.
|
253
|
+
#
|
254
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
255
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
256
|
+
#
|
257
|
+
# @return [Array]
|
258
|
+
def summary(permutations = 99, seed = nil)
|
259
|
+
p_vals = mc(permutations, seed)
|
260
|
+
data = weights.keys.zip(stat, p_vals, groups)
|
261
|
+
data.map do |row|
|
262
|
+
{ key: row[0], stat: row[1], p: row[2], group: row[3] }
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
224
266
|
private
|
225
267
|
|
226
268
|
def stat_i
|
@@ -231,8 +273,12 @@ module SpatialStats
|
|
231
273
|
raise NotImplementedError, 'method mc_i not defined'
|
232
274
|
end
|
233
275
|
|
276
|
+
def mc_observation_calc(_stat_i_orig, _stat_i_new, _permutations)
|
277
|
+
raise NotImplementedError, 'method mc_observation_calc not defined'
|
278
|
+
end
|
279
|
+
|
234
280
|
def w
|
235
|
-
weights.
|
281
|
+
@w ||= weights.dense
|
236
282
|
end
|
237
283
|
|
238
284
|
def gen_rng(seed = nil)
|
@@ -242,20 +288,6 @@ module SpatialStats
|
|
242
288
|
Random.new
|
243
289
|
end
|
244
290
|
end
|
245
|
-
|
246
|
-
def neighbor_weights
|
247
|
-
# record the non-zero weights in variable length arrays for each
|
248
|
-
# row in the weights table
|
249
|
-
ws = [[]] * weights.n
|
250
|
-
(0..weights.n - 1).each do |idx|
|
251
|
-
neighbors = []
|
252
|
-
w[idx, true].each do |wij|
|
253
|
-
neighbors << wij if wij != 0
|
254
|
-
end
|
255
|
-
ws[idx] = neighbors
|
256
|
-
end
|
257
|
-
ws
|
258
|
-
end
|
259
291
|
end
|
260
292
|
end
|
261
293
|
end
|
@@ -12,11 +12,11 @@ module Numo
|
|
12
12
|
#
|
13
13
|
# @example
|
14
14
|
#
|
15
|
-
# Numo::DFloat [[0, 1, 1], [1, 1, 1]].
|
15
|
+
# Numo::DFloat [[0, 1, 1], [1, 1, 1]].row_standardize
|
16
16
|
# Numo::DFloat [[0, 0.5, 0.5], [0.33333, 0.33333, 0.33333]]
|
17
17
|
#
|
18
18
|
# @return [Numo::NArray]
|
19
|
-
def
|
19
|
+
def row_standardize
|
20
20
|
# every row will sum up to 1, or if they are all 0, do nothing
|
21
21
|
standardized = each_over_axis.map do |row|
|
22
22
|
sum = row.sum
|
@@ -38,16 +38,16 @@ module Numo
|
|
38
38
|
#
|
39
39
|
# @example
|
40
40
|
#
|
41
|
-
# Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].
|
41
|
+
# Numo::DFloat [[0, 1, 0], [1, 0, 1], [0, 1, 0]].window
|
42
42
|
# Numo::DFloat [[1, 1, 0], [1, 1, 1], [0, 1, 1]]
|
43
43
|
#
|
44
44
|
# @example
|
45
45
|
# # Input will be equivalent to output in this case
|
46
|
-
# Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].
|
46
|
+
# Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]].window
|
47
47
|
# Numo::DFloat [[1, 1, 0], [1, 0, 1], [0, 1, 0]]
|
48
48
|
#
|
49
49
|
# @return [Numo::NArray]
|
50
|
-
def
|
50
|
+
def window
|
51
51
|
# in windowed calculations, the diagonal is set to 1
|
52
52
|
# if trace (sum of diag) is 0, add it, else return input
|
53
53
|
if trace.zero?
|
Binary file
|
data/lib/spatial_stats/utils.rb
CHANGED
@@ -6,5 +6,30 @@ module SpatialStats
|
|
6
6
|
##
|
7
7
|
# The Utils module contains various utilities used in the gem.
|
8
8
|
module Utils
|
9
|
+
##
|
10
|
+
# Compute the false discovery rate (FDR) of a set of p-values given
|
11
|
+
# an alpha value.
|
12
|
+
#
|
13
|
+
# If there is no FDR available in the dataset, the Bonferroni Bound is
|
14
|
+
# returned instead.
|
15
|
+
#
|
16
|
+
# @param [Array] pvals from an mc test
|
17
|
+
# @param [Float] alpha value for the fdr
|
18
|
+
#
|
19
|
+
# @return [Float] either the FDR or Bonferroni Bound
|
20
|
+
def self.fdr(pvals, alpha)
|
21
|
+
n = pvals.size
|
22
|
+
b_bound = alpha / n
|
23
|
+
pvals.sort!
|
24
|
+
|
25
|
+
p_val = b_bound
|
26
|
+
(0..n - 1).each do |i|
|
27
|
+
p_fdr = (i + 1) * b_bound
|
28
|
+
break unless pvals[i] <= p_fdr
|
29
|
+
|
30
|
+
p_val = p_fdr
|
31
|
+
end
|
32
|
+
p_val
|
33
|
+
end
|
9
34
|
end
|
10
35
|
end
|
@@ -11,36 +11,36 @@ module SpatialStats
|
|
11
11
|
# Dot product of the row_standardized input matrix
|
12
12
|
# by the input vector, variables.
|
13
13
|
#
|
14
|
-
# @param [
|
14
|
+
# @param [WeightsMatrix] matrix holding target weights.
|
15
15
|
# @param [Array] variables vector multiplying the matrix
|
16
16
|
#
|
17
17
|
# @return [Array] resultant vector
|
18
18
|
def self.neighbor_average(matrix, variables)
|
19
|
-
matrix = matrix.
|
19
|
+
matrix = matrix.standardize
|
20
20
|
neighbor_sum(matrix, variables)
|
21
21
|
end
|
22
22
|
|
23
23
|
##
|
24
24
|
# Dot product of the input matrix by the input vector, variables.
|
25
25
|
#
|
26
|
-
# @param [
|
26
|
+
# @param [WeightsMatrix] matrix holding target weights.
|
27
27
|
# @param [Array] variables vector multiplying the matrix
|
28
28
|
#
|
29
29
|
# @return [Array] resultant vector
|
30
30
|
def self.neighbor_sum(matrix, variables)
|
31
|
-
matrix.
|
31
|
+
matrix.sparse.mulvec(variables)
|
32
32
|
end
|
33
33
|
|
34
34
|
##
|
35
|
-
# Dot product of the input windowed, row
|
35
|
+
# Dot product of the input windowed, row standardized matrix by
|
36
36
|
# the input vector, variables.
|
37
37
|
#
|
38
|
-
# @param [
|
38
|
+
# @param [WeightsMatrix] matrix holding target weights.
|
39
39
|
# @param [Array] variables vector multiplying the matrix
|
40
40
|
#
|
41
41
|
# @return [Array] resultant vector
|
42
42
|
def self.window_average(matrix, variables)
|
43
|
-
matrix = matrix.
|
43
|
+
matrix = matrix.window.standardize
|
44
44
|
window_sum(matrix, variables)
|
45
45
|
end
|
46
46
|
|
@@ -48,13 +48,13 @@ module SpatialStats
|
|
48
48
|
# Dot product of the input windowed matrix by
|
49
49
|
# the input vector, variables.
|
50
50
|
#
|
51
|
-
# @param [
|
51
|
+
# @param [WeightsMatrix] matrix holding target weights.
|
52
52
|
# @param [Array] variables vector multiplying the matrix
|
53
53
|
#
|
54
54
|
# @return [Array] resultant vector
|
55
55
|
def self.window_sum(matrix, variables)
|
56
|
-
matrix = matrix.
|
57
|
-
matrix.
|
56
|
+
matrix = matrix.window
|
57
|
+
matrix.sparse.mulvec(variables)
|
58
58
|
end
|
59
59
|
end
|
60
60
|
end
|
@@ -15,21 +15,26 @@ module SpatialStats
|
|
15
15
|
#
|
16
16
|
# @return [WeightsMatrix]
|
17
17
|
def self.rook(scope, field)
|
18
|
-
p_key = scope.primary_key
|
19
|
-
keys = scope.pluck(p_key).sort
|
20
|
-
|
21
18
|
neighbors = SpatialStats::Queries::Weights
|
22
19
|
.rook_contiguity_neighbors(scope, field)
|
23
20
|
|
21
|
+
# get keys to make sure we have consistent dimensions when
|
22
|
+
# some entries don't have neighbors.
|
23
|
+
# define a new hash that has all the keys from scope
|
24
|
+
keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
|
25
|
+
|
24
26
|
neighbors = neighbors.group_by(&:i_id)
|
27
|
+
missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
|
28
|
+
neighbors = neighbors.merge(missing_neighbors)
|
29
|
+
|
25
30
|
weights = neighbors.transform_values do |value|
|
26
31
|
value.map do |neighbor|
|
27
|
-
hash =
|
32
|
+
hash = { id: neighbor[:j_id] }
|
28
33
|
hash[:weight] = 1
|
29
34
|
hash
|
30
35
|
end
|
31
36
|
end
|
32
|
-
SpatialStats::Weights::WeightsMatrix.new(
|
37
|
+
SpatialStats::Weights::WeightsMatrix.new(weights)
|
33
38
|
end
|
34
39
|
|
35
40
|
##
|
@@ -40,21 +45,26 @@ module SpatialStats
|
|
40
45
|
#
|
41
46
|
# @return [WeightsMatrix]
|
42
47
|
def self.queen(scope, field)
|
43
|
-
p_key = scope.primary_key
|
44
|
-
keys = scope.pluck(p_key).sort
|
45
|
-
|
46
48
|
neighbors = SpatialStats::Queries::Weights
|
47
49
|
.queen_contiguity_neighbors(scope, field)
|
48
50
|
|
51
|
+
# get keys to make sure we have consistent dimensions when
|
52
|
+
# some entries don't have neighbors.
|
53
|
+
# define a new hash that has all the keys from scope
|
54
|
+
keys = SpatialStats::Queries::Variables.query_field(scope, scope.klass.primary_key)
|
55
|
+
|
49
56
|
neighbors = neighbors.group_by(&:i_id)
|
57
|
+
missing_neighbors = Hash[(keys - neighbors.keys).map { |key| [key, []] }]
|
58
|
+
neighbors = neighbors.merge(missing_neighbors)
|
59
|
+
|
50
60
|
weights = neighbors.transform_values do |value|
|
51
61
|
value.map do |neighbor|
|
52
|
-
hash =
|
62
|
+
hash = { id: neighbor[:j_id] }
|
53
63
|
hash[:weight] = 1
|
54
64
|
hash
|
55
65
|
end
|
56
66
|
end
|
57
|
-
SpatialStats::Weights::WeightsMatrix.new(
|
67
|
+
SpatialStats::Weights::WeightsMatrix.new(weights)
|
58
68
|
end
|
59
69
|
end
|
60
70
|
end
|