spatial_stats 0.1.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +185 -9
- data/lib/spatial_stats.rb +7 -4
- data/lib/spatial_stats/enumerable_ext.rb +29 -0
- data/lib/spatial_stats/global.rb +15 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +48 -4
- data/lib/spatial_stats/global/moran.rb +69 -19
- data/lib/spatial_stats/global/stat.rb +29 -17
- data/lib/spatial_stats/local.rb +16 -1
- data/lib/spatial_stats/local/bivariate_moran.rb +45 -4
- data/lib/spatial_stats/local/geary.rb +34 -47
- data/lib/spatial_stats/local/getis_ord.rb +109 -0
- data/lib/spatial_stats/local/moran.rb +55 -22
- data/lib/spatial_stats/local/multivariate_geary.rb +77 -22
- data/lib/spatial_stats/local/stat.rb +160 -88
- data/lib/spatial_stats/narray_ext.rb +27 -0
- data/lib/spatial_stats/queries.rb +6 -0
- data/lib/spatial_stats/queries/variables.rb +16 -3
- data/lib/spatial_stats/queries/weights.rb +91 -9
- data/lib/spatial_stats/utils.rb +7 -0
- data/lib/spatial_stats/utils/lag.rb +34 -2
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights.rb +9 -0
- data/lib/spatial_stats/weights/contiguous.rb +18 -0
- data/lib/spatial_stats/weights/distant.rb +41 -4
- data/lib/spatial_stats/weights/weights_matrix.rb +25 -0
- metadata +5 -4
- data/lib/spatial_stats/local/g.rb +0 -75
@@ -2,7 +2,24 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# MultivariateGeary works like univariate Geary, except that it takes
|
7
|
+
# an array of data fields, rather than one data field. It measures the
|
8
|
+
# extent to which the average distance in attribute space between
|
9
|
+
# a value and its neighbors differs from what it would be under spatial
|
10
|
+
# randomness.
|
11
|
+
#
|
12
|
+
# Functionally, C is computed by averaging the C values for each attribute
|
13
|
+
# at a certain location, under a univariate context.
|
5
14
|
class MultivariateGeary < Stat
|
15
|
+
##
|
16
|
+
# A new instance of MultivariateGeary
|
17
|
+
#
|
18
|
+
# @param [ActiveRecord::Relation] scope
|
19
|
+
# @param [Array<Symbol, String>] fields to query from scope
|
20
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
21
|
+
#
|
22
|
+
# @return [MultivariateGeary]
|
6
23
|
def initialize(scope, fields, weights)
|
7
24
|
@scope = scope
|
8
25
|
@fields = fields
|
@@ -10,14 +27,33 @@ module SpatialStats
|
|
10
27
|
end
|
11
28
|
attr_accessor :scope, :fields, :weights
|
12
29
|
|
13
|
-
|
30
|
+
##
|
31
|
+
# Computes the stat for MultivariateGeary.
|
32
|
+
#
|
33
|
+
# @see https://geodacenter.github.io/workbook/6b_local_adv/lab6b.html#concept-5
|
34
|
+
#
|
35
|
+
# @return [Array] of C values for each observation.
|
36
|
+
def stat
|
14
37
|
m = fields.size
|
15
38
|
gearys = fields.map do |field|
|
16
|
-
Geary.new(scope, field, weights).
|
39
|
+
Geary.new(scope, field, weights).stat
|
17
40
|
end
|
18
41
|
gearys.transpose.map { |x| x.reduce(:+) / m }
|
19
42
|
end
|
43
|
+
alias c stat
|
20
44
|
|
45
|
+
##
|
46
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
47
|
+
# Shuffles all tuples, recomputes +#stat+ for each variation, then compares
|
48
|
+
# to the computed one. The ratio of more extreme values to
|
49
|
+
# permutations is returned for each observation.
|
50
|
+
#
|
51
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
52
|
+
#
|
53
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
54
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
55
|
+
#
|
56
|
+
# @return [Array] of p-values
|
21
57
|
def mc(permutations = 99, seed = nil)
|
22
58
|
# in this case, one tuple of vals is held constant, then
|
23
59
|
# the rest are shuffled, so for crand we will pass in an arr
|
@@ -28,26 +64,26 @@ module SpatialStats
|
|
28
64
|
indices = (0..(n - 1)).to_a
|
29
65
|
shuffles = crand(indices, permutations, rng)
|
30
66
|
|
31
|
-
|
32
|
-
rs = [0] *
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
67
|
+
stat_orig = stat
|
68
|
+
rs = [0] * n
|
69
|
+
|
70
|
+
ws = neighbor_weights
|
71
|
+
|
72
|
+
idx = 0
|
73
|
+
while idx < n
|
74
|
+
stat_i_orig = stat_orig[idx]
|
75
|
+
wi = Numo::DFloat.cast(ws[idx])
|
76
|
+
|
77
|
+
# for each field, compute the C value at that index.
|
78
|
+
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
79
|
+
|
80
|
+
rs[idx] = if stat_i_orig.positive?
|
81
|
+
(stat_i_new >= stat_i_orig).count
|
82
|
+
else
|
83
|
+
(stat_i_new <= stat_i_orig).count
|
84
|
+
end
|
85
|
+
|
86
|
+
idx += 1
|
51
87
|
end
|
52
88
|
|
53
89
|
rs.map do |ri|
|
@@ -57,12 +93,31 @@ module SpatialStats
|
|
57
93
|
|
58
94
|
private
|
59
95
|
|
96
|
+
def mc_i(wi, perms, idx)
|
97
|
+
m = fields.size
|
98
|
+
permutations = perms.shape[0]
|
99
|
+
|
100
|
+
cs = Numo::DFloat.zeros(m, permutations)
|
101
|
+
(0..m - 1).each do |mi|
|
102
|
+
z = field_data[mi]
|
103
|
+
zs = matrix_field_data[mi, true][perms]
|
104
|
+
c = (z[idx] - zs)**2
|
105
|
+
|
106
|
+
cs[mi, true] = (wi * c).sum(1)
|
107
|
+
end
|
108
|
+
cs.mean(0)
|
109
|
+
end
|
110
|
+
|
60
111
|
def field_data
|
61
112
|
@field_data ||= fields.map do |field|
|
62
113
|
SpatialStats::Queries::Variables.query_field(@scope, field)
|
63
114
|
.standardize
|
64
115
|
end
|
65
116
|
end
|
117
|
+
|
118
|
+
def matrix_field_data
|
119
|
+
@matrix_field_data ||= Numo::DFloat.cast(field_data)
|
120
|
+
end
|
66
121
|
end
|
67
122
|
end
|
68
123
|
end
|
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# Stat is the abstract base class for local stats.
|
7
|
+
# It defines the methods that are common between all classes
|
8
|
+
# and will raise a NotImplementedError on those that are specific
|
9
|
+
# for each type of statistic.
|
5
10
|
class Stat
|
6
11
|
# Base class for local stats
|
7
12
|
def initialize(scope, field, weights)
|
@@ -11,12 +16,8 @@ module SpatialStats
|
|
11
16
|
end
|
12
17
|
attr_accessor :scope, :field, :weights
|
13
18
|
|
14
|
-
def
|
15
|
-
raise NotImplementedError, 'method
|
16
|
-
end
|
17
|
-
|
18
|
-
def i_i(_idx)
|
19
|
-
raise NotImplementedError, 'method i_i not defined'
|
19
|
+
def stat
|
20
|
+
raise NotImplementedError, 'method stat not defined'
|
20
21
|
end
|
21
22
|
|
22
23
|
def expectation
|
@@ -27,47 +28,83 @@ module SpatialStats
|
|
27
28
|
raise NotImplementedError, 'method variance not implemented'
|
28
29
|
end
|
29
30
|
|
31
|
+
##
|
32
|
+
# Z-score for each observation of the statistic.
|
33
|
+
#
|
34
|
+
# @return [Array] of the number of deviations from the mean
|
30
35
|
def z_score
|
31
|
-
numerators =
|
36
|
+
numerators = stat.map { |v| v - expectation }
|
32
37
|
denominators = variance.map { |v| Math.sqrt(v) }
|
33
38
|
numerators.each_with_index.map do |numerator, idx|
|
34
39
|
numerator / denominators[idx]
|
35
40
|
end
|
36
41
|
end
|
37
42
|
|
43
|
+
##
|
44
|
+
# Conditional randomization algorithm used in permutation testing.
|
45
|
+
# Outputs an array of length n of Numo::DFloat matrices of
|
46
|
+
# size m x num_neighbors, where m is the number of permutations and
|
47
|
+
# num_neighbors is the number of neighbors for that observation.
|
48
|
+
#
|
49
|
+
# The values are randomly permuted values from arr that will act
|
50
|
+
# as its neighbors for that permutation.
|
51
|
+
#
|
52
|
+
# This is super important because most weight matrices are very
|
53
|
+
# sparse so the amount of shuffling/multiplication that is done
|
54
|
+
# is reduced drastically.
|
55
|
+
#
|
56
|
+
# @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
|
57
|
+
#
|
58
|
+
# @return [Array] of Numo::NArray matrices
|
59
|
+
#
|
38
60
|
def crand(arr, permutations, rng)
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
61
|
+
# basing this off the ESDA method
|
62
|
+
# need to get k for max_neighbors
|
63
|
+
# and wc for cardinalities of each item
|
64
|
+
# this returns an array of length n with
|
65
|
+
# (permutations x neighbors) Numo Arrays.
|
66
|
+
# This helps reduce computation time because
|
67
|
+
# we are only dealing with neighbors for each
|
68
|
+
# entry not the entire list of permutations for each entry.
|
69
|
+
n_1 = weights.n - 1
|
70
|
+
|
71
|
+
# weight counts
|
72
|
+
wc = [0] * weights.n
|
73
|
+
k = 0
|
74
|
+
(0..n_1).each do |idx|
|
75
|
+
wc[idx] = (w[idx, true] > 0).count
|
50
76
|
end
|
51
|
-
end
|
52
77
|
|
53
|
-
|
54
|
-
|
55
|
-
|
78
|
+
k = wc.max + 1
|
79
|
+
prange = (0..permutations - 1).to_a
|
80
|
+
|
81
|
+
arr = Numo::DFloat.cast(arr)
|
56
82
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# end
|
61
|
-
# p rids
|
83
|
+
ids = (0..n_1).to_a
|
84
|
+
ids_perm = (0..n_1 - 1).to_a
|
85
|
+
rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
|
62
86
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
87
|
+
(0..n_1).map do |idx|
|
88
|
+
idsi = ids.dup
|
89
|
+
idsi.delete_at(idx)
|
90
|
+
idsi.shuffle!(random: rng)
|
91
|
+
idsi = Numo::Int32.cast(idsi)
|
92
|
+
arr[idsi[rids[true, 0..wc[idx] - 1]]]
|
93
|
+
end
|
94
|
+
end
|
70
95
|
|
96
|
+
##
|
97
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
98
|
+
# Shuffles x values, recomputes +#stat+ for each variation, then compares
|
99
|
+
# to the computed one. The ratio of more extreme values to
|
100
|
+
# permutations is returned for each observation.
|
101
|
+
#
|
102
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
103
|
+
#
|
104
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
105
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
106
|
+
#
|
107
|
+
# @return [Array] of p-values
|
71
108
|
def mc(permutations = 99, seed = nil)
|
72
109
|
# For local tests, we need to shuffle the values
|
73
110
|
# but for each item, hold its value in place and shuffle
|
@@ -75,44 +112,27 @@ module SpatialStats
|
|
75
112
|
# of the entire set. This will be done for each item.
|
76
113
|
rng = gen_rng(seed)
|
77
114
|
shuffles = crand(x, permutations, rng)
|
78
|
-
|
115
|
+
n = weights.n
|
79
116
|
# r is the number of equal to or more extreme samples
|
80
|
-
|
81
|
-
rs = [0] *
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
z_lag = lagged.dup
|
100
|
-
z_lag[idx] = lagged_var
|
101
|
-
stat.z_lag = z_lag
|
102
|
-
|
103
|
-
ii_new = stat.i_i(idx)
|
104
|
-
|
105
|
-
# https://geodacenter.github.io/glossary.html#ppvalue
|
106
|
-
# NOTE: this is inconsistent with the output from GeoDa
|
107
|
-
# for local permutation tests, they seem to use greater than
|
108
|
-
# not greater than or equal to. I'm going to go by the definition
|
109
|
-
# in the glossary for now.
|
110
|
-
if ii_orig.positive?
|
111
|
-
rs[idx] += 1 if ii_new >= ii_orig
|
112
|
-
else
|
113
|
-
rs[idx] += 1 if ii_new <= ii_orig
|
114
|
-
end
|
115
|
-
end
|
117
|
+
stat_orig = stat
|
118
|
+
rs = [0] * n
|
119
|
+
|
120
|
+
ws = neighbor_weights
|
121
|
+
|
122
|
+
idx = 0
|
123
|
+
while idx < n
|
124
|
+
stat_i_orig = stat_orig[idx]
|
125
|
+
|
126
|
+
wi = Numo::DFloat.cast(ws[idx])
|
127
|
+
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
128
|
+
|
129
|
+
rs[idx] = if stat_i_orig.positive?
|
130
|
+
(stat_i_new >= stat_i_orig).count
|
131
|
+
else
|
132
|
+
(stat_i_new <= stat_i_orig).count
|
133
|
+
end
|
134
|
+
|
135
|
+
idx += 1
|
116
136
|
end
|
117
137
|
|
118
138
|
rs.map do |ri|
|
@@ -120,27 +140,41 @@ module SpatialStats
|
|
120
140
|
end
|
121
141
|
end
|
122
142
|
|
143
|
+
##
|
144
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
145
|
+
# Shuffles y values, hold x values, recomputes +#stat+ for each variation,
|
146
|
+
# then compares to the computed one. The ratio of more extreme values to
|
147
|
+
# permutations is returned for each observation.
|
148
|
+
#
|
149
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
150
|
+
#
|
151
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
152
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
153
|
+
#
|
154
|
+
# @return [Array] of p-values
|
123
155
|
def mc_bv(permutations, seed)
|
124
156
|
rng = gen_rng(seed)
|
125
157
|
shuffles = crand(y, permutations, rng)
|
158
|
+
n = weights.n
|
126
159
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
160
|
+
stat_orig = stat
|
161
|
+
rs = [0] * n
|
162
|
+
|
163
|
+
ws = neighbor_weights
|
164
|
+
|
165
|
+
idx = 0
|
166
|
+
while idx < n
|
167
|
+
stat_i_orig = stat_orig[idx]
|
168
|
+
wi = Numo::DFloat.cast(ws[idx])
|
169
|
+
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
170
|
+
|
171
|
+
rs[idx] = if stat_i_orig.positive?
|
172
|
+
(stat_i_new >= stat_i_orig).count
|
173
|
+
else
|
174
|
+
(stat_i_new <= stat_i_orig).count
|
175
|
+
end
|
176
|
+
|
177
|
+
idx += 1
|
144
178
|
end
|
145
179
|
|
146
180
|
rs.map do |ri|
|
@@ -148,6 +182,22 @@ module SpatialStats
|
|
148
182
|
end
|
149
183
|
end
|
150
184
|
|
185
|
+
##
|
186
|
+
# Determines what quadrant an observation is in. Based on its value
|
187
|
+
# compared to its neighbors. This does not work for all stats, since
|
188
|
+
# it requires that values can be negative.
|
189
|
+
#
|
190
|
+
# In a standardized array of z, high values are values greater than 0
|
191
|
+
# and its neighbors are determined by the spatial lag and if that is
|
192
|
+
# positive then its neighbors would be high, low otherwise.
|
193
|
+
#
|
194
|
+
# Quadrants are:
|
195
|
+
# [HH] a high value surrounded by other high values
|
196
|
+
# [LH] a low value surrounded by high values
|
197
|
+
# [LL] a low value surrounded by low values
|
198
|
+
# [HL] a high value surrounded by low values
|
199
|
+
#
|
200
|
+
# @return [Array] of labels
|
151
201
|
def quads
|
152
202
|
# https://github.com/pysal/esda/blob/master/esda/moran.py#L925
|
153
203
|
w = @weights.full
|
@@ -173,6 +223,14 @@ module SpatialStats
|
|
173
223
|
|
174
224
|
private
|
175
225
|
|
226
|
+
def stat_i
|
227
|
+
raise NotImplementedError, 'method stat_i not defined'
|
228
|
+
end
|
229
|
+
|
230
|
+
def mc_i
|
231
|
+
raise NotImplementedError, 'method mc_i not defined'
|
232
|
+
end
|
233
|
+
|
176
234
|
def w
|
177
235
|
weights.standardized
|
178
236
|
end
|
@@ -184,6 +242,20 @@ module SpatialStats
|
|
184
242
|
Random.new
|
185
243
|
end
|
186
244
|
end
|
245
|
+
|
246
|
+
def neighbor_weights
|
247
|
+
# record the non-zero weights in variable length arrays for each
|
248
|
+
# row in the weights table
|
249
|
+
ws = [[]] * weights.n
|
250
|
+
(0..weights.n - 1).each do |idx|
|
251
|
+
neighbors = []
|
252
|
+
w[idx, true].each do |wij|
|
253
|
+
neighbors << wij if wij != 0
|
254
|
+
end
|
255
|
+
ws[idx] = neighbors
|
256
|
+
end
|
257
|
+
ws
|
258
|
+
end
|
187
259
|
end
|
188
260
|
end
|
189
261
|
end
|