spatial_stats 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +185 -9
- data/lib/spatial_stats.rb +7 -4
- data/lib/spatial_stats/enumerable_ext.rb +29 -0
- data/lib/spatial_stats/global.rb +15 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +48 -4
- data/lib/spatial_stats/global/moran.rb +69 -19
- data/lib/spatial_stats/global/stat.rb +29 -17
- data/lib/spatial_stats/local.rb +16 -1
- data/lib/spatial_stats/local/bivariate_moran.rb +45 -4
- data/lib/spatial_stats/local/geary.rb +34 -47
- data/lib/spatial_stats/local/getis_ord.rb +109 -0
- data/lib/spatial_stats/local/moran.rb +55 -22
- data/lib/spatial_stats/local/multivariate_geary.rb +77 -22
- data/lib/spatial_stats/local/stat.rb +160 -88
- data/lib/spatial_stats/narray_ext.rb +27 -0
- data/lib/spatial_stats/queries.rb +6 -0
- data/lib/spatial_stats/queries/variables.rb +16 -3
- data/lib/spatial_stats/queries/weights.rb +91 -9
- data/lib/spatial_stats/utils.rb +7 -0
- data/lib/spatial_stats/utils/lag.rb +34 -2
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights.rb +9 -0
- data/lib/spatial_stats/weights/contiguous.rb +18 -0
- data/lib/spatial_stats/weights/distant.rb +41 -4
- data/lib/spatial_stats/weights/weights_matrix.rb +25 -0
- metadata +5 -4
- data/lib/spatial_stats/local/g.rb +0 -75
@@ -2,7 +2,24 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# MultivariateGeary works like univariate Geary, except that it takes
|
7
|
+
# an array of data fields, rather than one data field. It measures the
|
8
|
+
# extent to which the average distance in attribute space between
|
9
|
+
# values and their neighbors differs from what it would be under spatial
|
10
|
+
# randomness.
|
11
|
+
#
|
12
|
+
# Functionally, C is computed by averaging the C values for each attribute
|
13
|
+
# at a certain location, under a univariate context.
|
5
14
|
class MultivariateGeary < Stat
|
15
|
+
##
|
16
|
+
# A new instance of MultivariateGeary
|
17
|
+
#
|
18
|
+
# @param [ActiveRecord::Relation] scope
|
19
|
+
# @param [Array<Symbol, String>] fields to query from scope
|
20
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
21
|
+
#
|
22
|
+
# @return [MultivariateGeary]
|
6
23
|
def initialize(scope, fields, weights)
|
7
24
|
@scope = scope
|
8
25
|
@fields = fields
|
@@ -10,14 +27,33 @@ module SpatialStats
|
|
10
27
|
end
|
11
28
|
attr_accessor :scope, :fields, :weights
|
12
29
|
|
13
|
-
|
30
|
+
##
|
31
|
+
# Computes the stat for MultivariateGeary.
|
32
|
+
#
|
33
|
+
# @see https://geodacenter.github.io/workbook/6b_local_adv/lab6b.html#concept-5
|
34
|
+
#
|
35
|
+
# @return [Array] of C values for each observation.
|
36
|
+
def stat
|
14
37
|
m = fields.size
|
15
38
|
gearys = fields.map do |field|
|
16
|
-
Geary.new(scope, field, weights).
|
39
|
+
Geary.new(scope, field, weights).stat
|
17
40
|
end
|
18
41
|
gearys.transpose.map { |x| x.reduce(:+) / m }
|
19
42
|
end
|
43
|
+
alias c stat
|
20
44
|
|
45
|
+
##
|
46
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
47
|
+
# Shuffles all tuples, recomputes +#stat+ for each variation, then compares
|
48
|
+
# to the computed one. The ratio of more extreme values to
|
49
|
+
# permutations is returned for each observation.
|
50
|
+
#
|
51
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
52
|
+
#
|
53
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
54
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
55
|
+
#
|
56
|
+
# @return [Array] of p-values
|
21
57
|
def mc(permutations = 99, seed = nil)
|
22
58
|
# in this case, one tuple of vals is held constant, then
|
23
59
|
# the rest are shuffled, so for crand we will pass in an arr
|
@@ -28,26 +64,26 @@ module SpatialStats
|
|
28
64
|
indices = (0..(n - 1)).to_a
|
29
65
|
shuffles = crand(indices, permutations, rng)
|
30
66
|
|
31
|
-
|
32
|
-
rs = [0] *
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
67
|
+
stat_orig = stat
|
68
|
+
rs = [0] * n
|
69
|
+
|
70
|
+
ws = neighbor_weights
|
71
|
+
|
72
|
+
idx = 0
|
73
|
+
while idx < n
|
74
|
+
stat_i_orig = stat_orig[idx]
|
75
|
+
wi = Numo::DFloat.cast(ws[idx])
|
76
|
+
|
77
|
+
# for each field, compute the C value at that index.
|
78
|
+
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
79
|
+
|
80
|
+
rs[idx] = if stat_i_orig.positive?
|
81
|
+
(stat_i_new >= stat_i_orig).count
|
82
|
+
else
|
83
|
+
(stat_i_new <= stat_i_orig).count
|
84
|
+
end
|
85
|
+
|
86
|
+
idx += 1
|
51
87
|
end
|
52
88
|
|
53
89
|
rs.map do |ri|
|
@@ -57,12 +93,31 @@ module SpatialStats
|
|
57
93
|
|
58
94
|
private
|
59
95
|
|
96
|
+
def mc_i(wi, perms, idx)
|
97
|
+
m = fields.size
|
98
|
+
permutations = perms.shape[0]
|
99
|
+
|
100
|
+
cs = Numo::DFloat.zeros(m, permutations)
|
101
|
+
(0..m - 1).each do |mi|
|
102
|
+
z = field_data[mi]
|
103
|
+
zs = matrix_field_data[mi, true][perms]
|
104
|
+
c = (z[idx] - zs)**2
|
105
|
+
|
106
|
+
cs[mi, true] = (wi * c).sum(1)
|
107
|
+
end
|
108
|
+
cs.mean(0)
|
109
|
+
end
|
110
|
+
|
60
111
|
def field_data
|
61
112
|
@field_data ||= fields.map do |field|
|
62
113
|
SpatialStats::Queries::Variables.query_field(@scope, field)
|
63
114
|
.standardize
|
64
115
|
end
|
65
116
|
end
|
117
|
+
|
118
|
+
def matrix_field_data
|
119
|
+
@matrix_field_data ||= Numo::DFloat.cast(field_data)
|
120
|
+
end
|
66
121
|
end
|
67
122
|
end
|
68
123
|
end
|
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# Stat is the abstract base class for local stats.
|
7
|
+
# It defines the methods that are common between all classes
|
8
|
+
# and will raise a NotImplementedError on those that are specific
|
9
|
+
# for each type of statistic.
|
5
10
|
class Stat
|
6
11
|
# Base class for local stats
|
7
12
|
def initialize(scope, field, weights)
|
@@ -11,12 +16,8 @@ module SpatialStats
|
|
11
16
|
end
|
12
17
|
attr_accessor :scope, :field, :weights
|
13
18
|
|
14
|
-
def
|
15
|
-
raise NotImplementedError, 'method
|
16
|
-
end
|
17
|
-
|
18
|
-
def i_i(_idx)
|
19
|
-
raise NotImplementedError, 'method i_i not defined'
|
19
|
+
def stat
|
20
|
+
raise NotImplementedError, 'method stat not defined'
|
20
21
|
end
|
21
22
|
|
22
23
|
def expectation
|
@@ -27,47 +28,83 @@ module SpatialStats
|
|
27
28
|
raise NotImplementedError, 'method variance not implemented'
|
28
29
|
end
|
29
30
|
|
31
|
+
##
|
32
|
+
# Z-score for each observation of the statistic.
|
33
|
+
#
|
34
|
+
# @return [Array] of the number of deviations from the mean
|
30
35
|
def z_score
|
31
|
-
numerators =
|
36
|
+
numerators = stat.map { |v| v - expectation }
|
32
37
|
denominators = variance.map { |v| Math.sqrt(v) }
|
33
38
|
numerators.each_with_index.map do |numerator, idx|
|
34
39
|
numerator / denominators[idx]
|
35
40
|
end
|
36
41
|
end
|
37
42
|
|
43
|
+
##
|
44
|
+
# Conditional randomization algorithm used in permutation testing.
|
45
|
+
# Outputs an array of length n of Numo::DFloat matrices of
|
46
|
+
# size m x num_neighbors. Where m is the number of permutations and
|
47
|
+
# num_neighbors is the number of neighbors for that observation.
|
48
|
+
#
|
49
|
+
# The values are randomly permuted values from arr that will act
|
50
|
+
# as its neighbors for that permutation.
|
51
|
+
#
|
52
|
+
# This is super important because most weight matrices are very
|
53
|
+
# sparse so the amount of shuffling/multiplication that is done
|
54
|
+
# is reduced drastically.
|
55
|
+
#
|
56
|
+
# @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
|
57
|
+
#
|
58
|
+
# @return [Array] of Numo::NArray matrices
|
59
|
+
#
|
38
60
|
def crand(arr, permutations, rng)
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
61
|
+
# basing this off the ESDA method
|
62
|
+
# need to get k for max_neighbors
|
63
|
+
# and wc for cardinalities of each item
|
64
|
+
# this returns an array of length n with
|
65
|
+
# (permutations x neighbors) Numo Arrays.
|
66
|
+
# This helps reduce computation time because
|
67
|
+
# we are only dealing with neighbors for each
|
68
|
+
# entry not the entire list of permutations for each entry.
|
69
|
+
n_1 = weights.n - 1
|
70
|
+
|
71
|
+
# weight counts
|
72
|
+
wc = [0] * weights.n
|
73
|
+
k = 0
|
74
|
+
(0..n_1).each do |idx|
|
75
|
+
wc[idx] = (w[idx, true] > 0).count
|
50
76
|
end
|
51
|
-
end
|
52
77
|
|
53
|
-
|
54
|
-
|
55
|
-
|
78
|
+
k = wc.max + 1
|
79
|
+
prange = (0..permutations - 1).to_a
|
80
|
+
|
81
|
+
arr = Numo::DFloat.cast(arr)
|
56
82
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# end
|
61
|
-
# p rids
|
83
|
+
ids = (0..n_1).to_a
|
84
|
+
ids_perm = (0..n_1 - 1).to_a
|
85
|
+
rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
|
62
86
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
87
|
+
(0..n_1).map do |idx|
|
88
|
+
idsi = ids.dup
|
89
|
+
idsi.delete_at(idx)
|
90
|
+
idsi.shuffle!(random: rng)
|
91
|
+
idsi = Numo::Int32.cast(idsi)
|
92
|
+
arr[idsi[rids[true, 0..wc[idx] - 1]]]
|
93
|
+
end
|
94
|
+
end
|
70
95
|
|
96
|
+
##
|
97
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
98
|
+
# Shuffles x values, recomputes +#stat+ for each variation, then compares
|
99
|
+
# to the computed one. The ratio of more extreme values to
|
100
|
+
# permutations is returned for each observation.
|
101
|
+
#
|
102
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
103
|
+
#
|
104
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
105
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
106
|
+
#
|
107
|
+
# @return [Array] of p-values
|
71
108
|
def mc(permutations = 99, seed = nil)
|
72
109
|
# For local tests, we need to shuffle the values
|
73
110
|
# but for each item, hold its value in place and shuffle
|
@@ -75,44 +112,27 @@ module SpatialStats
|
|
75
112
|
# of the entire set. This will be done for each item.
|
76
113
|
rng = gen_rng(seed)
|
77
114
|
shuffles = crand(x, permutations, rng)
|
78
|
-
|
115
|
+
n = weights.n
|
79
116
|
# r is the number of equal to or more extreme samples
|
80
|
-
|
81
|
-
rs = [0] *
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
z_lag = lagged.dup
|
100
|
-
z_lag[idx] = lagged_var
|
101
|
-
stat.z_lag = z_lag
|
102
|
-
|
103
|
-
ii_new = stat.i_i(idx)
|
104
|
-
|
105
|
-
# https://geodacenter.github.io/glossary.html#ppvalue
|
106
|
-
# NOTE: this is inconsistent with the output from GeoDa
|
107
|
-
# for local permutation tests, they seem to use greater than
|
108
|
-
# not greater than or equal to. I'm going to go by the definition
|
109
|
-
# in the glossary for now.
|
110
|
-
if ii_orig.positive?
|
111
|
-
rs[idx] += 1 if ii_new >= ii_orig
|
112
|
-
else
|
113
|
-
rs[idx] += 1 if ii_new <= ii_orig
|
114
|
-
end
|
115
|
-
end
|
117
|
+
stat_orig = stat
|
118
|
+
rs = [0] * n
|
119
|
+
|
120
|
+
ws = neighbor_weights
|
121
|
+
|
122
|
+
idx = 0
|
123
|
+
while idx < n
|
124
|
+
stat_i_orig = stat_orig[idx]
|
125
|
+
|
126
|
+
wi = Numo::DFloat.cast(ws[idx])
|
127
|
+
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
128
|
+
|
129
|
+
rs[idx] = if stat_i_orig.positive?
|
130
|
+
(stat_i_new >= stat_i_orig).count
|
131
|
+
else
|
132
|
+
(stat_i_new <= stat_i_orig).count
|
133
|
+
end
|
134
|
+
|
135
|
+
idx += 1
|
116
136
|
end
|
117
137
|
|
118
138
|
rs.map do |ri|
|
@@ -120,27 +140,41 @@ module SpatialStats
|
|
120
140
|
end
|
121
141
|
end
|
122
142
|
|
143
|
+
##
|
144
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
145
|
+
# Shuffles y values, hold x values, recomputes +#stat+ for each variation,
|
146
|
+
# then compares to the computed one. The ratio of more extreme values to
|
147
|
+
# permutations is returned for each observation.
|
148
|
+
#
|
149
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
150
|
+
#
|
151
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
152
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
153
|
+
#
|
154
|
+
# @return [Array] of p-values
|
123
155
|
def mc_bv(permutations, seed)
|
124
156
|
rng = gen_rng(seed)
|
125
157
|
shuffles = crand(y, permutations, rng)
|
158
|
+
n = weights.n
|
126
159
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
160
|
+
stat_orig = stat
|
161
|
+
rs = [0] * n
|
162
|
+
|
163
|
+
ws = neighbor_weights
|
164
|
+
|
165
|
+
idx = 0
|
166
|
+
while idx < n
|
167
|
+
stat_i_orig = stat_orig[idx]
|
168
|
+
wi = Numo::DFloat.cast(ws[idx])
|
169
|
+
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
170
|
+
|
171
|
+
rs[idx] = if stat_i_orig.positive?
|
172
|
+
(stat_i_new >= stat_i_orig).count
|
173
|
+
else
|
174
|
+
(stat_i_new <= stat_i_orig).count
|
175
|
+
end
|
176
|
+
|
177
|
+
idx += 1
|
144
178
|
end
|
145
179
|
|
146
180
|
rs.map do |ri|
|
@@ -148,6 +182,22 @@ module SpatialStats
|
|
148
182
|
end
|
149
183
|
end
|
150
184
|
|
185
|
+
##
|
186
|
+
# Determines what quadrant an observation is in. Based on its value
|
187
|
+
# compared to its neighbors. This does not work for all stats, since
|
188
|
+
# it requires that values can be negative as well as positive.
|
189
|
+
#
|
190
|
+
# In a standardized array of z, high values are values greater than 0
|
191
|
+
# and its neighbors are determined by the spatial lag and if that is
|
192
|
+
# positive then its neighbors would be high, low otherwise.
|
193
|
+
#
|
194
|
+
# Quadrants are:
|
195
|
+
# [HH] a high value surrounded by other high values
|
196
|
+
# [LH] a low value surrounded by high values
|
197
|
+
# [LL] a low value surrounded by low values
|
198
|
+
# [HL] a high value surrounded by low values
|
199
|
+
#
|
200
|
+
# @return [Array] of labels
|
151
201
|
def quads
|
152
202
|
# https://github.com/pysal/esda/blob/master/esda/moran.py#L925
|
153
203
|
w = @weights.full
|
@@ -173,6 +223,14 @@ module SpatialStats
|
|
173
223
|
|
174
224
|
private
|
175
225
|
|
226
|
+
def stat_i
|
227
|
+
raise NotImplementedError, 'method stat_i not defined'
|
228
|
+
end
|
229
|
+
|
230
|
+
def mc_i
|
231
|
+
raise NotImplementedError, 'method mc_i not defined'
|
232
|
+
end
|
233
|
+
|
176
234
|
def w
|
177
235
|
weights.standardized
|
178
236
|
end
|
@@ -184,6 +242,20 @@ module SpatialStats
|
|
184
242
|
Random.new
|
185
243
|
end
|
186
244
|
end
|
245
|
+
|
246
|
+
def neighbor_weights
|
247
|
+
# record the non-zero weights in variable length arrays for each
|
248
|
+
# row in the weights table
|
249
|
+
ws = [[]] * weights.n
|
250
|
+
(0..weights.n - 1).each do |idx|
|
251
|
+
neighbors = []
|
252
|
+
w[idx, true].each do |wij|
|
253
|
+
neighbors << wij if wij != 0
|
254
|
+
end
|
255
|
+
ws[idx] = neighbors
|
256
|
+
end
|
257
|
+
ws
|
258
|
+
end
|
187
259
|
end
|
188
260
|
end
|
189
261
|
end
|