spatial_stats 0.2.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +126 -55
- data/Rakefile +7 -0
- data/ext/spatial_stats/csr_matrix.c +380 -0
- data/ext/spatial_stats/csr_matrix.h +34 -0
- data/ext/spatial_stats/extconf.rb +6 -0
- data/ext/spatial_stats/spatial_stats.c +32 -0
- data/lib/spatial_stats.rb +1 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
- data/lib/spatial_stats/global/moran.rb +43 -36
- data/lib/spatial_stats/global/stat.rb +55 -27
- data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
- data/lib/spatial_stats/local/geary.rb +35 -5
- data/lib/spatial_stats/local/getis_ord.rb +45 -17
- data/lib/spatial_stats/local/moran.rb +39 -9
- data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
- data/lib/spatial_stats/local/stat.rb +112 -80
- data/lib/spatial_stats/narray_ext.rb +5 -5
- data/lib/spatial_stats/spatial_stats.so +0 -0
- data/lib/spatial_stats/utils.rb +25 -0
- data/lib/spatial_stats/utils/lag.rb +10 -10
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights/contiguous.rb +20 -10
- data/lib/spatial_stats/weights/distant.rb +38 -20
- data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
- metadata +33 -11
- data/MIT-LICENSE +0 -20
@@ -19,10 +19,28 @@ module SpatialStats
|
|
19
19
|
@scope = scope
|
20
20
|
@x_field = x_field
|
21
21
|
@y_field = y_field
|
22
|
-
@weights = weights
|
22
|
+
@weights = weights.standardize
|
23
23
|
end
|
24
24
|
attr_accessor :scope, :x_field, :y_field, :weights
|
25
25
|
|
26
|
+
##
|
27
|
+
# A new instance of BivariateMoran, from vector and weights.
|
28
|
+
#
|
29
|
+
# @param [Array] x observations of dataset
|
30
|
+
# @param [Array] y observations of dataset
|
31
|
+
# @param [WeightsMatrix] weights to define relationships between observations
|
32
|
+
#
|
33
|
+
# @return [BivariateMoran]
|
34
|
+
def self.from_observations(x, y, weights)
|
35
|
+
n = weights.n
|
36
|
+
raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
|
37
|
+
|
38
|
+
instance = new(nil, nil, nil, weights.standardize)
|
39
|
+
instance.x = x
|
40
|
+
instance.y = y
|
41
|
+
instance
|
42
|
+
end
|
43
|
+
|
26
44
|
##
|
27
45
|
# Computes the local indicator of spatial correlation for
|
28
46
|
# x against lagged y.
|
@@ -62,6 +80,61 @@ module SpatialStats
|
|
62
80
|
mc_bv(permutations, seed)
|
63
81
|
end
|
64
82
|
|
83
|
+
##
|
84
|
+
# Determines what quadrant an observation is in, based on its value
|
85
|
+
# compared to its neighbors. This does not work for all stats, since
|
86
|
+
# it requires that values can be negative.
|
87
|
+
#
|
88
|
+
# In a standardized array of z, high values are values greater than 0
|
89
|
+
# and its neighbors are determined by the spatial lag and if that is
|
90
|
+
# positive then its neighbors would be high, low otherwise.
|
91
|
+
#
|
92
|
+
# Quadrants are:
|
93
|
+
# [HH] a high value surrounded by other high values
|
94
|
+
# [LH] a low value surrounded by high values
|
95
|
+
# [LL] a low value surrounded by low values
|
96
|
+
# [HL] a high value surrounded by low values
|
97
|
+
#
|
98
|
+
# @return [Array] of labels
|
99
|
+
def quads
|
100
|
+
# https://github.com/pysal/esda/blob/master/esda/moran.py#L925
|
101
|
+
z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, y)
|
102
|
+
zp = x.map(&:positive?)
|
103
|
+
lp = z_lag.map(&:positive?)
|
104
|
+
|
105
|
+
# hh = zp & lp
|
106
|
+
# lh = zp ^ true & lp
|
107
|
+
# ll = zp ^ true & lp ^ true
|
108
|
+
# hl = zp & lp ^ true
|
109
|
+
hh = zp.each_with_index.map { |v, idx| v & lp[idx] }
|
110
|
+
lh = zp.each_with_index.map { |v, idx| (v ^ true) & lp[idx] }
|
111
|
+
ll = zp.each_with_index.map { |v, idx| (v ^ true) & (lp[idx] ^ true) }
|
112
|
+
hl = zp.each_with_index.map { |v, idx| v & (lp[idx] ^ true) }
|
113
|
+
|
114
|
+
# now zip lists and map them to proper terms
|
115
|
+
quad_terms = %w[HH LH LL HL]
|
116
|
+
hh.zip(lh, ll, hl).map do |feature|
|
117
|
+
quad_terms[feature.index(true)]
|
118
|
+
end
|
119
|
+
end
|
120
|
+
alias groups quads
|
121
|
+
|
122
|
+
##
|
123
|
+
# Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
|
124
|
+
# in an array of hashes.
|
125
|
+
#
|
126
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
127
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
128
|
+
#
|
129
|
+
# @return [Array]
|
130
|
+
def summary(permutations = 99, seed = nil)
|
131
|
+
p_vals = mc(permutations, seed)
|
132
|
+
data = weights.keys.zip(stat, p_vals, groups)
|
133
|
+
data.map do |row|
|
134
|
+
{ key: row[0], stat: row[1], p: row[2], group: row[3] }
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
65
138
|
def x
|
66
139
|
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @x_field)
|
67
140
|
.standardize
|
@@ -79,8 +152,17 @@ module SpatialStats
|
|
79
152
|
x[idx] * y_lag_i
|
80
153
|
end
|
81
154
|
|
155
|
+
def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
|
156
|
+
# Since moran can be positive or negative, go by this definition
|
157
|
+
if stat_i_orig.positive?
|
158
|
+
(stat_i_new >= stat_i_orig).count
|
159
|
+
else
|
160
|
+
(stat_i_new <= stat_i_orig).count
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
82
164
|
def y_lag
|
83
|
-
@y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(
|
165
|
+
@y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, y)
|
84
166
|
end
|
85
167
|
end
|
86
168
|
end
|
@@ -32,6 +32,25 @@ module SpatialStats
|
|
32
32
|
end
|
33
33
|
alias c stat
|
34
34
|
|
35
|
+
##
|
36
|
+
# Computes the groups each observation belongs to.
|
37
|
+
# Potential groups for Geary's C are:
|
38
|
+
# [HH] High-High
|
39
|
+
# [LL] Low-Low
|
40
|
+
# [N] Negative - Group traditionally for HL and LH, but since the difference is squared they are in the same group.
|
41
|
+
#
|
42
|
+
#
|
43
|
+
# @return [Array] groups for each observation
|
44
|
+
def groups
|
45
|
+
quads.map do |quad|
|
46
|
+
if %w[HL LH].include?(quad)
|
47
|
+
'N'
|
48
|
+
else
|
49
|
+
quad
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
35
54
|
##
|
36
55
|
# Values of the +field+ queried from the +scope+
|
37
56
|
#
|
@@ -45,9 +64,11 @@ module SpatialStats
|
|
45
64
|
private
|
46
65
|
|
47
66
|
def stat_i(idx)
|
48
|
-
|
49
|
-
|
50
|
-
|
67
|
+
# TODO: maybe don't even use stat_i
|
68
|
+
# just form all of the modified zs and then
|
69
|
+
# pass it to a loop of mulvec all implemented in c ext
|
70
|
+
zi = z.map { |val| (z[idx] - val)**2 }
|
71
|
+
weights.sparse.dot_row(zi, idx)
|
51
72
|
end
|
52
73
|
|
53
74
|
def mc_i(wi, perms, idx)
|
@@ -55,8 +76,17 @@ module SpatialStats
|
|
55
76
|
(wi * zi).sum(1)
|
56
77
|
end
|
57
78
|
|
58
|
-
def
|
59
|
-
|
79
|
+
def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
|
80
|
+
# Geary cannot be negative, so we have to use this technique from
|
81
|
+
# GeoDa to determine p values. Note I slightly modified it to be inclusive
|
82
|
+
# on both tails not just the lower tail.
|
83
|
+
# https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
|
84
|
+
mean = stat_i_new.mean
|
85
|
+
if stat_i_orig <= mean
|
86
|
+
(stat_i_new <= stat_i_orig).count
|
87
|
+
else
|
88
|
+
(stat_i_new >= stat_i_orig).count
|
89
|
+
end
|
60
90
|
end
|
61
91
|
end
|
62
92
|
end
|
@@ -14,13 +14,18 @@ module SpatialStats
|
|
14
14
|
# @param [ActiveRecord::Relation] scope
|
15
15
|
# @param [Symbol, String] field to query from scope
|
16
16
|
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
17
|
+
# @param [Boolean] star to preset if star will be true or false. Will be calculated otherwise.
|
17
18
|
#
|
18
19
|
# @return [GetisOrd]
|
19
20
|
def initialize(scope, field, weights, star = nil)
|
20
|
-
|
21
|
+
@scope = scope
|
22
|
+
@field = field
|
23
|
+
@weights = weights
|
21
24
|
@star = star
|
25
|
+
calc_weights
|
22
26
|
end
|
23
27
|
attr_accessor :star
|
28
|
+
attr_writer :x
|
24
29
|
|
25
30
|
##
|
26
31
|
# Computes the G or G* statistic for every observation in x.
|
@@ -33,6 +38,25 @@ module SpatialStats
|
|
33
38
|
end
|
34
39
|
alias g stat
|
35
40
|
|
41
|
+
##
|
42
|
+
# Computes the groups each observation belongs to.
|
43
|
+
# Potential groups for G are:
|
44
|
+
# [H] High
|
45
|
+
# [L] Low
|
46
|
+
#
|
47
|
+
# Group is high when standardized z is positive, low otherwise.
|
48
|
+
#
|
49
|
+
# @return [Array] groups for each observation
|
50
|
+
def groups
|
51
|
+
z.standardize.map do |val|
|
52
|
+
if val.positive?
|
53
|
+
'H'
|
54
|
+
else
|
55
|
+
'L'
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
36
60
|
##
|
37
61
|
# Values of the +field+ queried from the +scope+
|
38
62
|
#
|
@@ -50,7 +74,7 @@ module SpatialStats
|
|
50
74
|
# @return [Boolean] of star
|
51
75
|
def star?
|
52
76
|
if @star.nil?
|
53
|
-
@star = weights.
|
77
|
+
@star = weights.dense.trace.positive?
|
54
78
|
else
|
55
79
|
@star
|
56
80
|
end
|
@@ -67,25 +91,29 @@ module SpatialStats
|
|
67
91
|
x_lag_i / denominators[idx]
|
68
92
|
end
|
69
93
|
|
70
|
-
def
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
94
|
+
def mc_observation_calc(stat_i_orig, stat_i_new, permutations)
|
95
|
+
# GetisOrd cannot be negative, so we have to use this technique from
|
96
|
+
# ESDA to determine if we should select p or 1-p.
|
97
|
+
# https://github.com/pysal/esda/blob/master/esda/getisord.py#L388
|
98
|
+
num_larger = (stat_i_new >= stat_i_orig).count
|
99
|
+
is_low = (permutations - num_larger) < num_larger
|
100
|
+
if is_low
|
101
|
+
permutations - num_larger
|
102
|
+
else
|
103
|
+
num_larger
|
77
104
|
end
|
78
105
|
end
|
79
106
|
|
107
|
+
def calc_weights
|
108
|
+
@weights = if star?
|
109
|
+
weights.window.standardize
|
110
|
+
else
|
111
|
+
weights.standardize
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
80
115
|
def z_lag
|
81
|
-
|
82
|
-
@z_lag ||= begin
|
83
|
-
if star?
|
84
|
-
SpatialStats::Utils::Lag.window_sum(w, x)
|
85
|
-
else
|
86
|
-
SpatialStats::Utils::Lag.neighbor_sum(w, x)
|
87
|
-
end
|
88
|
-
end
|
116
|
+
@z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, x)
|
89
117
|
end
|
90
118
|
alias x_lag z_lag
|
91
119
|
|
@@ -55,12 +55,11 @@ module SpatialStats
|
|
55
55
|
# @return [Array] of variances for each observation
|
56
56
|
def variance
|
57
57
|
# formula is A - B - (E[I])**2
|
58
|
-
wt = w.row_standardized
|
59
58
|
exp = expectation
|
60
59
|
|
61
60
|
vars = []
|
62
|
-
a_terms = a_calc
|
63
|
-
b_terms = b_calc
|
61
|
+
a_terms = a_calc
|
62
|
+
b_terms = b_calc
|
64
63
|
|
65
64
|
a_terms.each_with_index do |a_term, idx|
|
66
65
|
vars << (a_term - b_terms[idx] - (exp**2))
|
@@ -68,6 +67,21 @@ module SpatialStats
|
|
68
67
|
vars
|
69
68
|
end
|
70
69
|
|
70
|
+
##
|
71
|
+
# Computes the groups each observation belongs to.
|
72
|
+
# Potential groups for Moran's I are:
|
73
|
+
# [HH] High-High
|
74
|
+
# [HL] High-Low
|
75
|
+
# [LH] Low-High
|
76
|
+
# [LL] Low-Low
|
77
|
+
#
|
78
|
+
# This is the same as the +#quads+ method in the +Stat+ class.
|
79
|
+
#
|
80
|
+
# @return [Array] groups for each observation
|
81
|
+
def groups
|
82
|
+
quads
|
83
|
+
end
|
84
|
+
|
71
85
|
##
|
72
86
|
# Values of the +field+ queried from the +scope+
|
73
87
|
#
|
@@ -85,7 +99,7 @@ module SpatialStats
|
|
85
99
|
def z_lag
|
86
100
|
# w is already row_standardized, so we are using
|
87
101
|
# neighbor sum instead of neighbor_average to save cost
|
88
|
-
@z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(
|
102
|
+
@z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, z)
|
89
103
|
end
|
90
104
|
|
91
105
|
private
|
@@ -102,6 +116,15 @@ module SpatialStats
|
|
102
116
|
z[idx] * z_lag_i
|
103
117
|
end
|
104
118
|
|
119
|
+
def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
|
120
|
+
# Since moran can be positive or negative, go by this definition
|
121
|
+
if stat_i_orig.positive?
|
122
|
+
(stat_i_new >= stat_i_orig).count
|
123
|
+
else
|
124
|
+
(stat_i_new <= stat_i_orig).count
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
105
128
|
def si2
|
106
129
|
# @si2 ||= z.sample_variance
|
107
130
|
# we standardize so sample_variance is 1
|
@@ -109,20 +132,27 @@ module SpatialStats
|
|
109
132
|
end
|
110
133
|
|
111
134
|
# https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
|
112
|
-
|
113
|
-
|
135
|
+
# TODO: sparse
|
136
|
+
def a_calc
|
137
|
+
n = weights.n
|
114
138
|
b2i = b2i_calc
|
139
|
+
|
140
|
+
wts = weights.sparse.values
|
141
|
+
row_index = weights.sparse.row_index
|
142
|
+
|
115
143
|
a_terms = []
|
116
144
|
|
117
145
|
(0..n - 1).each do |idx|
|
118
|
-
|
146
|
+
row_range = row_index[idx]..(row_index[idx + 1] - 1)
|
147
|
+
wt = wts[row_range]
|
148
|
+
sigma_term = wt.sum { |v| v**2 }
|
119
149
|
a_terms << (n - b2i) * sigma_term / (n - 1)
|
120
150
|
end
|
121
151
|
a_terms
|
122
152
|
end
|
123
153
|
|
124
|
-
def b_calc
|
125
|
-
n =
|
154
|
+
def b_calc
|
155
|
+
n = weights.n
|
126
156
|
b2i = b2i_calc
|
127
157
|
b_terms = []
|
128
158
|
|
@@ -23,7 +23,7 @@ module SpatialStats
|
|
23
23
|
def initialize(scope, fields, weights)
|
24
24
|
@scope = scope
|
25
25
|
@fields = fields
|
26
|
-
@weights = weights
|
26
|
+
@weights = weights.standardize
|
27
27
|
end
|
28
28
|
attr_accessor :scope, :fields, :weights
|
29
29
|
|
@@ -60,37 +60,47 @@ module SpatialStats
|
|
60
60
|
# of indices, which will return a list of new orders for the fields.
|
61
61
|
# They will then be shuffled corresponding to the new indices.
|
62
62
|
rng = gen_rng(seed)
|
63
|
-
|
64
|
-
indices = (0..(n - 1)).to_a
|
65
|
-
shuffles = crand(indices, permutations, rng)
|
63
|
+
rids = crand(permutations, rng)
|
66
64
|
|
65
|
+
n_1 = weights.n - 1
|
66
|
+
sparse = weights.sparse
|
67
|
+
row_index = sparse.row_index
|
68
|
+
ws = sparse.values
|
69
|
+
wc = weights.wc
|
67
70
|
stat_orig = stat
|
68
|
-
rs = [0] * n
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
ids = (0..n_1).to_a
|
73
|
+
observations = Array.new(weights.n)
|
74
|
+
(0..n_1).each do |idx|
|
75
|
+
idsi = ids.dup
|
76
|
+
idsi.delete_at(idx)
|
77
|
+
idsi.shuffle!(random: rng)
|
78
|
+
idsi = Numo::Int32.cast(idsi)
|
79
|
+
sample = rids[idsi[rids[true, 0..wc[idx] - 1]]]
|
80
|
+
|
81
|
+
# account for case where there are no neighbors
|
82
|
+
row_range = row_index[idx]..(row_index[idx + 1] - 1)
|
83
|
+
if row_range.size.zero?
|
84
|
+
observations[idx] = permutations
|
85
|
+
next
|
86
|
+
end
|
87
|
+
|
88
|
+
wi = Numo::DFloat.cast(ws[row_range])
|
89
|
+
stat_i_new = mc_i(wi, sample, idx)
|
74
90
|
stat_i_orig = stat_orig[idx]
|
75
|
-
|
76
|
-
|
77
|
-
# for each field, compute the C value at that index.
|
78
|
-
stat_i_new = mc_i(wi, shuffles[idx], idx)
|
79
|
-
|
80
|
-
rs[idx] = if stat_i_orig.positive?
|
81
|
-
(stat_i_new >= stat_i_orig).count
|
82
|
-
else
|
83
|
-
(stat_i_new <= stat_i_orig).count
|
84
|
-
end
|
85
|
-
|
86
|
-
idx += 1
|
91
|
+
observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
|
92
|
+
permutations)
|
87
93
|
end
|
88
94
|
|
89
|
-
|
95
|
+
observations.map do |ri|
|
90
96
|
(ri + 1.0) / (permutations + 1.0)
|
91
97
|
end
|
92
98
|
end
|
93
99
|
|
100
|
+
def groups
|
101
|
+
raise NotImplementedError, 'groups not implemented'
|
102
|
+
end
|
103
|
+
|
94
104
|
private
|
95
105
|
|
96
106
|
def mc_i(wi, perms, idx)
|
@@ -108,6 +118,19 @@ module SpatialStats
|
|
108
118
|
cs.mean(0)
|
109
119
|
end
|
110
120
|
|
121
|
+
def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
|
122
|
+
# Geary cannot be negative, so we have to use this technique from
|
123
|
+
# GeoDa to determine p values. Note I slightly modified it to be inclusive
|
124
|
+
# on both tails not just the lower tail.
|
125
|
+
# https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
|
126
|
+
mean = stat_i_new.mean
|
127
|
+
if stat_i_orig <= mean
|
128
|
+
(stat_i_new <= stat_i_orig).count
|
129
|
+
else
|
130
|
+
(stat_i_new >= stat_i_orig).count
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
111
134
|
def field_data
|
112
135
|
@field_data ||= fields.map do |field|
|
113
136
|
SpatialStats::Queries::Variables.query_field(@scope, field)
|