spatial_stats 0.2.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +126 -55
- data/Rakefile +7 -0
- data/ext/spatial_stats/csr_matrix.c +380 -0
- data/ext/spatial_stats/csr_matrix.h +34 -0
- data/ext/spatial_stats/extconf.rb +6 -0
- data/ext/spatial_stats/spatial_stats.c +32 -0
- data/lib/spatial_stats.rb +1 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
- data/lib/spatial_stats/global/moran.rb +43 -36
- data/lib/spatial_stats/global/stat.rb +55 -27
- data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
- data/lib/spatial_stats/local/geary.rb +35 -5
- data/lib/spatial_stats/local/getis_ord.rb +45 -17
- data/lib/spatial_stats/local/moran.rb +39 -9
- data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
- data/lib/spatial_stats/local/stat.rb +112 -80
- data/lib/spatial_stats/narray_ext.rb +5 -5
- data/lib/spatial_stats/spatial_stats.so +0 -0
- data/lib/spatial_stats/utils.rb +25 -0
- data/lib/spatial_stats/utils/lag.rb +10 -10
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights/contiguous.rb +20 -10
- data/lib/spatial_stats/weights/distant.rb +38 -20
- data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
- metadata +33 -11
- data/MIT-LICENSE +0 -20
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef CSR_MATRIX
|
2
|
+
#define CSR_MATRIX
|
3
|
+
|
4
|
+
typedef struct csr_matrix
|
5
|
+
{
|
6
|
+
char init;
|
7
|
+
int n;
|
8
|
+
int nnz;
|
9
|
+
double *values;
|
10
|
+
int *col_index;
|
11
|
+
int *row_index;
|
12
|
+
} csr_matrix;
|
13
|
+
|
14
|
+
void csr_matrix_free(void *mat);
|
15
|
+
size_t csr_matrix_memsize(const void *ptr);
|
16
|
+
|
17
|
+
// ruby VALUE for csr_matrix
|
18
|
+
static const rb_data_type_t csr_matrix_type = {
|
19
|
+
"SpatialStats::Weights::CSRMatrix",
|
20
|
+
{NULL, csr_matrix_free, csr_matrix_memsize},
|
21
|
+
0,
|
22
|
+
0,
|
23
|
+
RUBY_TYPED_FREE_IMMEDIATELY};
|
24
|
+
|
25
|
+
void mat_to_sparse(csr_matrix *csr, VALUE data, VALUE keys, VALUE num_rows);
|
26
|
+
VALUE csr_matrix_alloc(VALUE self);
|
27
|
+
VALUE csr_matrix_initialize(VALUE self, VALUE data, VALUE num_rows);
|
28
|
+
VALUE csr_matrix_values(VALUE self);
|
29
|
+
VALUE csr_matrix_col_index(VALUE self);
|
30
|
+
VALUE csr_matrix_row_index(VALUE self);
|
31
|
+
VALUE csr_matrix_mulvec(VALUE self, VALUE vec);
|
32
|
+
VALUE csr_matrix_dot_row(VALUE self, VALUE vec, VALUE row);
|
33
|
+
VALUE csr_matrix_coordinates(VALUE self);
|
34
|
+
#endif
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include "csr_matrix.h"
|
3
|
+
|
4
|
+
/**
|
5
|
+
* Document-class: SpatialStats::Weights::CSRMatrix
|
6
|
+
*
|
7
|
+
* CSRMatrix partially implements a compressed sparse row matrix to perform
|
8
|
+
* spatial lag and other calculations. This will generally be used
|
9
|
+
* to store the weights of an observation set.
|
10
|
+
*
|
11
|
+
* @see https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
|
12
|
+
*
|
13
|
+
*/
|
14
|
+
void Init_spatial_stats()
|
15
|
+
{
|
16
|
+
VALUE spatial_stats_mod = rb_define_module("SpatialStats");
|
17
|
+
VALUE weights_mod = rb_define_module_under(spatial_stats_mod, "Weights");
|
18
|
+
VALUE csr_matrix_class = rb_define_class_under(weights_mod, "CSRMatrix", rb_cData);
|
19
|
+
|
20
|
+
rb_define_alloc_func(csr_matrix_class, csr_matrix_alloc);
|
21
|
+
rb_define_method(csr_matrix_class, "initialize", csr_matrix_initialize, 2);
|
22
|
+
rb_define_method(csr_matrix_class, "values", csr_matrix_values, 0);
|
23
|
+
rb_define_method(csr_matrix_class, "col_index", csr_matrix_col_index, 0);
|
24
|
+
rb_define_method(csr_matrix_class, "row_index", csr_matrix_row_index, 0);
|
25
|
+
rb_define_method(csr_matrix_class, "mulvec", csr_matrix_mulvec, 1);
|
26
|
+
rb_define_method(csr_matrix_class, "dot_row", csr_matrix_dot_row, 2);
|
27
|
+
rb_define_method(csr_matrix_class, "coordinates", csr_matrix_coordinates, 0);
|
28
|
+
|
29
|
+
rb_define_attr(csr_matrix_class, "m", 1, 0);
|
30
|
+
rb_define_attr(csr_matrix_class, "n", 1, 0);
|
31
|
+
rb_define_attr(csr_matrix_class, "nnz", 1, 0);
|
32
|
+
}
|
data/lib/spatial_stats.rb
CHANGED
@@ -19,9 +19,25 @@ module SpatialStats
|
|
19
19
|
@scope = scope
|
20
20
|
@x_field = x_field
|
21
21
|
@y_field = y_field
|
22
|
-
@weights = weights
|
22
|
+
@weights = weights.standardize
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# A new instance of BivariateMoran, from vector and weights.
|
27
|
+
#
|
28
|
+
# @param [Array] x observations of dataset
|
29
|
+
# @param [WeightsMatrix] weights to define relationships between observations
|
30
|
+
#
|
31
|
+
# @return [BivariateMoran]
|
32
|
+
def self.from_observations(x, y, weights)
|
33
|
+
n = weights.n
|
34
|
+
raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
|
35
|
+
|
36
|
+
instance = new(nil, nil, nil, weights.standardize)
|
37
|
+
instance.x = x
|
38
|
+
instance.y = y
|
39
|
+
instance
|
23
40
|
end
|
24
|
-
attr_writer :x, :y
|
25
41
|
|
26
42
|
##
|
27
43
|
# Computes the global spatial correlation of x against spatially lagged
|
@@ -29,8 +45,7 @@ module SpatialStats
|
|
29
45
|
#
|
30
46
|
# @return [Float]
|
31
47
|
def stat
|
32
|
-
|
33
|
-
y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
|
48
|
+
y_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, y)
|
34
49
|
numerator = 0
|
35
50
|
x.each_with_index do |xi, idx|
|
36
51
|
numerator += xi * y_lag[idx]
|
@@ -55,19 +70,20 @@ module SpatialStats
|
|
55
70
|
#
|
56
71
|
# @return [Float]
|
57
72
|
def variance
|
58
|
-
n =
|
59
|
-
|
60
|
-
w = wij.sum
|
73
|
+
n = weights.n
|
74
|
+
w_sum = n.to_f
|
61
75
|
e = expectation
|
62
76
|
|
63
|
-
|
64
|
-
|
77
|
+
wij = weights.sparse.coordinates
|
78
|
+
|
79
|
+
s1 = s1_calc(wij)
|
80
|
+
s2 = s2_calc(n, wij, weights.sparse.row_index)
|
65
81
|
s3 = s3_calc(n, x)
|
66
82
|
|
67
|
-
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (
|
68
|
-
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (
|
83
|
+
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
|
84
|
+
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
|
69
85
|
|
70
|
-
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) *
|
86
|
+
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
|
71
87
|
var_right = e**2
|
72
88
|
var_left - var_right
|
73
89
|
end
|
@@ -88,6 +104,19 @@ module SpatialStats
|
|
88
104
|
mc_bv(permutations, seed)
|
89
105
|
end
|
90
106
|
|
107
|
+
##
|
108
|
+
# Summary of the statistic. Computes +stat+ and +mc+ and returns the values
|
109
|
+
# in a hash.
|
110
|
+
#
|
111
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
112
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
113
|
+
#
|
114
|
+
# @return [Hash]
|
115
|
+
def summary(permutations = 99, seed = nil)
|
116
|
+
p_val = mc(permutations, seed)
|
117
|
+
{ stat: stat, p: p_val }
|
118
|
+
end
|
119
|
+
|
91
120
|
##
|
92
121
|
# Standardized variables queried from +x_field+.
|
93
122
|
#
|
@@ -108,32 +137,41 @@ module SpatialStats
|
|
108
137
|
|
109
138
|
private
|
110
139
|
|
140
|
+
def stat_mc(perms)
|
141
|
+
x_arr = Numo::DFloat.cast(x)
|
142
|
+
lag = w.dot(perms.transpose)
|
143
|
+
x_arr.dot(lag) / (x_arr**2).sum
|
144
|
+
end
|
145
|
+
|
111
146
|
def s3_calc(n, zs)
|
112
147
|
numerator = (1.0 / n) * zs.sum { |v| v**4 }
|
113
148
|
denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
|
114
149
|
numerator / denominator
|
115
150
|
end
|
116
151
|
|
117
|
-
def s2_calc(n, wij)
|
152
|
+
def s2_calc(n, wij, row_index)
|
118
153
|
s2 = 0
|
119
|
-
|
154
|
+
wij_arr = wij.to_a # for row slicing
|
155
|
+
(0..n - 1).each do |idx|
|
156
|
+
row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
|
120
157
|
left_term = 0
|
121
158
|
right_term = 0
|
122
|
-
|
123
|
-
|
124
|
-
|
159
|
+
|
160
|
+
row.each do |coords, val|
|
161
|
+
left_term += val
|
162
|
+
right_term += wij[coords.reverse] || 0
|
125
163
|
end
|
126
164
|
s2 += (left_term + right_term)**2
|
127
165
|
end
|
128
166
|
s2
|
129
167
|
end
|
130
168
|
|
131
|
-
def s1_calc(
|
169
|
+
def s1_calc(wij)
|
132
170
|
s1 = 0
|
133
|
-
|
134
|
-
(
|
135
|
-
|
136
|
-
|
171
|
+
wij.each do |coords, val|
|
172
|
+
# (wij + wji)**2
|
173
|
+
wji = wij[coords.reverse] || 0
|
174
|
+
s1 += (val + wji)**2
|
137
175
|
end
|
138
176
|
s1 / 2
|
139
177
|
end
|
@@ -18,7 +18,6 @@ module SpatialStats
|
|
18
18
|
def initialize(scope, field, weights)
|
19
19
|
super(scope, field, weights)
|
20
20
|
end
|
21
|
-
attr_writer :x
|
22
21
|
|
23
22
|
##
|
24
23
|
# Computes the global spatial autocorrelation of x against a spatially
|
@@ -29,8 +28,7 @@ module SpatialStats
|
|
29
28
|
# compute's Moran's I. numerator is sum of zi * spatial lag of zi
|
30
29
|
# denominator is sum of zi**2.
|
31
30
|
# have to use row-standardized weights
|
32
|
-
|
33
|
-
z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
|
31
|
+
z_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, z)
|
34
32
|
numerator = 0
|
35
33
|
z.each_with_index do |zi, j|
|
36
34
|
row_sum = zi * z_lag[j]
|
@@ -49,7 +47,7 @@ module SpatialStats
|
|
49
47
|
# @return [Float]
|
50
48
|
def expectation
|
51
49
|
# -1/(n-1)
|
52
|
-
-1.0 / (
|
50
|
+
-1.0 / (weights.n - 1)
|
53
51
|
end
|
54
52
|
|
55
53
|
##
|
@@ -58,19 +56,20 @@ module SpatialStats
|
|
58
56
|
#
|
59
57
|
# @return [Float]
|
60
58
|
def variance
|
61
|
-
n =
|
62
|
-
|
63
|
-
w = wij.sum
|
59
|
+
n = weights.n
|
60
|
+
w_sum = n # standardized weights
|
64
61
|
e = expectation
|
65
62
|
|
66
|
-
|
67
|
-
|
63
|
+
wij = weights.sparse.coordinates
|
64
|
+
|
65
|
+
s1 = s1_calc(wij)
|
66
|
+
s2 = s2_calc(n, wij, weights.sparse.row_index)
|
68
67
|
s3 = s3_calc(n, z)
|
69
68
|
|
70
|
-
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (
|
71
|
-
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (
|
69
|
+
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
|
70
|
+
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
|
72
71
|
|
73
|
-
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) *
|
72
|
+
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
|
74
73
|
var_right = e**2
|
75
74
|
var_left - var_right
|
76
75
|
end
|
@@ -92,58 +91,66 @@ module SpatialStats
|
|
92
91
|
end
|
93
92
|
|
94
93
|
##
|
95
|
-
#
|
94
|
+
# Summary of the statistic. Computes +stat+ and +mc+ and returns the values
|
95
|
+
# in a hash.
|
96
96
|
#
|
97
|
-
# @
|
98
|
-
|
99
|
-
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
100
|
-
end
|
101
|
-
|
102
|
-
# TODO: remove these last 2 methods and just standardize x.
|
103
|
-
##
|
104
|
-
# Mean of x
|
97
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
98
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
105
99
|
#
|
106
|
-
# @return [
|
107
|
-
def
|
108
|
-
|
100
|
+
# @return [Hash]
|
101
|
+
def summary(permutations = 99, seed = nil)
|
102
|
+
p_val = mc(permutations, seed)
|
103
|
+
{ stat: stat, p: p_val }
|
109
104
|
end
|
110
105
|
|
111
106
|
##
|
112
|
-
#
|
107
|
+
# Values of the +field+ queried from the +scope+
|
113
108
|
#
|
114
109
|
# @return [Array]
|
115
|
-
def
|
116
|
-
x
|
110
|
+
def x
|
111
|
+
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
112
|
+
.standardize
|
117
113
|
end
|
114
|
+
alias z x
|
118
115
|
|
119
116
|
private
|
120
117
|
|
118
|
+
def stat_mc(perms)
|
119
|
+
z_arr = Numo::DFloat.cast(z)
|
120
|
+
lag = w.dot(perms.transpose)
|
121
|
+
z_arr.dot(lag) / (z_arr**2).sum
|
122
|
+
end
|
123
|
+
|
121
124
|
def s3_calc(n, zs)
|
122
125
|
numerator = (1.0 / n) * zs.sum { |v| v**4 }
|
123
126
|
denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
|
124
127
|
numerator / denominator
|
125
128
|
end
|
126
129
|
|
127
|
-
|
130
|
+
# use row_index to take slices of wij
|
131
|
+
def s2_calc(n, wij, row_index)
|
128
132
|
s2 = 0
|
133
|
+
wij_arr = wij.to_a # for row slicing
|
129
134
|
(0..n - 1).each do |idx|
|
135
|
+
row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
|
130
136
|
left_term = 0
|
131
137
|
right_term = 0
|
132
|
-
|
133
|
-
|
134
|
-
|
138
|
+
|
139
|
+
row.each do |coords, val|
|
140
|
+
left_term += val
|
141
|
+
right_term += wij[coords.reverse] || 0
|
135
142
|
end
|
136
143
|
s2 += (left_term + right_term)**2
|
137
144
|
end
|
138
145
|
s2
|
139
146
|
end
|
140
147
|
|
141
|
-
def s1_calc(
|
148
|
+
def s1_calc(wij)
|
142
149
|
s1 = 0
|
143
|
-
|
144
|
-
(
|
145
|
-
|
146
|
-
|
150
|
+
wij.each do |coords, val|
|
151
|
+
# (wij + wji)**2
|
152
|
+
wji = wij[coords.reverse] || 0
|
153
|
+
s1 += (val + wji)**2
|
147
154
|
end
|
148
155
|
s1 / 2
|
149
156
|
end
|
@@ -11,10 +11,25 @@ module SpatialStats
|
|
11
11
|
def initialize(scope, field, weights)
|
12
12
|
@scope = scope
|
13
13
|
@field = field
|
14
|
-
@weights = weights
|
14
|
+
@weights = weights.standardize
|
15
15
|
end
|
16
16
|
attr_accessor :scope, :field, :weights
|
17
17
|
|
18
|
+
##
|
19
|
+
# A new instance of Stat, from vector and weights.
|
20
|
+
#
|
21
|
+
# @param [Array] x observations of dataset
|
22
|
+
# @param [WeightsMatrix] weights to define relationships between observations
|
23
|
+
#
|
24
|
+
# @return [Stat]
|
25
|
+
def self.from_observations(x, weights)
|
26
|
+
raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
|
27
|
+
|
28
|
+
instance = new(nil, nil, weights.standardize)
|
29
|
+
instance.x = x
|
30
|
+
instance
|
31
|
+
end
|
32
|
+
|
18
33
|
def stat
|
19
34
|
raise NotImplementedError, 'method stat not defined'
|
20
35
|
end
|
@@ -39,27 +54,36 @@ module SpatialStats
|
|
39
54
|
(stat - expectation) / Math.sqrt(variance)
|
40
55
|
end
|
41
56
|
|
57
|
+
def x=(values)
|
58
|
+
@x = values.standardize
|
59
|
+
end
|
60
|
+
alias z= x=
|
61
|
+
|
62
|
+
def y=(values)
|
63
|
+
@y = values.standardize
|
64
|
+
end
|
65
|
+
|
42
66
|
def mc(permutations, seed)
|
43
67
|
rng = gen_rng(seed)
|
44
68
|
shuffles = []
|
45
69
|
permutations.times do
|
46
70
|
shuffles << x.shuffle(random: rng)
|
47
71
|
end
|
72
|
+
shuffles = Numo::DFloat.cast(shuffles)
|
73
|
+
|
48
74
|
# r is the number of equal to or more extreme samples
|
49
75
|
# one sided
|
50
|
-
stat_orig = stat
|
51
|
-
r = 0
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
end
|
62
|
-
end
|
76
|
+
stat_orig = stat.round(5)
|
77
|
+
# r = 0
|
78
|
+
|
79
|
+
# compute new stat values
|
80
|
+
stat_new = stat_mc(shuffles)
|
81
|
+
|
82
|
+
r = if stat_orig.positive?
|
83
|
+
(stat_new >= stat_orig).count
|
84
|
+
else
|
85
|
+
(stat_new <= stat_orig).count
|
86
|
+
end
|
63
87
|
|
64
88
|
(r + 1.0) / (permutations + 1.0)
|
65
89
|
end
|
@@ -71,27 +95,31 @@ module SpatialStats
|
|
71
95
|
permutations.times do
|
72
96
|
shuffles << y.shuffle(random: rng)
|
73
97
|
end
|
98
|
+
shuffles = Numo::DFloat.cast(shuffles)
|
74
99
|
|
75
100
|
# r is the number of equal to or more extreme samples
|
76
|
-
stat_orig = stat
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
r += 1 if klass.stat >= stat_orig
|
85
|
-
else
|
86
|
-
r += 1 if klass.stat <= stat_orig
|
87
|
-
end
|
88
|
-
end
|
101
|
+
stat_orig = stat.round(5)
|
102
|
+
stat_new = stat_mc(shuffles)
|
103
|
+
|
104
|
+
r = if stat_orig.positive?
|
105
|
+
(stat_new >= stat_orig).count
|
106
|
+
else
|
107
|
+
(stat_new <= stat_orig).count
|
108
|
+
end
|
89
109
|
|
90
110
|
(r + 1.0) / (permutations + 1.0)
|
91
111
|
end
|
92
112
|
|
93
113
|
private
|
94
114
|
|
115
|
+
def stat_mc(_shuffles)
|
116
|
+
raise NotImplementedError, 'private method stat_mc not defined'
|
117
|
+
end
|
118
|
+
|
119
|
+
def w
|
120
|
+
@w ||= weights.dense
|
121
|
+
end
|
122
|
+
|
95
123
|
def gen_rng(seed)
|
96
124
|
if seed
|
97
125
|
Random.new(seed)
|