spatial_stats 0.2.2 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +126 -55
- data/Rakefile +7 -0
- data/ext/spatial_stats/csr_matrix.c +380 -0
- data/ext/spatial_stats/csr_matrix.h +34 -0
- data/ext/spatial_stats/extconf.rb +6 -0
- data/ext/spatial_stats/spatial_stats.c +32 -0
- data/lib/spatial_stats.rb +1 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
- data/lib/spatial_stats/global/moran.rb +43 -36
- data/lib/spatial_stats/global/stat.rb +55 -27
- data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
- data/lib/spatial_stats/local/geary.rb +35 -5
- data/lib/spatial_stats/local/getis_ord.rb +45 -17
- data/lib/spatial_stats/local/moran.rb +39 -9
- data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
- data/lib/spatial_stats/local/stat.rb +112 -80
- data/lib/spatial_stats/narray_ext.rb +5 -5
- data/lib/spatial_stats/spatial_stats.so +0 -0
- data/lib/spatial_stats/utils.rb +25 -0
- data/lib/spatial_stats/utils/lag.rb +10 -10
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights/contiguous.rb +20 -10
- data/lib/spatial_stats/weights/distant.rb +38 -20
- data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
- metadata +33 -11
- data/MIT-LICENSE +0 -20
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef CSR_MATRIX
|
2
|
+
#define CSR_MATRIX
|
3
|
+
|
4
|
+
typedef struct csr_matrix
|
5
|
+
{
|
6
|
+
char init;
|
7
|
+
int n;
|
8
|
+
int nnz;
|
9
|
+
double *values;
|
10
|
+
int *col_index;
|
11
|
+
int *row_index;
|
12
|
+
} csr_matrix;
|
13
|
+
|
14
|
+
void csr_matrix_free(void *mat);
|
15
|
+
size_t csr_matrix_memsize(const void *ptr);
|
16
|
+
|
17
|
+
// Ruby typed-data descriptor (rb_data_type_t) for csr_matrix
|
18
|
+
static const rb_data_type_t csr_matrix_type = {
|
19
|
+
"SpatialStats::Weights::CSRMatrix",
|
20
|
+
{NULL, csr_matrix_free, csr_matrix_memsize},
|
21
|
+
0,
|
22
|
+
0,
|
23
|
+
RUBY_TYPED_FREE_IMMEDIATELY};
|
24
|
+
|
25
|
+
void mat_to_sparse(csr_matrix *csr, VALUE data, VALUE keys, VALUE num_rows);
|
26
|
+
VALUE csr_matrix_alloc(VALUE self);
|
27
|
+
VALUE csr_matrix_initialize(VALUE self, VALUE data, VALUE num_rows);
|
28
|
+
VALUE csr_matrix_values(VALUE self);
|
29
|
+
VALUE csr_matrix_col_index(VALUE self);
|
30
|
+
VALUE csr_matrix_row_index(VALUE self);
|
31
|
+
VALUE csr_matrix_mulvec(VALUE self, VALUE vec);
|
32
|
+
VALUE csr_matrix_dot_row(VALUE self, VALUE vec, VALUE row);
|
33
|
+
VALUE csr_matrix_coordinates(VALUE self);
|
34
|
+
#endif
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include "csr_matrix.h"
|
3
|
+
|
4
|
+
/**
|
5
|
+
* Document-class: SpatialStats::Weights::CSRMatrix
|
6
|
+
*
|
7
|
+
* CSRMatrix partially implements a compressed sparse row matrix to perform
|
8
|
+
* spatial lag and other calculations. This will generally be used
|
9
|
+
* to store the weights of an observation set.
|
10
|
+
*
|
11
|
+
* @see https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
|
12
|
+
*
|
13
|
+
*/
|
14
|
+
void Init_spatial_stats()
|
15
|
+
{
|
16
|
+
VALUE spatial_stats_mod = rb_define_module("SpatialStats");
|
17
|
+
VALUE weights_mod = rb_define_module_under(spatial_stats_mod, "Weights");
|
18
|
+
VALUE csr_matrix_class = rb_define_class_under(weights_mod, "CSRMatrix", rb_cData);
|
19
|
+
|
20
|
+
rb_define_alloc_func(csr_matrix_class, csr_matrix_alloc);
|
21
|
+
rb_define_method(csr_matrix_class, "initialize", csr_matrix_initialize, 2);
|
22
|
+
rb_define_method(csr_matrix_class, "values", csr_matrix_values, 0);
|
23
|
+
rb_define_method(csr_matrix_class, "col_index", csr_matrix_col_index, 0);
|
24
|
+
rb_define_method(csr_matrix_class, "row_index", csr_matrix_row_index, 0);
|
25
|
+
rb_define_method(csr_matrix_class, "mulvec", csr_matrix_mulvec, 1);
|
26
|
+
rb_define_method(csr_matrix_class, "dot_row", csr_matrix_dot_row, 2);
|
27
|
+
rb_define_method(csr_matrix_class, "coordinates", csr_matrix_coordinates, 0);
|
28
|
+
|
29
|
+
rb_define_attr(csr_matrix_class, "m", 1, 0);
|
30
|
+
rb_define_attr(csr_matrix_class, "n", 1, 0);
|
31
|
+
rb_define_attr(csr_matrix_class, "nnz", 1, 0);
|
32
|
+
}
|
data/lib/spatial_stats.rb
CHANGED
@@ -19,9 +19,25 @@ module SpatialStats
|
|
19
19
|
@scope = scope
|
20
20
|
@x_field = x_field
|
21
21
|
@y_field = y_field
|
22
|
-
@weights = weights
|
22
|
+
@weights = weights.standardize
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# A new instance of BivariateMoran, from x and y vectors and weights.
|
27
|
+
#
|
28
|
+
# @param [Array] x observations of dataset
|
29
|
+
# @param [WeightsMatrix] weights to define relationships between observations
|
30
|
+
#
|
31
|
+
# @return [BivariateMoran]
|
32
|
+
def self.from_observations(x, y, weights)
|
33
|
+
n = weights.n
|
34
|
+
raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
|
35
|
+
|
36
|
+
instance = new(nil, nil, nil, weights.standardize)
|
37
|
+
instance.x = x
|
38
|
+
instance.y = y
|
39
|
+
instance
|
23
40
|
end
|
24
|
-
attr_writer :x, :y
|
25
41
|
|
26
42
|
##
|
27
43
|
# Computes the global spatial correlation of x against spatially lagged
|
@@ -29,8 +45,7 @@ module SpatialStats
|
|
29
45
|
#
|
30
46
|
# @return [Float]
|
31
47
|
def stat
|
32
|
-
|
33
|
-
y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
|
48
|
+
y_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, y)
|
34
49
|
numerator = 0
|
35
50
|
x.each_with_index do |xi, idx|
|
36
51
|
numerator += xi * y_lag[idx]
|
@@ -55,19 +70,20 @@ module SpatialStats
|
|
55
70
|
#
|
56
71
|
# @return [Float]
|
57
72
|
def variance
|
58
|
-
n =
|
59
|
-
|
60
|
-
w = wij.sum
|
73
|
+
n = weights.n
|
74
|
+
w_sum = n.to_f
|
61
75
|
e = expectation
|
62
76
|
|
63
|
-
|
64
|
-
|
77
|
+
wij = weights.sparse.coordinates
|
78
|
+
|
79
|
+
s1 = s1_calc(wij)
|
80
|
+
s2 = s2_calc(n, wij, weights.sparse.row_index)
|
65
81
|
s3 = s3_calc(n, x)
|
66
82
|
|
67
|
-
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (
|
68
|
-
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (
|
83
|
+
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
|
84
|
+
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
|
69
85
|
|
70
|
-
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) *
|
86
|
+
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
|
71
87
|
var_right = e**2
|
72
88
|
var_left - var_right
|
73
89
|
end
|
@@ -88,6 +104,19 @@ module SpatialStats
|
|
88
104
|
mc_bv(permutations, seed)
|
89
105
|
end
|
90
106
|
|
107
|
+
##
|
108
|
+
# Summary of the statistic. Computes +stat+ and +mc+ and returns the values
|
109
|
+
# in a hash.
|
110
|
+
#
|
111
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
112
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
113
|
+
#
|
114
|
+
# @return [Hash]
|
115
|
+
def summary(permutations = 99, seed = nil)
|
116
|
+
p_val = mc(permutations, seed)
|
117
|
+
{ stat: stat, p: p_val }
|
118
|
+
end
|
119
|
+
|
91
120
|
##
|
92
121
|
# Standardized variables queried from +x_field+.
|
93
122
|
#
|
@@ -108,32 +137,41 @@ module SpatialStats
|
|
108
137
|
|
109
138
|
private
|
110
139
|
|
140
|
+
def stat_mc(perms)
|
141
|
+
x_arr = Numo::DFloat.cast(x)
|
142
|
+
lag = w.dot(perms.transpose)
|
143
|
+
x_arr.dot(lag) / (x_arr**2).sum
|
144
|
+
end
|
145
|
+
|
111
146
|
def s3_calc(n, zs)
|
112
147
|
numerator = (1.0 / n) * zs.sum { |v| v**4 }
|
113
148
|
denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
|
114
149
|
numerator / denominator
|
115
150
|
end
|
116
151
|
|
117
|
-
def s2_calc(n, wij)
|
152
|
+
def s2_calc(n, wij, row_index)
|
118
153
|
s2 = 0
|
119
|
-
|
154
|
+
wij_arr = wij.to_a # for row slicing
|
155
|
+
(0..n - 1).each do |idx|
|
156
|
+
row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
|
120
157
|
left_term = 0
|
121
158
|
right_term = 0
|
122
|
-
|
123
|
-
|
124
|
-
|
159
|
+
|
160
|
+
row.each do |coords, val|
|
161
|
+
left_term += val
|
162
|
+
right_term += wij[coords.reverse] || 0
|
125
163
|
end
|
126
164
|
s2 += (left_term + right_term)**2
|
127
165
|
end
|
128
166
|
s2
|
129
167
|
end
|
130
168
|
|
131
|
-
def s1_calc(
|
169
|
+
def s1_calc(wij)
|
132
170
|
s1 = 0
|
133
|
-
|
134
|
-
(
|
135
|
-
|
136
|
-
|
171
|
+
wij.each do |coords, val|
|
172
|
+
# (wij + wji)**2
|
173
|
+
wji = wij[coords.reverse] || 0
|
174
|
+
s1 += (val + wji)**2
|
137
175
|
end
|
138
176
|
s1 / 2
|
139
177
|
end
|
@@ -18,7 +18,6 @@ module SpatialStats
|
|
18
18
|
def initialize(scope, field, weights)
|
19
19
|
super(scope, field, weights)
|
20
20
|
end
|
21
|
-
attr_writer :x
|
22
21
|
|
23
22
|
##
|
24
23
|
# Computes the global spatial autocorrelation of x against a spatially
|
@@ -29,8 +28,7 @@ module SpatialStats
|
|
29
28
|
# computes Moran's I. numerator is sum of zi * spatial lag of zi
|
30
29
|
# denominator is sum of zi**2.
|
31
30
|
# have to use row-standardized weights
|
32
|
-
|
33
|
-
z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
|
31
|
+
z_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, z)
|
34
32
|
numerator = 0
|
35
33
|
z.each_with_index do |zi, j|
|
36
34
|
row_sum = zi * z_lag[j]
|
@@ -49,7 +47,7 @@ module SpatialStats
|
|
49
47
|
# @return [Float]
|
50
48
|
def expectation
|
51
49
|
# -1/(n-1)
|
52
|
-
-1.0 / (
|
50
|
+
-1.0 / (weights.n - 1)
|
53
51
|
end
|
54
52
|
|
55
53
|
##
|
@@ -58,19 +56,20 @@ module SpatialStats
|
|
58
56
|
#
|
59
57
|
# @return [Float]
|
60
58
|
def variance
|
61
|
-
n =
|
62
|
-
|
63
|
-
w = wij.sum
|
59
|
+
n = weights.n
|
60
|
+
w_sum = n # standardized weights
|
64
61
|
e = expectation
|
65
62
|
|
66
|
-
|
67
|
-
|
63
|
+
wij = weights.sparse.coordinates
|
64
|
+
|
65
|
+
s1 = s1_calc(wij)
|
66
|
+
s2 = s2_calc(n, wij, weights.sparse.row_index)
|
68
67
|
s3 = s3_calc(n, z)
|
69
68
|
|
70
|
-
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (
|
71
|
-
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (
|
69
|
+
s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
|
70
|
+
s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
|
72
71
|
|
73
|
-
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) *
|
72
|
+
var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
|
74
73
|
var_right = e**2
|
75
74
|
var_left - var_right
|
76
75
|
end
|
@@ -92,58 +91,66 @@ module SpatialStats
|
|
92
91
|
end
|
93
92
|
|
94
93
|
##
|
95
|
-
#
|
94
|
+
# Summary of the statistic. Computes +stat+ and +mc+ and returns the values
|
95
|
+
# in a hash.
|
96
96
|
#
|
97
|
-
# @
|
98
|
-
|
99
|
-
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
100
|
-
end
|
101
|
-
|
102
|
-
# TODO: remove these last 2 methods and just standardize x.
|
103
|
-
##
|
104
|
-
# Mean of x
|
97
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
98
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
105
99
|
#
|
106
|
-
# @return [
|
107
|
-
def
|
108
|
-
|
100
|
+
# @return [Hash]
|
101
|
+
def summary(permutations = 99, seed = nil)
|
102
|
+
p_val = mc(permutations, seed)
|
103
|
+
{ stat: stat, p: p_val }
|
109
104
|
end
|
110
105
|
|
111
106
|
##
|
112
|
-
#
|
107
|
+
# Values of the +field+ queried from the +scope+
|
113
108
|
#
|
114
109
|
# @return [Array]
|
115
|
-
def
|
116
|
-
x
|
110
|
+
def x
|
111
|
+
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
112
|
+
.standardize
|
117
113
|
end
|
114
|
+
alias z x
|
118
115
|
|
119
116
|
private
|
120
117
|
|
118
|
+
def stat_mc(perms)
|
119
|
+
z_arr = Numo::DFloat.cast(z)
|
120
|
+
lag = w.dot(perms.transpose)
|
121
|
+
z_arr.dot(lag) / (z_arr**2).sum
|
122
|
+
end
|
123
|
+
|
121
124
|
def s3_calc(n, zs)
|
122
125
|
numerator = (1.0 / n) * zs.sum { |v| v**4 }
|
123
126
|
denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
|
124
127
|
numerator / denominator
|
125
128
|
end
|
126
129
|
|
127
|
-
|
130
|
+
# use row_index to take slices of wij
|
131
|
+
def s2_calc(n, wij, row_index)
|
128
132
|
s2 = 0
|
133
|
+
wij_arr = wij.to_a # for row slicing
|
129
134
|
(0..n - 1).each do |idx|
|
135
|
+
row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
|
130
136
|
left_term = 0
|
131
137
|
right_term = 0
|
132
|
-
|
133
|
-
|
134
|
-
|
138
|
+
|
139
|
+
row.each do |coords, val|
|
140
|
+
left_term += val
|
141
|
+
right_term += wij[coords.reverse] || 0
|
135
142
|
end
|
136
143
|
s2 += (left_term + right_term)**2
|
137
144
|
end
|
138
145
|
s2
|
139
146
|
end
|
140
147
|
|
141
|
-
def s1_calc(
|
148
|
+
def s1_calc(wij)
|
142
149
|
s1 = 0
|
143
|
-
|
144
|
-
(
|
145
|
-
|
146
|
-
|
150
|
+
wij.each do |coords, val|
|
151
|
+
# (wij + wji)**2
|
152
|
+
wji = wij[coords.reverse] || 0
|
153
|
+
s1 += (val + wji)**2
|
147
154
|
end
|
148
155
|
s1 / 2
|
149
156
|
end
|
@@ -11,10 +11,25 @@ module SpatialStats
|
|
11
11
|
def initialize(scope, field, weights)
|
12
12
|
@scope = scope
|
13
13
|
@field = field
|
14
|
-
@weights = weights
|
14
|
+
@weights = weights.standardize
|
15
15
|
end
|
16
16
|
attr_accessor :scope, :field, :weights
|
17
17
|
|
18
|
+
##
|
19
|
+
# A new instance of Stat, from vector and weights.
|
20
|
+
#
|
21
|
+
# @param [Array] x observations of dataset
|
22
|
+
# @param [WeightsMatrix] weights to define relationships between observations
|
23
|
+
#
|
24
|
+
# @return [Stat]
|
25
|
+
def self.from_observations(x, weights)
|
26
|
+
raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
|
27
|
+
|
28
|
+
instance = new(nil, nil, weights.standardize)
|
29
|
+
instance.x = x
|
30
|
+
instance
|
31
|
+
end
|
32
|
+
|
18
33
|
def stat
|
19
34
|
raise NotImplementedError, 'method stat not defined'
|
20
35
|
end
|
@@ -39,27 +54,36 @@ module SpatialStats
|
|
39
54
|
(stat - expectation) / Math.sqrt(variance)
|
40
55
|
end
|
41
56
|
|
57
|
+
def x=(values)
|
58
|
+
@x = values.standardize
|
59
|
+
end
|
60
|
+
alias z= x=
|
61
|
+
|
62
|
+
def y=(values)
|
63
|
+
@y = values.standardize
|
64
|
+
end
|
65
|
+
|
42
66
|
def mc(permutations, seed)
|
43
67
|
rng = gen_rng(seed)
|
44
68
|
shuffles = []
|
45
69
|
permutations.times do
|
46
70
|
shuffles << x.shuffle(random: rng)
|
47
71
|
end
|
72
|
+
shuffles = Numo::DFloat.cast(shuffles)
|
73
|
+
|
48
74
|
# r is the number of samples equal to or more extreme than the observed stat
|
49
75
|
# one sided
|
50
|
-
stat_orig = stat
|
51
|
-
r = 0
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
end
|
62
|
-
end
|
76
|
+
stat_orig = stat.round(5)
|
77
|
+
# r = 0
|
78
|
+
|
79
|
+
# compute new stat values
|
80
|
+
stat_new = stat_mc(shuffles)
|
81
|
+
|
82
|
+
r = if stat_orig.positive?
|
83
|
+
(stat_new >= stat_orig).count
|
84
|
+
else
|
85
|
+
(stat_new <= stat_orig).count
|
86
|
+
end
|
63
87
|
|
64
88
|
(r + 1.0) / (permutations + 1.0)
|
65
89
|
end
|
@@ -71,27 +95,31 @@ module SpatialStats
|
|
71
95
|
permutations.times do
|
72
96
|
shuffles << y.shuffle(random: rng)
|
73
97
|
end
|
98
|
+
shuffles = Numo::DFloat.cast(shuffles)
|
74
99
|
|
75
100
|
# r is the number of samples equal to or more extreme than the observed stat
|
76
|
-
stat_orig = stat
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
r += 1 if klass.stat >= stat_orig
|
85
|
-
else
|
86
|
-
r += 1 if klass.stat <= stat_orig
|
87
|
-
end
|
88
|
-
end
|
101
|
+
stat_orig = stat.round(5)
|
102
|
+
stat_new = stat_mc(shuffles)
|
103
|
+
|
104
|
+
r = if stat_orig.positive?
|
105
|
+
(stat_new >= stat_orig).count
|
106
|
+
else
|
107
|
+
(stat_new <= stat_orig).count
|
108
|
+
end
|
89
109
|
|
90
110
|
(r + 1.0) / (permutations + 1.0)
|
91
111
|
end
|
92
112
|
|
93
113
|
private
|
94
114
|
|
115
|
+
def stat_mc(_shuffles)
|
116
|
+
raise NotImplementedError, 'private method stat_mc not defined'
|
117
|
+
end
|
118
|
+
|
119
|
+
def w
|
120
|
+
@w ||= weights.dense
|
121
|
+
end
|
122
|
+
|
95
123
|
def gen_rng(seed)
|
96
124
|
if seed
|
97
125
|
Random.new(seed)
|