spatial_stats 0.2.2 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,34 @@
1
+ #ifndef CSR_MATRIX
2
+ #define CSR_MATRIX
3
+
4
+ typedef struct csr_matrix
5
+ {
6
+ char init;
7
+ int n;
8
+ int nnz;
9
+ double *values;
10
+ int *col_index;
11
+ int *row_index;
12
+ } csr_matrix;
13
+
14
+ void csr_matrix_free(void *mat);
15
+ size_t csr_matrix_memsize(const void *ptr);
16
+
17
+ // Ruby typed-data descriptor (rb_data_type_t) for csr_matrix
18
+ static const rb_data_type_t csr_matrix_type = {
19
+ "SpatialStats::Weights::CSRMatrix",
20
+ {NULL, csr_matrix_free, csr_matrix_memsize},
21
+ 0,
22
+ 0,
23
+ RUBY_TYPED_FREE_IMMEDIATELY};
24
+
25
+ void mat_to_sparse(csr_matrix *csr, VALUE data, VALUE keys, VALUE num_rows);
26
+ VALUE csr_matrix_alloc(VALUE self);
27
+ VALUE csr_matrix_initialize(VALUE self, VALUE data, VALUE num_rows);
28
+ VALUE csr_matrix_values(VALUE self);
29
+ VALUE csr_matrix_col_index(VALUE self);
30
+ VALUE csr_matrix_row_index(VALUE self);
31
+ VALUE csr_matrix_mulvec(VALUE self, VALUE vec);
32
+ VALUE csr_matrix_dot_row(VALUE self, VALUE vec, VALUE row);
33
+ VALUE csr_matrix_coordinates(VALUE self);
34
+ #endif
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+
5
+ create_header
6
+ create_makefile 'spatial_stats/spatial_stats'
@@ -0,0 +1,32 @@
1
+ #include <ruby.h>
2
+ #include "csr_matrix.h"
3
+
4
+ /**
5
+ * Document-class: SpatialStats::Weights::CSRMatrix
6
+ *
7
+ * CSRMatrix partially implements a compressed sparse row matrix to perform
8
+ * spatial lag and other calculations. This will generally be used
9
+ * to store the weights of an observation set.
10
+ *
11
+ * @see https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
12
+ *
13
+ */
14
+ void Init_spatial_stats()
15
+ {
16
+ VALUE spatial_stats_mod = rb_define_module("SpatialStats");
17
+ VALUE weights_mod = rb_define_module_under(spatial_stats_mod, "Weights");
18
+ VALUE csr_matrix_class = rb_define_class_under(weights_mod, "CSRMatrix", rb_cData);
19
+
20
+ rb_define_alloc_func(csr_matrix_class, csr_matrix_alloc);
21
+ rb_define_method(csr_matrix_class, "initialize", csr_matrix_initialize, 2);
22
+ rb_define_method(csr_matrix_class, "values", csr_matrix_values, 0);
23
+ rb_define_method(csr_matrix_class, "col_index", csr_matrix_col_index, 0);
24
+ rb_define_method(csr_matrix_class, "row_index", csr_matrix_row_index, 0);
25
+ rb_define_method(csr_matrix_class, "mulvec", csr_matrix_mulvec, 1);
26
+ rb_define_method(csr_matrix_class, "dot_row", csr_matrix_dot_row, 2);
27
+ rb_define_method(csr_matrix_class, "coordinates", csr_matrix_coordinates, 0);
28
+
29
+ rb_define_attr(csr_matrix_class, "m", 1, 0);
30
+ rb_define_attr(csr_matrix_class, "n", 1, 0);
31
+ rb_define_attr(csr_matrix_class, "nnz", 1, 0);
32
+ }
@@ -6,6 +6,7 @@ require 'spatial_stats/global'
6
6
  require 'spatial_stats/local'
7
7
  require 'spatial_stats/narray_ext'
8
8
  require 'spatial_stats/queries'
9
+ require 'spatial_stats/spatial_stats' # c extensions
9
10
  require 'spatial_stats/utils'
10
11
  require 'spatial_stats/weights'
11
12
 
@@ -19,9 +19,25 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
+ end
24
+
25
+ ##
26
+ # A new instance of BivariateMoran, from x and y observation vectors and weights.
27
+ #
28
+ # @param [Array] x observations of dataset
29
+ # @param [WeightsMatrix] weights to define relationships between observations
30
+ #
31
+ # @return [BivariateMoran]
32
+ def self.from_observations(x, y, weights)
33
+ n = weights.n
34
+ raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
35
+
36
+ instance = new(nil, nil, nil, weights.standardize)
37
+ instance.x = x
38
+ instance.y = y
39
+ instance
23
40
  end
24
- attr_writer :x, :y
25
41
 
26
42
  ##
27
43
  # Computes the global spatial correlation of x against spatially lagged
@@ -29,8 +45,7 @@ module SpatialStats
29
45
  #
30
46
  # @return [Float]
31
47
  def stat
32
- w = @weights.standardized
33
- y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
48
+ y_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, y)
34
49
  numerator = 0
35
50
  x.each_with_index do |xi, idx|
36
51
  numerator += xi * y_lag[idx]
@@ -55,19 +70,20 @@ module SpatialStats
55
70
  #
56
71
  # @return [Float]
57
72
  def variance
58
- n = @weights.n
59
- wij = @weights.full
60
- w = wij.sum
73
+ n = weights.n
74
+ w_sum = n.to_f
61
75
  e = expectation
62
76
 
63
- s1 = s1_calc(n, wij)
64
- s2 = s2_calc(n, wij)
77
+ wij = weights.sparse.coordinates
78
+
79
+ s1 = s1_calc(wij)
80
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
65
81
  s3 = s3_calc(n, x)
66
82
 
67
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
68
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
83
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
84
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
69
85
 
70
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
86
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
71
87
  var_right = e**2
72
88
  var_left - var_right
73
89
  end
@@ -88,6 +104,19 @@ module SpatialStats
88
104
  mc_bv(permutations, seed)
89
105
  end
90
106
 
107
+ ##
108
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
109
+ # in a hash.
110
+ #
111
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
112
+ # @param [Integer] seed used in random number generator for shuffles.
113
+ #
114
+ # @return [Hash]
115
+ def summary(permutations = 99, seed = nil)
116
+ p_val = mc(permutations, seed)
117
+ { stat: stat, p: p_val }
118
+ end
119
+
91
120
  ##
92
121
  # Standardized variables queried from +x_field+.
93
122
  #
@@ -108,32 +137,41 @@ module SpatialStats
108
137
 
109
138
  private
110
139
 
140
+ def stat_mc(perms)
141
+ x_arr = Numo::DFloat.cast(x)
142
+ lag = w.dot(perms.transpose)
143
+ x_arr.dot(lag) / (x_arr**2).sum
144
+ end
145
+
111
146
  def s3_calc(n, zs)
112
147
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
113
148
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
114
149
  numerator / denominator
115
150
  end
116
151
 
117
- def s2_calc(n, wij)
152
+ def s2_calc(n, wij, row_index)
118
153
  s2 = 0
119
- (0..n - 1).each do |i|
154
+ wij_arr = wij.to_a # for row slicing
155
+ (0..n - 1).each do |idx|
156
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
120
157
  left_term = 0
121
158
  right_term = 0
122
- (0..n - 1).each do |j|
123
- left_term += wij[i, j]
124
- right_term += wij[j, i]
159
+
160
+ row.each do |coords, val|
161
+ left_term += val
162
+ right_term += wij[coords.reverse] || 0
125
163
  end
126
164
  s2 += (left_term + right_term)**2
127
165
  end
128
166
  s2
129
167
  end
130
168
 
131
- def s1_calc(n, wij)
169
+ def s1_calc(wij)
132
170
  s1 = 0
133
- (0..n - 1).each do |i|
134
- (0..n - 1).each do |j|
135
- s1 += (wij[i, j] + wij[j, i])**2
136
- end
171
+ wij.each do |coords, val|
172
+ # (wij + wji)**2
173
+ wji = wij[coords.reverse] || 0
174
+ s1 += (val + wji)**2
137
175
  end
138
176
  s1 / 2
139
177
  end
@@ -18,7 +18,6 @@ module SpatialStats
18
18
  def initialize(scope, field, weights)
19
19
  super(scope, field, weights)
20
20
  end
21
- attr_writer :x
22
21
 
23
22
  ##
24
23
  # Computes the global spatial autocorrelation of x against a spatially
@@ -29,8 +28,7 @@ module SpatialStats
29
28
  # computes Moran's I. numerator is sum of zi * spatial lag of zi
30
29
  # denominator is sum of zi**2.
31
30
  # have to use row-standardized weights
32
- w = @weights.standardized
33
- z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
31
+ z_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, z)
34
32
  numerator = 0
35
33
  z.each_with_index do |zi, j|
36
34
  row_sum = zi * z_lag[j]
@@ -49,7 +47,7 @@ module SpatialStats
49
47
  # @return [Float]
50
48
  def expectation
51
49
  # -1/(n-1)
52
- -1.0 / (@weights.n - 1)
50
+ -1.0 / (weights.n - 1)
53
51
  end
54
52
 
55
53
  ##
@@ -58,19 +56,20 @@ module SpatialStats
58
56
  #
59
57
  # @return [Float]
60
58
  def variance
61
- n = @weights.n
62
- wij = @weights.full
63
- w = wij.sum
59
+ n = weights.n
60
+ w_sum = n # standardized weights
64
61
  e = expectation
65
62
 
66
- s1 = s1_calc(n, wij)
67
- s2 = s2_calc(n, wij)
63
+ wij = weights.sparse.coordinates
64
+
65
+ s1 = s1_calc(wij)
66
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
68
67
  s3 = s3_calc(n, z)
69
68
 
70
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
71
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
69
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
70
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
72
71
 
73
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
72
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
74
73
  var_right = e**2
75
74
  var_left - var_right
76
75
  end
@@ -92,58 +91,66 @@ module SpatialStats
92
91
  end
93
92
 
94
93
  ##
95
- # Values of the +field+ queried from the +scope+
94
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
95
+ # in a hash.
96
96
  #
97
- # @return [Array]
98
- def x
99
- @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
100
- end
101
-
102
- # TODO: remove these last 2 methods and just standardize x.
103
- ##
104
- # Mean of x
97
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
98
+ # @param [Integer] seed used in random number generator for shuffles.
105
99
  #
106
- # @return [Float]
107
- def zbar
108
- x.sum / x.size
100
+ # @return [Hash]
101
+ def summary(permutations = 99, seed = nil)
102
+ p_val = mc(permutations, seed)
103
+ { stat: stat, p: p_val }
109
104
  end
110
105
 
111
106
  ##
112
- # Array of xi - zbar for i: [0:n-1]
107
+ # Values of the +field+ queried from the +scope+
113
108
  #
114
109
  # @return [Array]
115
- def z
116
- x.map { |val| val - zbar }
110
+ def x
111
+ @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
112
+ .standardize
117
113
  end
114
+ alias z x
118
115
 
119
116
  private
120
117
 
118
+ def stat_mc(perms)
119
+ z_arr = Numo::DFloat.cast(z)
120
+ lag = w.dot(perms.transpose)
121
+ z_arr.dot(lag) / (z_arr**2).sum
122
+ end
123
+
121
124
  def s3_calc(n, zs)
122
125
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
123
126
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
124
127
  numerator / denominator
125
128
  end
126
129
 
127
- def s2_calc(n, wij)
130
+ # use row_index to take slices of wij
131
+ def s2_calc(n, wij, row_index)
128
132
  s2 = 0
133
+ wij_arr = wij.to_a # for row slicing
129
134
  (0..n - 1).each do |idx|
135
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
130
136
  left_term = 0
131
137
  right_term = 0
132
- (0..n - 1).each do |j|
133
- left_term += wij[idx, j]
134
- right_term += wij[j, idx]
138
+
139
+ row.each do |coords, val|
140
+ left_term += val
141
+ right_term += wij[coords.reverse] || 0
135
142
  end
136
143
  s2 += (left_term + right_term)**2
137
144
  end
138
145
  s2
139
146
  end
140
147
 
141
- def s1_calc(n, wij)
148
+ def s1_calc(wij)
142
149
  s1 = 0
143
- (0..n - 1).each do |idx|
144
- (0..n - 1).each do |j|
145
- s1 += (wij[idx, j] + wij[j, idx])**2
146
- end
150
+ wij.each do |coords, val|
151
+ # (wij + wji)**2
152
+ wji = wij[coords.reverse] || 0
153
+ s1 += (val + wji)**2
147
154
  end
148
155
  s1 / 2
149
156
  end
@@ -11,10 +11,25 @@ module SpatialStats
11
11
  def initialize(scope, field, weights)
12
12
  @scope = scope
13
13
  @field = field
14
- @weights = weights
14
+ @weights = weights.standardize
15
15
  end
16
16
  attr_accessor :scope, :field, :weights
17
17
 
18
+ ##
19
+ # A new instance of Stat, from vector and weights.
20
+ #
21
+ # @param [Array] x observations of dataset
22
+ # @param [WeightsMatrix] weights to define relationships between observations
23
+ #
24
+ # @return [Stat]
25
+ def self.from_observations(x, weights)
26
+ raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
27
+
28
+ instance = new(nil, nil, weights.standardize)
29
+ instance.x = x
30
+ instance
31
+ end
32
+
18
33
  def stat
19
34
  raise NotImplementedError, 'method stat not defined'
20
35
  end
@@ -39,27 +54,36 @@ module SpatialStats
39
54
  (stat - expectation) / Math.sqrt(variance)
40
55
  end
41
56
 
57
+ def x=(values)
58
+ @x = values.standardize
59
+ end
60
+ alias z= x=
61
+
62
+ def y=(values)
63
+ @y = values.standardize
64
+ end
65
+
42
66
  def mc(permutations, seed)
43
67
  rng = gen_rng(seed)
44
68
  shuffles = []
45
69
  permutations.times do
46
70
  shuffles << x.shuffle(random: rng)
47
71
  end
72
+ shuffles = Numo::DFloat.cast(shuffles)
73
+
48
74
  # r is the number of equal to or more extreme samples
49
75
  # one sided
50
- stat_orig = stat
51
- r = 0
52
- shuffles.each do |shuffle|
53
- klass = self.class.new(@scope, @field, @weights)
54
- klass.x = shuffle
55
-
56
- # https://geodacenter.github.io/glossary.html#ppvalue
57
- if stat_orig.positive?
58
- r += 1 if klass.stat >= stat_orig
59
- else
60
- r += 1 if klass.stat <= stat_orig
61
- end
62
- end
76
+ stat_orig = stat.round(5)
77
+ # r = 0
78
+
79
+ # compute new stat values
80
+ stat_new = stat_mc(shuffles)
81
+
82
+ r = if stat_orig.positive?
83
+ (stat_new >= stat_orig).count
84
+ else
85
+ (stat_new <= stat_orig).count
86
+ end
63
87
 
64
88
  (r + 1.0) / (permutations + 1.0)
65
89
  end
@@ -71,27 +95,31 @@ module SpatialStats
71
95
  permutations.times do
72
96
  shuffles << y.shuffle(random: rng)
73
97
  end
98
+ shuffles = Numo::DFloat.cast(shuffles)
74
99
 
75
100
  # r is the number of equal to or more extreme samples
76
- stat_orig = stat
77
- r = 0
78
- shuffles.each do |shuffle|
79
- klass = self.class.new(@scope, @x_field, @y_field, @weights)
80
- klass.x = x
81
- klass.y = shuffle
82
-
83
- if stat_orig.positive?
84
- r += 1 if klass.stat >= stat_orig
85
- else
86
- r += 1 if klass.stat <= stat_orig
87
- end
88
- end
101
+ stat_orig = stat.round(5)
102
+ stat_new = stat_mc(shuffles)
103
+
104
+ r = if stat_orig.positive?
105
+ (stat_new >= stat_orig).count
106
+ else
107
+ (stat_new <= stat_orig).count
108
+ end
89
109
 
90
110
  (r + 1.0) / (permutations + 1.0)
91
111
  end
92
112
 
93
113
  private
94
114
 
115
+ def stat_mc(_shuffles)
116
+ raise NotImplementedError, 'private method stat_mc not defined'
117
+ end
118
+
119
+ def w
120
+ @w ||= weights.dense
121
+ end
122
+
95
123
  def gen_rng(seed)
96
124
  if seed
97
125
  Random.new(seed)