spatial_stats 0.2.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ #ifndef CSR_MATRIX
2
+ #define CSR_MATRIX
3
+
4
+ typedef struct csr_matrix
5
+ {
6
+ char init;
7
+ int n;
8
+ int nnz;
9
+ double *values;
10
+ int *col_index;
11
+ int *row_index;
12
+ } csr_matrix;
13
+
14
+ void csr_matrix_free(void *mat);
15
+ size_t csr_matrix_memsize(const void *ptr);
16
+
17
+ // Ruby typed-data descriptor (rb_data_type_t) for csr_matrix
18
+ static const rb_data_type_t csr_matrix_type = {
19
+ "SpatialStats::Weights::CSRMatrix",
20
+ {NULL, csr_matrix_free, csr_matrix_memsize},
21
+ 0,
22
+ 0,
23
+ RUBY_TYPED_FREE_IMMEDIATELY};
24
+
25
+ void mat_to_sparse(csr_matrix *csr, VALUE data, VALUE keys, VALUE num_rows);
26
+ VALUE csr_matrix_alloc(VALUE self);
27
+ VALUE csr_matrix_initialize(VALUE self, VALUE data, VALUE num_rows);
28
+ VALUE csr_matrix_values(VALUE self);
29
+ VALUE csr_matrix_col_index(VALUE self);
30
+ VALUE csr_matrix_row_index(VALUE self);
31
+ VALUE csr_matrix_mulvec(VALUE self, VALUE vec);
32
+ VALUE csr_matrix_dot_row(VALUE self, VALUE vec, VALUE row);
33
+ VALUE csr_matrix_coordinates(VALUE self);
34
+ #endif
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+
5
+ create_header
6
+ create_makefile 'spatial_stats/spatial_stats'
@@ -0,0 +1,32 @@
1
+ #include <ruby.h>
2
+ #include "csr_matrix.h"
3
+
4
+ /**
5
+ * Document-class: SpatialStats::Weights::CSRMatrix
6
+ *
7
+ * CSRMatrix partially implements a compressed sparse row matrix to perform
8
+ * spatial lag and other calculations. This will generally be used
9
+ * to store the weights of an observation set.
10
+ *
11
+ * @see https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
12
+ *
13
+ */
14
+ void Init_spatial_stats()
15
+ {
16
+ VALUE spatial_stats_mod = rb_define_module("SpatialStats");
17
+ VALUE weights_mod = rb_define_module_under(spatial_stats_mod, "Weights");
18
+ VALUE csr_matrix_class = rb_define_class_under(weights_mod, "CSRMatrix", rb_cData);
19
+
20
+ rb_define_alloc_func(csr_matrix_class, csr_matrix_alloc);
21
+ rb_define_method(csr_matrix_class, "initialize", csr_matrix_initialize, 2);
22
+ rb_define_method(csr_matrix_class, "values", csr_matrix_values, 0);
23
+ rb_define_method(csr_matrix_class, "col_index", csr_matrix_col_index, 0);
24
+ rb_define_method(csr_matrix_class, "row_index", csr_matrix_row_index, 0);
25
+ rb_define_method(csr_matrix_class, "mulvec", csr_matrix_mulvec, 1);
26
+ rb_define_method(csr_matrix_class, "dot_row", csr_matrix_dot_row, 2);
27
+ rb_define_method(csr_matrix_class, "coordinates", csr_matrix_coordinates, 0);
28
+
29
+ rb_define_attr(csr_matrix_class, "m", 1, 0);
30
+ rb_define_attr(csr_matrix_class, "n", 1, 0);
31
+ rb_define_attr(csr_matrix_class, "nnz", 1, 0);
32
+ }
@@ -6,6 +6,7 @@ require 'spatial_stats/global'
6
6
  require 'spatial_stats/local'
7
7
  require 'spatial_stats/narray_ext'
8
8
  require 'spatial_stats/queries'
9
+ require 'spatial_stats/spatial_stats' # c extensions
9
10
  require 'spatial_stats/utils'
10
11
  require 'spatial_stats/weights'
11
12
 
@@ -19,9 +19,25 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
+ end
24
+
25
+ ##
26
+ # A new instance of BivariateMoran, from +x+ and +y+ observation vectors and weights.
27
+ #
28
+ # @param [Array] x observations of dataset
29
+ # @param [WeightsMatrix] weights to define relationships between observations
30
+ #
31
+ # @return [BivariateMoran]
32
+ def self.from_observations(x, y, weights)
33
+ n = weights.n
34
+ raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
35
+
36
+ instance = new(nil, nil, nil, weights.standardize)
37
+ instance.x = x
38
+ instance.y = y
39
+ instance
23
40
  end
24
- attr_writer :x, :y
25
41
 
26
42
  ##
27
43
  # Computes the global spatial correlation of x against spatially lagged
@@ -29,8 +45,7 @@ module SpatialStats
29
45
  #
30
46
  # @return [Float]
31
47
  def stat
32
- w = @weights.standardized
33
- y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
48
+ y_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, y)
34
49
  numerator = 0
35
50
  x.each_with_index do |xi, idx|
36
51
  numerator += xi * y_lag[idx]
@@ -55,19 +70,20 @@ module SpatialStats
55
70
  #
56
71
  # @return [Float]
57
72
  def variance
58
- n = @weights.n
59
- wij = @weights.full
60
- w = wij.sum
73
+ n = weights.n
74
+ w_sum = n.to_f
61
75
  e = expectation
62
76
 
63
- s1 = s1_calc(n, wij)
64
- s2 = s2_calc(n, wij)
77
+ wij = weights.sparse.coordinates
78
+
79
+ s1 = s1_calc(wij)
80
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
65
81
  s3 = s3_calc(n, x)
66
82
 
67
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
68
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
83
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
84
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
69
85
 
70
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
86
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
71
87
  var_right = e**2
72
88
  var_left - var_right
73
89
  end
@@ -88,6 +104,19 @@ module SpatialStats
88
104
  mc_bv(permutations, seed)
89
105
  end
90
106
 
107
+ ##
108
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
109
+ # in a hash.
110
+ #
111
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
112
+ # @param [Integer] seed used in random number generator for shuffles.
113
+ #
114
+ # @return [Hash]
115
+ def summary(permutations = 99, seed = nil)
116
+ p_val = mc(permutations, seed)
117
+ { stat: stat, p: p_val }
118
+ end
119
+
91
120
  ##
92
121
  # Standardized variables queried from +x_field+.
93
122
  #
@@ -108,32 +137,41 @@ module SpatialStats
108
137
 
109
138
  private
110
139
 
140
+ def stat_mc(perms)
141
+ x_arr = Numo::DFloat.cast(x)
142
+ lag = w.dot(perms.transpose)
143
+ x_arr.dot(lag) / (x_arr**2).sum
144
+ end
145
+
111
146
  def s3_calc(n, zs)
112
147
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
113
148
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
114
149
  numerator / denominator
115
150
  end
116
151
 
117
- def s2_calc(n, wij)
152
+ def s2_calc(n, wij, row_index)
118
153
  s2 = 0
119
- (0..n - 1).each do |i|
154
+ wij_arr = wij.to_a # for row slicing
155
+ (0..n - 1).each do |idx|
156
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
120
157
  left_term = 0
121
158
  right_term = 0
122
- (0..n - 1).each do |j|
123
- left_term += wij[i, j]
124
- right_term += wij[j, i]
159
+
160
+ row.each do |coords, val|
161
+ left_term += val
162
+ right_term += wij[coords.reverse] || 0
125
163
  end
126
164
  s2 += (left_term + right_term)**2
127
165
  end
128
166
  s2
129
167
  end
130
168
 
131
- def s1_calc(n, wij)
169
+ def s1_calc(wij)
132
170
  s1 = 0
133
- (0..n - 1).each do |i|
134
- (0..n - 1).each do |j|
135
- s1 += (wij[i, j] + wij[j, i])**2
136
- end
171
+ wij.each do |coords, val|
172
+ # (wij + wji)**2
173
+ wji = wij[coords.reverse] || 0
174
+ s1 += (val + wji)**2
137
175
  end
138
176
  s1 / 2
139
177
  end
@@ -18,7 +18,6 @@ module SpatialStats
18
18
  def initialize(scope, field, weights)
19
19
  super(scope, field, weights)
20
20
  end
21
- attr_writer :x
22
21
 
23
22
  ##
24
23
  # Computes the global spatial autocorrelation of x against a spatially
@@ -29,8 +28,7 @@ module SpatialStats
29
28
  # compute's Moran's I. numerator is sum of zi * spatial lag of zi
30
29
  # denominator is sum of zi**2.
31
30
  # have to use row-standardized weights
32
- w = @weights.standardized
33
- z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
31
+ z_lag = SpatialStats::Utils::Lag.neighbor_sum(weights, z)
34
32
  numerator = 0
35
33
  z.each_with_index do |zi, j|
36
34
  row_sum = zi * z_lag[j]
@@ -49,7 +47,7 @@ module SpatialStats
49
47
  # @return [Float]
50
48
  def expectation
51
49
  # -1/(n-1)
52
- -1.0 / (@weights.n - 1)
50
+ -1.0 / (weights.n - 1)
53
51
  end
54
52
 
55
53
  ##
@@ -58,19 +56,20 @@ module SpatialStats
58
56
  #
59
57
  # @return [Float]
60
58
  def variance
61
- n = @weights.n
62
- wij = @weights.full
63
- w = wij.sum
59
+ n = weights.n
60
+ w_sum = n # standardized weights
64
61
  e = expectation
65
62
 
66
- s1 = s1_calc(n, wij)
67
- s2 = s2_calc(n, wij)
63
+ wij = weights.sparse.coordinates
64
+
65
+ s1 = s1_calc(wij)
66
+ s2 = s2_calc(n, wij, weights.sparse.row_index)
68
67
  s3 = s3_calc(n, z)
69
68
 
70
- s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
71
- s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)
69
+ s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w_sum**2)
70
+ s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w_sum**2)
72
71
 
73
- var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
72
+ var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w_sum**2)
74
73
  var_right = e**2
75
74
  var_left - var_right
76
75
  end
@@ -92,58 +91,66 @@ module SpatialStats
92
91
  end
93
92
 
94
93
  ##
95
- # Values of the +field+ queried from the +scope+
94
+ # Summary of the statistic. Computes +stat+ and +mc+ and returns the values
95
+ # in a hash.
96
96
  #
97
- # @return [Array]
98
- def x
99
- @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
100
- end
101
-
102
- # TODO: remove these last 2 methods and just standardize x.
103
- ##
104
- # Mean of x
97
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
98
+ # @param [Integer] seed used in random number generator for shuffles.
105
99
  #
106
- # @return [Float]
107
- def zbar
108
- x.sum / x.size
100
+ # @return [Hash]
101
+ def summary(permutations = 99, seed = nil)
102
+ p_val = mc(permutations, seed)
103
+ { stat: stat, p: p_val }
109
104
  end
110
105
 
111
106
  ##
112
- # Array of xi - zbar for i: [0:n-1]
107
+ # Values of the +field+ queried from the +scope+
113
108
  #
114
109
  # @return [Array]
115
- def z
116
- x.map { |val| val - zbar }
110
+ def x
111
+ @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
112
+ .standardize
117
113
  end
114
+ alias z x
118
115
 
119
116
  private
120
117
 
118
+ def stat_mc(perms)
119
+ z_arr = Numo::DFloat.cast(z)
120
+ lag = w.dot(perms.transpose)
121
+ z_arr.dot(lag) / (z_arr**2).sum
122
+ end
123
+
121
124
  def s3_calc(n, zs)
122
125
  numerator = (1.0 / n) * zs.sum { |v| v**4 }
123
126
  denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
124
127
  numerator / denominator
125
128
  end
126
129
 
127
- def s2_calc(n, wij)
130
+ # use row_index to take slices of wij
131
+ def s2_calc(n, wij, row_index)
128
132
  s2 = 0
133
+ wij_arr = wij.to_a # for row slicing
129
134
  (0..n - 1).each do |idx|
135
+ row = wij_arr[row_index[idx]..(row_index[idx + 1] - 1)]
130
136
  left_term = 0
131
137
  right_term = 0
132
- (0..n - 1).each do |j|
133
- left_term += wij[idx, j]
134
- right_term += wij[j, idx]
138
+
139
+ row.each do |coords, val|
140
+ left_term += val
141
+ right_term += wij[coords.reverse] || 0
135
142
  end
136
143
  s2 += (left_term + right_term)**2
137
144
  end
138
145
  s2
139
146
  end
140
147
 
141
- def s1_calc(n, wij)
148
+ def s1_calc(wij)
142
149
  s1 = 0
143
- (0..n - 1).each do |idx|
144
- (0..n - 1).each do |j|
145
- s1 += (wij[idx, j] + wij[j, idx])**2
146
- end
150
+ wij.each do |coords, val|
151
+ # (wij + wji)**2
152
+ wji = wij[coords.reverse] || 0
153
+ s1 += (val + wji)**2
147
154
  end
148
155
  s1 / 2
149
156
  end
@@ -11,10 +11,25 @@ module SpatialStats
11
11
  def initialize(scope, field, weights)
12
12
  @scope = scope
13
13
  @field = field
14
- @weights = weights
14
+ @weights = weights.standardize
15
15
  end
16
16
  attr_accessor :scope, :field, :weights
17
17
 
18
+ ##
19
+ # A new instance of Stat, from vector and weights.
20
+ #
21
+ # @param [Array] x observations of dataset
22
+ # @param [WeightsMatrix] weights to define relationships between observations
23
+ #
24
+ # @return [Stat]
25
+ def self.from_observations(x, weights)
26
+ raise ArgumentError, 'Data size != weights.n' if x.size != weights.n
27
+
28
+ instance = new(nil, nil, weights.standardize)
29
+ instance.x = x
30
+ instance
31
+ end
32
+
18
33
  def stat
19
34
  raise NotImplementedError, 'method stat not defined'
20
35
  end
@@ -39,27 +54,36 @@ module SpatialStats
39
54
  (stat - expectation) / Math.sqrt(variance)
40
55
  end
41
56
 
57
+ def x=(values)
58
+ @x = values.standardize
59
+ end
60
+ alias z= x=
61
+
62
+ def y=(values)
63
+ @y = values.standardize
64
+ end
65
+
42
66
  def mc(permutations, seed)
43
67
  rng = gen_rng(seed)
44
68
  shuffles = []
45
69
  permutations.times do
46
70
  shuffles << x.shuffle(random: rng)
47
71
  end
72
+ shuffles = Numo::DFloat.cast(shuffles)
73
+
48
74
  # r is the number of equal to or more extreme samples
49
75
  # one sided
50
- stat_orig = stat
51
- r = 0
52
- shuffles.each do |shuffle|
53
- klass = self.class.new(@scope, @field, @weights)
54
- klass.x = shuffle
55
-
56
- # https://geodacenter.github.io/glossary.html#ppvalue
57
- if stat_orig.positive?
58
- r += 1 if klass.stat >= stat_orig
59
- else
60
- r += 1 if klass.stat <= stat_orig
61
- end
62
- end
76
+ stat_orig = stat.round(5)
77
+ # r = 0
78
+
79
+ # compute new stat values
80
+ stat_new = stat_mc(shuffles)
81
+
82
+ r = if stat_orig.positive?
83
+ (stat_new >= stat_orig).count
84
+ else
85
+ (stat_new <= stat_orig).count
86
+ end
63
87
 
64
88
  (r + 1.0) / (permutations + 1.0)
65
89
  end
@@ -71,27 +95,31 @@ module SpatialStats
71
95
  permutations.times do
72
96
  shuffles << y.shuffle(random: rng)
73
97
  end
98
+ shuffles = Numo::DFloat.cast(shuffles)
74
99
 
75
100
  # r is the number of equal to or more extreme samples
76
- stat_orig = stat
77
- r = 0
78
- shuffles.each do |shuffle|
79
- klass = self.class.new(@scope, @x_field, @y_field, @weights)
80
- klass.x = x
81
- klass.y = shuffle
82
-
83
- if stat_orig.positive?
84
- r += 1 if klass.stat >= stat_orig
85
- else
86
- r += 1 if klass.stat <= stat_orig
87
- end
88
- end
101
+ stat_orig = stat.round(5)
102
+ stat_new = stat_mc(shuffles)
103
+
104
+ r = if stat_orig.positive?
105
+ (stat_new >= stat_orig).count
106
+ else
107
+ (stat_new <= stat_orig).count
108
+ end
89
109
 
90
110
  (r + 1.0) / (permutations + 1.0)
91
111
  end
92
112
 
93
113
  private
94
114
 
115
+ def stat_mc(_shuffles)
116
+ raise NotImplementedError, 'private method stat_mc not defined'
117
+ end
118
+
119
+ def w
120
+ @w ||= weights.dense
121
+ end
122
+
95
123
  def gen_rng(seed)
96
124
  if seed
97
125
  Random.new(seed)