spatial_stats 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08787d83d402843b9d711fd88cdc5f22cbdacfb7a960d8bdb185591f10215f85'
4
- data.tar.gz: 96241b1ef7099ce6371f24dee56258121e0d6686ec746ac0411c4ed8ccaf5cad
3
+ metadata.gz: 3d63578b1952570f2de9a3ab045c8a977988960c2942c73416b86de6b228a091
4
+ data.tar.gz: 591927bf3c2fd3d7723b7e155bbae6a2c73313b45c749c6ab552e445e5f3b609
5
5
  SHA512:
6
- metadata.gz: 4ec63da930afeae2b68e3f821e82c8c18f18b11ffa3296ad34e56706b54db11170e5319133d84e9440360e984776cdc5adbd6193d7de252f6f618136a431696b
7
- data.tar.gz: 13964eb6a903e5ba0881b0ea2dfeab5e10fcfe51722d9e8a3342f2084200a60bc046d7375e29f651a5267d04a7b9b3a571f26b1ecf13ebf1c3fa2a3211ac6d8c
6
+ metadata.gz: dfe901b03685c5d66539793d7446738c49b0cdaae3022559544620962aa9d63e4d668d0177b8dc3c44cff797ce5c8ebed8fdeba87a52ec28ebdf0d02b69625b3
7
+ data.tar.gz: 3cfde56bb984bfbf3ab83bb89e02797a24eb47b1a8387e695890127ee5711f185a114f300906f96728c0e51996f223091f0a9455713f8fb3c3ff34b8285615da
data/README.md CHANGED
@@ -58,8 +58,6 @@ weights = SpatialStats::Weights::Distant.idw_knn(scope, :geom, 5)
58
58
 
59
59
  Weight matrices can be defined by a hash that maps each key to its neighbors and their weights.
60
60
 
61
- Note: Currently, the keys must be numeric.
62
-
63
61
  Example: Define WeightsMatrix and get the matrix in row_standardized format.
64
62
 
65
63
  ```ruby
@@ -70,30 +68,40 @@ weights = {
70
68
  4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
71
69
  }
72
70
  keys = weights.keys
73
- wm = SpatialStats::Weights::WeightsMatrix.new(keys, weights)
71
+ wm = SpatialStats::Weights::WeightsMatrix.new(weights)
74
72
  # => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>1}, {:id=>4, :weight=>1}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>1}, {:id=>3, :weight=>1}]}, @n=4>
75
73
 
76
- wm.standardized
77
- # => Numo::DFloat#shape=[4,4]
78
- #[[0, 0.5, 0, 0.5],
79
- # [1, 0, 0, 0],
80
- # [0, 0, 0, 1],
81
- # [0.5, 0, 0.5, 0]]
74
+ wm = wm.standardize
75
+ # => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>0.5}, {:id=>4, :weight=>0.5}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>0.5}, {:id=>3, :weight=>0.5}]}, @n=4>
76
+
77
+ wm.dense
78
+ # => Numo::DFloat[
79
+ # [0, 0.5, 0, 0.5],
80
+ # [1, 0, 0, 0],
81
+ # [0, 0, 0, 1],
82
+ # [0.5, 0, 0.5, 0]
83
+ # ]
84
+
85
+ wm.sparse
86
+ # => #<SpatialStats::Weights::CSRMatrix @m=4, @n=4, @nnz=6>
82
87
  ```
83
88
 
84
89
  ### Lagged Variables
85
90
 
86
- Spatially lagged variables can be computed with a 2-D n x n `Numo::NArray` and 1-D vector (`Array` or `Numo::NArray`).
91
+ Spatially lagged variables can be computed with a weights matrix and a 1-D vector (`Array`).
87
92
 
88
93
  #### Compute a Lagged Variable
89
94
 
90
95
  ```ruby
91
- w = Numo::DFloat[[0, 0.5, 0, 0.5],
92
- [1, 0, 0, 0],
93
- [0, 0, 0, 1],
94
- [0.5, 0, 0.5, 0]]
96
+ weights = {
97
+ 1 => [{ id: 2, weight: 1 }, { id: 4, weight: 1 }],
98
+ 2 => [{ id: 1, weight: 1 }],
99
+ 3 => [{ id: 4, weight: 1 }],
100
+ 4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
101
+ }
102
+ wm = SpatialStats::Weights::WeightsMatrix.new(weights).standardize
95
103
  vec = [1, 2, 3, 4]
96
- lagged_var = SpatialStats::Utils::Lag.neighbor_sum(w, vec)
104
+ lagged_var = SpatialStats::Utils::Lag.neighbor_sum(wm, vec)
97
105
  # => [3.0, 1.0, 4.0, 2.0]
98
106
  ```
99
107
 
@@ -60,35 +60,39 @@ module SpatialStats
60
60
  # of indices, which will return a list of new orders for the fields.
61
61
  # They will then be shuffled corresponding to the new indices.
62
62
  rng = gen_rng(seed)
63
- n = w.shape[0]
64
- indices = (0..(n - 1)).to_a
65
- shuffles = crand(indices, permutations, rng)
63
+ rids = crand(permutations, rng)
66
64
 
65
+ n_1 = weights.n - 1
66
+ sparse = weights.sparse
67
+ row_index = sparse.row_index
68
+ ws = sparse.values
69
+ wc = weights.wc
67
70
  stat_orig = stat
68
- rs = [0] * n
69
71
 
70
- row_index = weights.sparse.row_index
71
- ws = weights.sparse.values
72
-
73
- idx = 0
74
- while idx < n
75
- stat_i_orig = stat_orig[idx]
72
+ ids = (0..n_1).to_a
73
+ observations = Array.new(weights.n)
74
+ (0..n_1).each do |idx|
75
+ idsi = ids.dup
76
+ idsi.delete_at(idx)
77
+ idsi.shuffle!(random: rng)
78
+ idsi = Numo::Int32.cast(idsi)
79
+ sample = rids[idsi[rids[true, 0..wc[idx] - 1]]]
76
80
 
81
+ # account for case where there are no neighbors
77
82
  row_range = row_index[idx]..(row_index[idx + 1] - 1)
78
83
  if row_range.size.zero?
79
- rs[idx] = permutations
80
- idx += 1
84
+ observations[idx] = permutations
81
85
  next
82
86
  end
83
- wi = Numo::DFloat.cast(ws[row_range])
84
87
 
85
- # for each field, compute the C value at that index.
86
- stat_i_new = mc_i(wi, shuffles[idx], idx)
87
- rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new, permutations)
88
- idx += 1
88
+ wi = Numo::DFloat.cast(ws[row_range])
89
+ stat_i_new = mc_i(wi, sample, idx)
90
+ stat_i_orig = stat_orig[idx]
91
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
92
+ permutations)
89
93
  end
90
94
 
91
- rs.map do |ri|
95
+ observations.map do |ri|
92
96
  (ri + 1.0) / (permutations + 1.0)
93
97
  end
94
98
  end
@@ -42,12 +42,12 @@ module SpatialStats
42
42
 
43
43
  ##
44
44
  # Conditional randomization algorithm used in permutation testing.
45
- # Outputs an array of length n of Numo::DFloat matrices of
46
- # size m x num_neighbors. Where m is the number of permutations and
47
- # num_neighbors is the number of neighbors for that observation.
45
+ # Returns a matrix with permuted index values that will be used for
46
+ # selecting values from the original data set.
48
47
  #
49
- # The values are randomly permutated values from arr that will act
50
- # as its neighbors for that permutation.
48
+ # The width of the matrix is the max number of neighbors + 1,
49
+ # which is far smaller than it would be if the original vector
50
+ # were shuffled in full.
51
51
  #
52
52
  # This is super important because most weight matrices are very
53
53
  # sparse so the amount of shuffling/multiplication that is done
@@ -55,9 +55,9 @@ module SpatialStats
55
55
  #
56
56
  # @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
57
57
  #
58
- # @return [Array] of Numo::Narray matrices
58
+ # @return [Numo::Int32] matrix of shape permutations x (wc.max + 1)
59
59
  #
60
- def crand(arr, permutations, rng)
60
+ def crand(permutations, rng)
61
61
  # basing this off the ESDA method
62
62
  # need to get k for max_neighbors
63
63
  # and wc for cardinalities of each item
@@ -68,32 +68,13 @@ module SpatialStats
68
68
  # entry not the entire list of permutations for each entry.
69
69
  n_1 = weights.n - 1
70
70
 
71
- sparse = weights.sparse
72
- row_index = sparse.row_index
73
-
74
71
  # weight counts
75
- wc = Array.new(weights.n)
76
- k = 0
77
- (0..n_1).each do |idx|
78
- wc[idx] = row_index[idx + 1] - row_index[idx]
79
- end
80
-
72
+ wc = weights.wc
81
73
  k = wc.max + 1
82
74
  prange = (0..permutations - 1).to_a
83
75
 
84
- arr = Numo::DFloat.cast(arr)
85
-
86
- ids = (0..n_1).to_a
87
76
  ids_perm = (0..n_1 - 1).to_a
88
- rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
89
-
90
- (0..n_1).map do |idx|
91
- idsi = ids.dup
92
- idsi.delete_at(idx)
93
- idsi.shuffle!(random: rng)
94
- idsi = Numo::Int32.cast(idsi)
95
- arr[idsi[rids[true, 0..wc[idx] - 1]]]
96
- end
77
+ Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
97
78
  end
98
79
 
99
80
  ##
@@ -114,48 +95,40 @@ module SpatialStats
114
95
  # its neighbors. Then we will only test for that item instead
115
96
  # of the entire set. This will be done for each item.
116
97
  rng = gen_rng(seed)
117
- shuffles = crand(x, permutations, rng)
98
+ rids = crand(permutations, rng)
118
99
 
119
- n = weights.n
120
- # r is the number of equal to or more extreme samples
100
+ n_1 = weights.n - 1
101
+ sparse = weights.sparse
102
+ row_index = sparse.row_index
103
+ ws = sparse.values
104
+ wc = weights.wc
121
105
  stat_orig = stat
122
- rs = [0] * n
123
-
124
- row_index = weights.sparse.row_index
125
- ws = weights.sparse.values
126
106
 
127
- idx = 0
128
- while idx < n
129
- # need to truncate because floats from
130
- # c in sparse matrix are inconsistent with
131
- # dfloats
132
- stat_i_orig = stat_orig[idx]
107
+ arr = Numo::DFloat.cast(x)
108
+ ids = (0..n_1).to_a
109
+ observations = Array.new(weights.n)
110
+ (0..n_1).each do |idx|
111
+ idsi = ids.dup
112
+ idsi.delete_at(idx)
113
+ idsi.shuffle!(random: rng)
114
+ idsi = Numo::Int32.cast(idsi)
115
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
133
116
 
134
117
  # account for case where there are no neighbors
135
- # the way Numo handles negative ranges, it returns the max
136
- # so there will be a len 0 z array being multiplied by a
137
- # max_neighbor width permutation matrix.
138
- # Need to skip.
139
118
  row_range = row_index[idx]..(row_index[idx + 1] - 1)
140
119
  if row_range.size.zero?
141
- rs[idx] = permutations
142
- idx += 1
120
+ observations[idx] = permutations
143
121
  next
144
122
  end
145
- wi = Numo::DFloat.cast(ws[row_range])
146
- stat_i_new = mc_i(wi, shuffles[idx], idx)
147
123
 
148
- rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
149
- permutations)
150
- # rs[idx] = if stat_i_orig.positive?
151
- # (stat_i_new >= stat_i_orig).count
152
- # else
153
- # (stat_i_new <= stat_i_orig).count
154
- # end
155
- idx += 1
124
+ wi = Numo::DFloat.cast(ws[row_range])
125
+ stat_i_new = mc_i(wi, sample, idx)
126
+ stat_i_orig = stat_orig[idx]
127
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
128
+ permutations)
156
129
  end
157
130
 
158
- rs.map do |ri|
131
+ observations.map do |ri|
159
132
  (ri + 1.0) / (permutations + 1.0)
160
133
  end
161
134
  end
@@ -174,41 +147,40 @@ module SpatialStats
174
147
  # @return [Array] of p-values
175
148
  def mc_bv(permutations, seed)
176
149
  rng = gen_rng(seed)
177
- shuffles = crand(y, permutations, rng)
178
- n = weights.n
150
+ rids = crand(permutations, rng)
179
151
 
152
+ n_1 = weights.n - 1
153
+ sparse = weights.sparse
154
+ row_index = sparse.row_index
155
+ ws = sparse.values
156
+ wc = weights.wc
180
157
  stat_orig = stat
181
- rs = [0] * n
182
-
183
- row_index = weights.sparse.row_index
184
- ws = weights.sparse.values
185
158
 
186
- idx = 0
187
- while idx < n
188
- stat_i_orig = stat_orig[idx]
159
+ arr = Numo::DFloat.cast(y)
160
+ ids = (0..n_1).to_a
161
+ observations = Array.new(weights.n)
162
+ (0..n_1).each do |idx|
163
+ idsi = ids.dup
164
+ idsi.delete_at(idx)
165
+ idsi.shuffle!(random: rng)
166
+ idsi = Numo::Int32.cast(idsi)
167
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
189
168
 
169
+ # account for case where there are no neighbors
190
170
  row_range = row_index[idx]..(row_index[idx + 1] - 1)
191
171
  if row_range.size.zero?
192
- rs[idx] = permutations
193
- idx += 1
172
+ observations[idx] = permutations
194
173
  next
195
174
  end
196
- wi = Numo::DFloat.cast(ws[row_range])
197
-
198
- stat_i_new = mc_i(wi, shuffles[idx], idx)
199
-
200
- rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
201
- permutations)
202
- # if stat_i_orig.positive?
203
- # (stat_i_new >= stat_i_orig).count
204
- # else
205
- # (stat_i_new <= stat_i_orig).count
206
- # end
207
175
 
208
- idx += 1
176
+ wi = Numo::DFloat.cast(ws[row_range])
177
+ stat_i_new = mc_i(wi, sample, idx)
178
+ stat_i_orig = stat_orig[idx]
179
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
180
+ permutations)
209
181
  end
210
182
 
211
- rs.map do |ri|
183
+ observations.map do |ri|
212
184
  (ri + 1.0) / (permutations + 1.0)
213
185
  end
214
186
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpatialStats
4
- VERSION = '1.0.0'
4
+ VERSION = '1.0.1'
5
5
  end
@@ -58,6 +58,19 @@ module SpatialStats
58
58
  @sparse ||= CSRMatrix.new(dense.to_a.flatten, n, n)
59
59
  end
60
60
 
61
+ ##
62
+ # Compute the cardinality (number of neighbors) of each item as an array
63
+ #
64
+ # @return [Array]
65
+ def wc
66
+ @wc ||= begin
67
+ row_index = sparse.row_index
68
+ (0..n - 1).map do |idx|
69
+ row_index[idx + 1] - row_index[idx]
70
+ end
71
+ end
72
+ end
73
+
61
74
  ##
62
75
  # Row standardized version of the weights matrix.
63
76
  # Will return a new version of the weights matrix with standardized
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spatial_stats
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Keith Doggett
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-27 00:00:00.000000000 Z
11
+ date: 2020-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray