spatial_stats 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08787d83d402843b9d711fd88cdc5f22cbdacfb7a960d8bdb185591f10215f85'
4
- data.tar.gz: 96241b1ef7099ce6371f24dee56258121e0d6686ec746ac0411c4ed8ccaf5cad
3
+ metadata.gz: 3d63578b1952570f2de9a3ab045c8a977988960c2942c73416b86de6b228a091
4
+ data.tar.gz: 591927bf3c2fd3d7723b7e155bbae6a2c73313b45c749c6ab552e445e5f3b609
5
5
  SHA512:
6
- metadata.gz: 4ec63da930afeae2b68e3f821e82c8c18f18b11ffa3296ad34e56706b54db11170e5319133d84e9440360e984776cdc5adbd6193d7de252f6f618136a431696b
7
- data.tar.gz: 13964eb6a903e5ba0881b0ea2dfeab5e10fcfe51722d9e8a3342f2084200a60bc046d7375e29f651a5267d04a7b9b3a571f26b1ecf13ebf1c3fa2a3211ac6d8c
6
+ metadata.gz: dfe901b03685c5d66539793d7446738c49b0cdaae3022559544620962aa9d63e4d668d0177b8dc3c44cff797ce5c8ebed8fdeba87a52ec28ebdf0d02b69625b3
7
+ data.tar.gz: 3cfde56bb984bfbf3ab83bb89e02797a24eb47b1a8387e695890127ee5711f185a114f300906f96728c0e51996f223091f0a9455713f8fb3c3ff34b8285615da
data/README.md CHANGED
@@ -58,8 +58,6 @@ weights = SpatialStats::Weights::Distant.idw_knn(scope, :geom, 5)
58
58
 
59
59
  Weight matrices can be defined by a hash that maps each key to its neighbors and their weights.
60
60
 
61
- Note: Currently, the keys must be numeric.
62
-
63
61
  Example: Define WeightsMatrix and get the matrix in row_standardized format.
64
62
 
65
63
  ```ruby
@@ -70,30 +68,40 @@ weights = {
70
68
  4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
71
69
  }
72
70
  keys = weights.keys
73
- wm = SpatialStats::Weights::WeightsMatrix.new(keys, weights)
71
+ wm = SpatialStats::Weights::WeightsMatrix.new(weights)
74
72
  # => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>1}, {:id=>4, :weight=>1}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>1}, {:id=>3, :weight=>1}]}, @n=4>
75
73
 
76
- wm.standardized
77
- # => Numo::DFloat#shape=[4,4]
78
- #[[0, 0.5, 0, 0.5],
79
- # [1, 0, 0, 0],
80
- # [0, 0, 0, 1],
81
- # [0.5, 0, 0.5, 0]]
74
+ wm = wm.standardize
75
+ # => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>0.5}, {:id=>4, :weight=>0.5}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>0.5}, {:id=>3, :weight=>0.5}]}, @n=4>
76
+
77
+ wm.dense
78
+ # => Numo::DFloat[
79
+ # [0, 0.5, 0, 0.5],
80
+ # [1, 0, 0, 0],
81
+ # [0, 0, 0, 1],
82
+ # [0.5, 0, 0.5, 0]
83
+ # ]
84
+
85
+ wm.sparse
86
+ # => #<SpatialStats::Weights::CSRMatrix @m=4, @n=4, @nnz=6>
82
87
  ```
83
88
 
84
89
  ### Lagged Variables
85
90
 
86
- Spatially lagged variables can be computed with a 2-D n x n `Numo::NArray` and 1-D vector (`Array` or `Numo::NArray`).
91
+ Spatially lagged variables can be computed with a weights matrix and a 1-D vector (`Array`).
87
92
 
88
93
  #### Compute a Lagged Variable
89
94
 
90
95
  ```ruby
91
- w = Numo::DFloat[[0, 0.5, 0, 0.5],
92
- [1, 0, 0, 0],
93
- [0, 0, 0, 1],
94
- [0.5, 0, 0.5, 0]]
96
+ weights = {
97
+ 1 => [{ id: 2, weight: 1 }, { id: 4, weight: 1 }],
98
+ 2 => [{ id: 1, weight: 1 }],
99
+ 3 => [{ id: 4, weight: 1 }],
100
+ 4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
101
+ }
102
+ wm = SpatialStats::Weights::WeightsMatrix.new(weights).standardize
95
103
  vec = [1, 2, 3, 4]
96
- lagged_var = SpatialStats::Utils::Lag.neighbor_sum(w, vec)
104
+ lagged_var = SpatialStats::Utils::Lag.neighbor_sum(wm, vec)
97
105
  # => [3.0, 1.0, 4.0, 2.0]
98
106
  ```
99
107
 
@@ -60,35 +60,39 @@ module SpatialStats
60
60
  # of indices, which will return a list of new orders for the fields.
61
61
  # They will then be shuffled corresponding to the new indices.
62
62
  rng = gen_rng(seed)
63
- n = w.shape[0]
64
- indices = (0..(n - 1)).to_a
65
- shuffles = crand(indices, permutations, rng)
63
+ rids = crand(permutations, rng)
66
64
 
65
+ n_1 = weights.n - 1
66
+ sparse = weights.sparse
67
+ row_index = sparse.row_index
68
+ ws = sparse.values
69
+ wc = weights.wc
67
70
  stat_orig = stat
68
- rs = [0] * n
69
71
 
70
- row_index = weights.sparse.row_index
71
- ws = weights.sparse.values
72
-
73
- idx = 0
74
- while idx < n
75
- stat_i_orig = stat_orig[idx]
72
+ ids = (0..n_1).to_a
73
+ observations = Array.new(weights.n)
74
+ (0..n_1).each do |idx|
75
+ idsi = ids.dup
76
+ idsi.delete_at(idx)
77
+ idsi.shuffle!(random: rng)
78
+ idsi = Numo::Int32.cast(idsi)
79
+ sample = rids[idsi[rids[true, 0..wc[idx] - 1]]]
76
80
 
81
+ # account for case where there are no neighbors
77
82
  row_range = row_index[idx]..(row_index[idx + 1] - 1)
78
83
  if row_range.size.zero?
79
- rs[idx] = permutations
80
- idx += 1
84
+ observations[idx] = permutations
81
85
  next
82
86
  end
83
- wi = Numo::DFloat.cast(ws[row_range])
84
87
 
85
- # for each field, compute the C value at that index.
86
- stat_i_new = mc_i(wi, shuffles[idx], idx)
87
- rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new, permutations)
88
- idx += 1
88
+ wi = Numo::DFloat.cast(ws[row_range])
89
+ stat_i_new = mc_i(wi, sample, idx)
90
+ stat_i_orig = stat_orig[idx]
91
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
92
+ permutations)
89
93
  end
90
94
 
91
- rs.map do |ri|
95
+ observations.map do |ri|
92
96
  (ri + 1.0) / (permutations + 1.0)
93
97
  end
94
98
  end
@@ -42,12 +42,12 @@ module SpatialStats
42
42
 
43
43
  ##
44
44
  # Conditional randomization algorithm used in permutation testing.
45
- # Outputs an array of length n of Numo::DFloat matrices of
46
- # size m x num_neighbors. Where m is the number of permutations and
47
- # num_neighbors is the number of neighbors for that observation.
45
+ # Returns a matrix with permuted index values that will be used for
46
+ # selecting values from the original data set.
48
47
  #
49
- # The values are randomly permutated values from arr that will act
50
- # as its neighbors for that permutation.
48
+ # The width of the matrix is the max number of neighbors + 1
49
+ # which is far smaller than it would be if the original vector
50
+ # was shuffled in full.
51
51
  #
52
52
  # This is super important because most weight matrices are very
53
53
  # sparse so the amount of shuffling/multiplication that is done
@@ -55,9 +55,9 @@ module SpatialStats
55
55
  #
56
56
  # @see https://github.com/pysal/esda/blob/master/esda/moran.py#L893
57
57
  #
58
- # @return [Array] of Numo::Narray matrices
58
+ # @return [Numo::Int32] matrix of shape perms x (wc_max + 1)
59
59
  #
60
- def crand(arr, permutations, rng)
60
+ def crand(permutations, rng)
61
61
  # basing this off the ESDA method
62
62
  # need to get k for max_neighbors
63
63
  # and wc for cardinalities of each item
@@ -68,32 +68,13 @@ module SpatialStats
68
68
  # entry not the entire list of permutations for each entry.
69
69
  n_1 = weights.n - 1
70
70
 
71
- sparse = weights.sparse
72
- row_index = sparse.row_index
73
-
74
71
  # weight counts
75
- wc = Array.new(weights.n)
76
- k = 0
77
- (0..n_1).each do |idx|
78
- wc[idx] = row_index[idx + 1] - row_index[idx]
79
- end
80
-
72
+ wc = weights.wc
81
73
  k = wc.max + 1
82
74
  prange = (0..permutations - 1).to_a
83
75
 
84
- arr = Numo::DFloat.cast(arr)
85
-
86
- ids = (0..n_1).to_a
87
76
  ids_perm = (0..n_1 - 1).to_a
88
- rids = Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
89
-
90
- (0..n_1).map do |idx|
91
- idsi = ids.dup
92
- idsi.delete_at(idx)
93
- idsi.shuffle!(random: rng)
94
- idsi = Numo::Int32.cast(idsi)
95
- arr[idsi[rids[true, 0..wc[idx] - 1]]]
96
- end
77
+ Numo::Int32.cast(prange.map { ids_perm.sample(k, random: rng) })
97
78
  end
98
79
 
99
80
  ##
@@ -114,48 +95,40 @@ module SpatialStats
114
95
  # its neighbors. Then we will only test for that item instead
115
96
  # of the entire set. This will be done for each item.
116
97
  rng = gen_rng(seed)
117
- shuffles = crand(x, permutations, rng)
98
+ rids = crand(permutations, rng)
118
99
 
119
- n = weights.n
120
- # r is the number of equal to or more extreme samples
100
+ n_1 = weights.n - 1
101
+ sparse = weights.sparse
102
+ row_index = sparse.row_index
103
+ ws = sparse.values
104
+ wc = weights.wc
121
105
  stat_orig = stat
122
- rs = [0] * n
123
-
124
- row_index = weights.sparse.row_index
125
- ws = weights.sparse.values
126
106
 
127
- idx = 0
128
- while idx < n
129
- # need to truncate because floats from
130
- # c in sparse matrix are inconsistent with
131
- # dfloats
132
- stat_i_orig = stat_orig[idx]
107
+ arr = Numo::DFloat.cast(x)
108
+ ids = (0..n_1).to_a
109
+ observations = Array.new(weights.n)
110
+ (0..n_1).each do |idx|
111
+ idsi = ids.dup
112
+ idsi.delete_at(idx)
113
+ idsi.shuffle!(random: rng)
114
+ idsi = Numo::Int32.cast(idsi)
115
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
133
116
 
134
117
  # account for case where there are no neighbors
135
- # the way Numo handles negative ranges, it returns the max
136
- # so there will be a len 0 z array being multiplied by a
137
- # max_neighbor width permutation matrix.
138
- # Need to skip.
139
118
  row_range = row_index[idx]..(row_index[idx + 1] - 1)
140
119
  if row_range.size.zero?
141
- rs[idx] = permutations
142
- idx += 1
120
+ observations[idx] = permutations
143
121
  next
144
122
  end
145
- wi = Numo::DFloat.cast(ws[row_range])
146
- stat_i_new = mc_i(wi, shuffles[idx], idx)
147
123
 
148
- rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
149
- permutations)
150
- # rs[idx] = if stat_i_orig.positive?
151
- # (stat_i_new >= stat_i_orig).count
152
- # else
153
- # (stat_i_new <= stat_i_orig).count
154
- # end
155
- idx += 1
124
+ wi = Numo::DFloat.cast(ws[row_range])
125
+ stat_i_new = mc_i(wi, sample, idx)
126
+ stat_i_orig = stat_orig[idx]
127
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
128
+ permutations)
156
129
  end
157
130
 
158
- rs.map do |ri|
131
+ observations.map do |ri|
159
132
  (ri + 1.0) / (permutations + 1.0)
160
133
  end
161
134
  end
@@ -174,41 +147,40 @@ module SpatialStats
174
147
  # @return [Array] of p-values
175
148
  def mc_bv(permutations, seed)
176
149
  rng = gen_rng(seed)
177
- shuffles = crand(y, permutations, rng)
178
- n = weights.n
150
+ rids = crand(permutations, rng)
179
151
 
152
+ n_1 = weights.n - 1
153
+ sparse = weights.sparse
154
+ row_index = sparse.row_index
155
+ ws = sparse.values
156
+ wc = weights.wc
180
157
  stat_orig = stat
181
- rs = [0] * n
182
-
183
- row_index = weights.sparse.row_index
184
- ws = weights.sparse.values
185
158
 
186
- idx = 0
187
- while idx < n
188
- stat_i_orig = stat_orig[idx]
159
+ arr = Numo::DFloat.cast(y)
160
+ ids = (0..n_1).to_a
161
+ observations = Array.new(weights.n)
162
+ (0..n_1).each do |idx|
163
+ idsi = ids.dup
164
+ idsi.delete_at(idx)
165
+ idsi.shuffle!(random: rng)
166
+ idsi = Numo::Int32.cast(idsi)
167
+ sample = arr[idsi[rids[true, 0..wc[idx] - 1]]]
189
168
 
169
+ # account for case where there are no neighbors
190
170
  row_range = row_index[idx]..(row_index[idx + 1] - 1)
191
171
  if row_range.size.zero?
192
- rs[idx] = permutations
193
- idx += 1
172
+ observations[idx] = permutations
194
173
  next
195
174
  end
196
- wi = Numo::DFloat.cast(ws[row_range])
197
-
198
- stat_i_new = mc_i(wi, shuffles[idx], idx)
199
-
200
- rs[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
201
- permutations)
202
- # if stat_i_orig.positive?
203
- # (stat_i_new >= stat_i_orig).count
204
- # else
205
- # (stat_i_new <= stat_i_orig).count
206
- # end
207
175
 
208
- idx += 1
176
+ wi = Numo::DFloat.cast(ws[row_range])
177
+ stat_i_new = mc_i(wi, sample, idx)
178
+ stat_i_orig = stat_orig[idx]
179
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
180
+ permutations)
209
181
  end
210
182
 
211
- rs.map do |ri|
183
+ observations.map do |ri|
212
184
  (ri + 1.0) / (permutations + 1.0)
213
185
  end
214
186
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpatialStats
4
- VERSION = '1.0.0'
4
+ VERSION = '1.0.1'
5
5
  end
@@ -58,6 +58,19 @@ module SpatialStats
58
58
  @sparse ||= CSRMatrix.new(dense.to_a.flatten, n, n)
59
59
  end
60
60
 
61
+ ##
62
+ # Compute the cardinality (number of neighbors) of each observation as an array
63
+ #
64
+ # @return [Array]
65
+ def wc
66
+ @wc ||= begin
67
+ row_index = sparse.row_index
68
+ (0..n - 1).map do |idx|
69
+ row_index[idx + 1] - row_index[idx]
70
+ end
71
+ end
72
+ end
73
+
61
74
  ##
62
75
  # Row standardized version of the weights matrix.
63
76
  # Will return a new version of the weights matrix with standardized
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spatial_stats
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Keith Doggett
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-27 00:00:00.000000000 Z
11
+ date: 2020-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray