spatial_stats 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
4
+ # For now, instead of doing neighbor's variance (Si**2), I'm going to use
5
+ # the total sample variance. This is how GeoDa does it, but is different
6
+ # than arcgis. This shouldn't affect the expectation and variance of I.
7
+ module SpatialStats
8
+ module Local
9
+ class Moran < Stat
10
# Builds a local Moran statistic.
#
# The arguments are forwarded unchanged to the superclass constructor and
# are also stored on this instance.
#
# @param scope   scope later passed to the variable query in #x
# @param field   field later passed to the variable query in #x
# @param weights weights object; #expectation reads +n+ from it
def initialize(scope, field, weights)
  super
  @scope, @field, @weights = scope, field, weights
end
16
+ attr_writer :x, :z_lag
17
+
18
# Local statistic for every observation.
#
# @return [Array] the result of #i_i for each position of +z+, in order
def i
  (0...z.size).map { |position| i_i(position) }
end
23
+
24
# Computes the statistic at a single index.
#
# Permutation testing only needs the value at one index per shuffle, so
# evaluating a single entry avoids recomputing the whole set.
#
# @param idx [Integer] position of the observation
# @return the value of z[idx] / si2 multiplied by the lag at idx
def i_i(idx)
  lag_at_idx = z_lag[idx]
  z[idx] / si2 * lag_at_idx
end
33
+
34
# Expected value of the statistic.
#
# With row standardized weights the expectation is the same scalar,
# -1/(n-1), for every item. Otherwise it would be a vector whose numerator
# is the sum of the weights of each row.
#
# @return [Float]
def expectation
  count = @weights.n
  -1.0 / (count - 1)
end
41
+
42
# Variance of the statistic for every observation.
#
# The formula is A - B - (E[I])**2, with A and B supplied element-wise by
# #a_calc and #b_calc.
#
# @return [Array] one variance per element returned by #a_calc
def variance
  standardized = w.row_standardized
  expected = expectation

  a_calc(standardized).zip(b_calc(standardized)).map do |a_term, b_term|
    a_term - b_term - (expected**2)
  end
end
56
+
57
# Values of the field for the scope, standardized.
#
# The result is cached in @x; an assigned @x takes precedence over the
# query. +z+ is an alias for this method.
#
# @return whatever +standardize+ returns for the queried values
def x
  return @x if @x

  queried = SpatialStats::Queries::Variables.query_field(@scope, @field)
  @x = queried.standardize
end
alias z x
62
+
63
# Spatially lagged z values.
#
# The value is cached in @z_lag, so an assigned @z_lag is returned as is.
# w is already row standardized, so the neighbor sum is used instead of
# the neighbor average to save cost.
def z_lag
  return @z_lag if @z_lag

  @z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, z)
end
69
+
70
+ private
71
+
72
# Variance term used as the denominator in #i_i.
#
# The values are standardized, so the sample variance is 1 and the constant
# is returned instead of computing z.sample_variance.
#
# @return [Float] always 1.0
def si2
  1.0
end
77
+
78
+ # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
79
# A terms of the variance formula, one per row of the weights matrix.
#
# Each term is (n - b2i) * (sum of the squared weights of the row) / (n - 1).
#
# @param wt weights matrix; must respond to +shape+ and +[row, true]+
# @return [Array] one term per row
def a_calc(wt)
  n = wt.shape[0]
  factor = n - b2i_calc

  Array.new(n) do |row|
    squared_weights = wt[row, true].to_a.sum { |weight| weight**2 }
    factor * squared_weights / (n - 1)
  end
end
90
+
91
# B terms of the variance formula, one per row of the weights matrix.
#
# Technically the formula is Sigma k (Sigma h (wik * wih)). Since row
# standardized matrices are used, that sum is always 1 for each row, which
# also means every B term is the same.
#
# @param wt weights matrix; only +shape+ is read
# @return [Array] n copies of the shared term
def b_calc(wt)
  n = wt.shape[0]
  b2i = b2i_calc
  row_sum = 1.0

  Array.new(n, row_sum * (2 * b2i - n) / ((n - 1) * (n - 2)))
end
104
+
105
# Ratio of the sum of fourth powers of z to the squared sum of squares of z.
#
# @return the b2i value used by #a_calc and #b_calc
def b2i_calc
  fourth_powers = z.sum { |value| value**4 }
  squares = z.sum { |value| value**2 }
  fourth_powers / (squares**2)
end
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SpatialStats
4
+ module Local
5
+ class MultivariateGeary < Stat
6
# Builds a multivariate local Geary statistic.
#
# @param scope   scope passed on to each per-field Geary and variable query
# @param fields  collection of fields; one Geary statistic is built per entry
# @param weights weights object passed on to each per-field Geary
def initialize(scope, fields, weights)
  @scope, @fields, @weights = scope, fields, weights
end
11
+ attr_accessor :scope, :fields, :weights
12
+
13
# Multivariate statistic for every observation.
#
# A Geary statistic is computed per field; the per-field values are then
# averaged observation by observation.
#
# @return [Array] one averaged value per observation
def i
  field_count = fields.size
  per_field = fields.map { |field| Geary.new(scope, field, weights).i }
  per_field.transpose.map { |values| values.reduce(:+) / field_count }
end
20
+
21
# Conditional permutation test.
#
# One tuple of values is held constant while the rest are shuffled, so
# crand is given an array of indices and returns a list of new orders.
# Every field is then reordered by the same indices.
#
# @param permutations [Integer] number of shuffles per observation
# @param seed seed handed to gen_rng; nil for an unseeded generator
# @return [Array<Float>] (r + 1) / (permutations + 1) per observation, where
#   r counts the shuffles at least as extreme as the observed value
def mc(permutations = 99, seed = nil)
  rng = gen_rng(seed)
  n = w.shape[0]
  indices = (0..(n - 1)).to_a
  shuffles = crand(indices, permutations, rng)

  i_orig = i
  # the number of fields does not change between permutations
  m = fields.size
  rs = [0] * i_orig.size

  shuffles.each_with_index do |perms, idx|
    ii_orig = i_orig[idx]
    perms.each do |perm|
      # essentially reimplement i here, but only use i_i
      gearys = fields.each_with_index.map do |field, field_idx|
        geary = Geary.new(scope, field, weights)
        geary.x = field_data[field_idx].values_at(*perm)
        geary.i_i(idx)
      end
      ii_new = gearys.sum { |x| x / m }

      extreme = ii_orig.positive? ? ii_new >= ii_orig : ii_new <= ii_orig
      rs[idx] += 1 if extreme
    end
  end

  rs.map { |ri| (ri + 1.0) / (permutations + 1.0) }
end
57
+
58
+ private
59
+
60
# Standardized values of every field, in the order of +fields+.
#
# The result is cached in @field_data after the first call.
def field_data
  return @field_data if @field_data

  @field_data = fields.map do |name|
    SpatialStats::Queries::Variables.query_field(@scope, name).standardize
  end
end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Base class for local stats.
    #
    # Subclasses are expected to implement #i, #i_i, #expectation and
    # #variance. The permutation tests here also call methods this class does
    # not define: +x+, +x=+ and +z_lag=+ for #mc; +x+, +y+, +x=+ and +y=+ for
    # #mc_bv; +z+ for #quads.
    class Stat
      # @param scope   stored as-is and exposed through the +scope+ accessor
      # @param field   stored as-is and exposed through the +field+ accessor
      # @param weights stored as-is; #w calls +standardized+ on it and #quads
      #   calls +full+ on it
      def initialize(scope, field, weights)
        @scope = scope
        @field = field
        @weights = weights
      end
      attr_accessor :scope, :field, :weights

      # Statistic for every observation. Must be overridden.
      def i
        raise NotImplementedError, 'method i not defined'
      end

      # Statistic at a single index. Must be overridden.
      def i_i(_idx)
        raise NotImplementedError, 'method i_i not defined'
      end

      # Expected value of the statistic. Must be overridden.
      def expectation
        raise NotImplementedError, 'method expectation not implemented'
      end

      # Variance of the statistic per observation. Must be overridden.
      def variance
        raise NotImplementedError, 'method variance not implemented'
      end

      # Standard score of every observation: (i - expectation) divided by the
      # square root of the matching variance.
      #
      # @return [Array]
      def z_score
        deviations = i.map { |value| value - expectation }
        std_devs = variance.map { |value| Math.sqrt(value) }
        deviations.zip(std_devs).map { |deviation, std_dev| deviation / std_dev }
      end

      # Conditional randomization.
      #
      # Generates an n x permutations array of arrays. For each position the
      # element at that position is held in place and the remaining elements
      # are shuffled around it.
      #
      # @param arr [Array] values to permute
      # @param permutations [Integer] shuffles generated per position
      # @param rng [Random] generator passed to Array#shuffle
      # @return [Array<Array<Array>>]
      def crand(arr, permutations, rng)
        arr.each_with_index.map do |held, idx|
          others = arr.take(idx) + arr.drop(idx + 1)
          Array.new(permutations) do
            others.shuffle(random: rng).insert(idx, held)
          end
        end
      end

      # def crandi(arr, permutations, rng)
      #   n = @weights.n
      #   lisas = Numo::DFloat.zeros([n, permutations])

      #   ids = (0..n - 1).to_a
      #   rids = permutations.times.map do
      #     ids.shuffle(random: rng)
      #   end
      #   p rids

      #   (0..n - 1).each do |idx|
      #     idsi = ids.dup
      #     idsi.delete_at(idx)
      #     ids.shuffle!(random: rng)
      #     tmp = arr[idsi[rids[]]]
      #   end
      # end

      # Conditional permutation test.
      #
      # For local tests the values are shuffled, but for each item its own
      # value is held in place while the others move. Only that item is then
      # tested instead of the entire set. This is done for each item.
      #
      # @param permutations [Integer] shuffles per observation
      # @param seed seed handed to #gen_rng; nil for an unseeded generator
      # @return [Array<Float>] (r + 1) / (permutations + 1) per observation,
      #   where r is the number of equal to or more extreme samples
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(x, permutations, rng)

        i_orig = i
        rs = [0] * i_orig.size

        # For each shuffle, only the spatially lagged variable at one index is
        # needed, but it has to be an array of length n. A zeros array is kept
        # and duplicated, and only the entry at idx is filled in.
        lagged = [0] * i_orig.size

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          wi = w[idx, true] # current weight row
          perms.each do |perm|
            stat = self.class.new(scope, field, weights)
            stat.x = perm

            # avoids computing the lag for the entire data set
            # when only one entry matters
            z_lag = lagged.dup
            z_lag[idx] = wi.dot(perm)
            stat.z_lag = z_lag

            # https://geodacenter.github.io/glossary.html#ppvalue
            # NOTE: this is inconsistent with the output from GeoDa.
            # For local permutation tests they seem to use greater than,
            # not greater than or equal to. The glossary definition is
            # followed for now.
            rs[idx] += 1 if at_least_as_extreme?(stat.i_i(idx), ii_orig)
          end
        end

        pseudo_p_values(rs, permutations)
      end

      # Conditional permutation test for two-variable statistics: x is kept
      # fixed and y is shuffled.
      #
      # A new instance is built per shuffle from @scope, @x_field, @y_field
      # and @weights; this class does not set @x_field or @y_field.
      #
      # @param permutations [Integer] shuffles per observation
      # @param seed seed handed to #gen_rng; nil for an unseeded generator
      # @return [Array<Float>] (r + 1) / (permutations + 1) per observation
      def mc_bv(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(y, permutations, rng)

        # r is the number of equal to or more extreme samples
        i_orig = i
        rs = [0] * i_orig.size

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          perms.each do |perm|
            stat = self.class.new(@scope, @x_field, @y_field, @weights)
            stat.x = x
            stat.y = perm

            rs[idx] += 1 if at_least_as_extreme?(stat.i_i(idx), ii_orig)
          end
        end

        pseudo_p_values(rs, permutations)
      end

      # Quadrant label of every observation.
      #
      # The first letter is H when the z value is positive, the second is H
      # when its neighbor average is positive; L otherwise.
      # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
      #
      # @return [Array<String>] one of HH, LH, LL or HL per observation
      def quads
        full_weights = @weights.full
        lagged = SpatialStats::Utils::Lag.neighbor_average(full_weights, z)
        high_value = z.map(&:positive?)
        high_lag = lagged.map(&:positive?)

        high_value.zip(high_lag).map do |value_high, lag_high|
          if value_high
            lag_high ? 'HH' : 'HL'
          else
            lag_high ? 'LH' : 'LL'
          end
        end
      end

      private

      # Standardized form of the weights, as returned by weights.standardized.
      def w
        weights.standardized
      end

      # @param seed optional seed
      # @return [Random] seeded when a seed is given, unseeded otherwise
      def gen_rng(seed = nil)
        seed ? Random.new(seed) : Random.new
      end

      # True when a simulated value is equal to or more extreme than the
      # observed one: the upper tail is used for a positive observed value,
      # the lower tail otherwise.
      def at_least_as_extreme?(simulated, observed)
        observed.positive? ? simulated >= observed : simulated <= observed
      end

      # Turns counts of extreme samples into (r + 1) / (permutations + 1).
      def pseudo_p_values(counts, permutations)
        counts.map { |count| (count + 1.0) / (permutations + 1.0) }
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/local/stat'
4
+ require 'spatial_stats/local/bivariate_moran'
5
+ require 'spatial_stats/local/g'
6
+ require 'spatial_stats/local/geary'
7
+ require 'spatial_stats/local/moran'
8
+ require 'spatial_stats/local/multivariate_geary'
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
module Numo
  # Helpers added to NArray for preparing weights matrices.
  class NArray
    # Scales every row so that it sums to 1; rows that sum to 0 stay all zero.
    #
    # @return a new array built with self.class.cast from the scaled rows
    def row_standardized
      rows = each_over_axis.map do |row|
        total = row.sum
        # A fresh zeros row is built instead of returning the row itself:
        # per the original author's note, returning the row gets cast as
        # [0,0,0] => [0,1,0] for example.
        total.zero? ? self.class.zeros(row.size) : row / total
      end
      self.class.cast(rows)
    end

    # In windowed calculations the diagonal is set to 1.
    #
    # @return self plus the identity matrix when the trace (sum of the
    #   diagonal) is 0, otherwise self unchanged
    def windowed
      return self unless trace.zero?

      self + self.class.eye(shape[0])
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Queries
    # Queries the desired variable from a scope, ordered by primary key so
    # that the values line up with the keys of the weights matrix.
    module Variables
      # @param scope relation-like object; must respond to +klass+ and is
      #   bound into the SQL as the +scope+ CTE
      # @param field name of the column to read (quoted before interpolation)
      # @return [Array] the column values in ascending primary key order
      def self.query_field(scope, field)
        model = scope.klass
        quoted_field = ActiveRecord::Base.connection.quote_column_name(field)
        quoted_pk = model.quoted_primary_key
        rows = model.find_by_sql([<<-SQL, scope: scope])
          WITH scope as (:scope)
          SELECT scope.#{quoted_field} as field FROM scope
          ORDER BY scope.#{quoted_pk} ASC
        SQL
        rows.map(&:field)
      end
    end
  end
end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Queries
    # This provides PostGIS queries for calculating weights/neighbors
    # of spatial data sets.
    #
    # Every method takes a +scope+ that responds to +klass+ and is bound into
    # the SQL as the +scope+ CTE, and a geometry +column+ name that is quoted
    # before being interpolated.
    module Weights
      # Inverse distance weights for the n nearest neighbors of every row.
      #
      # @param scope see module description
      # @param column geometry column name
      # @param n number of neighbors per row (bound as LIMIT)
      # @param alpha exponent applied to the scaled distance
      # @return [Array<Hash>] one hash per (i_id, j_id) pair with :distance
      #   and :weight; empty when the query returns no rows
      def self.idw_knn(scope, column, n, alpha = 1)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        neighbors = klass.find_by_sql([<<-SQL, scope: scope, n: n])
          WITH scope as (:scope)
          SELECT neighbors.*
          FROM scope AS a
            CROSS JOIN LATERAL (
              SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
              ST_Distance(a.#{column}, b.#{column}) as distance
              FROM scope as b
              WHERE a.#{primary_key} <> b.#{primary_key}
              ORDER BY a.#{column} <-> b.#{column}
              LIMIT :n
            ) AS neighbors
        SQL

        _idw_weights(neighbors, alpha)
      end

      # Inverse distance weights for every pair of rows within +bandwidth+.
      #
      # @param scope see module description
      # @param column geometry column name
      # @param bandwidth distance passed to ST_DWithin
      # @param alpha exponent applied to the scaled distance
      # @return [Array<Hash>] one hash per (i_id, j_id) pair with :distance
      #   and :weight; empty when the query returns no rows
      def self.idw_band(scope, column, bandwidth, alpha = 1)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        neighbors = klass.find_by_sql([<<-SQL, scope: scope, bandwidth: bandwidth])
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
            ST_DWithin(a.#{column}, b.#{column}, :bandwidth) as is_neighbor,
            ST_Distance(a.#{column}, b.#{column}) as distance
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't' AND i_id <> j_id
        SQL

        _idw_weights(neighbors, alpha)
      end

      # Shared weighting step of idw_knn and idw_band.
      #
      # If the lowest distance is < 1, every distance is scaled by the factor
      # that makes the lowest one 1. The weight is then 1/(d^alpha).
      #
      # NOTE(review): a minimum distance of exactly 0 (coincident geometries)
      # is not handled specially and still produces non-finite weights.
      #
      # @param neighbors [Array] rows responding to +distance+ and +as_json+
      # @param alpha exponent applied to the scaled distance
      # @return [Array<Hash>] symbol-keyed row hashes with :weight added
      def self._idw_weights(neighbors, alpha)
        # min of an empty list is nil and cannot be compared with 1
        return [] if neighbors.empty?

        min_dist = neighbors.map(&:distance).min
        scale = min_dist < 1 ? 1 / min_dist : 1

        neighbors.map do |neighbor|
          hash = neighbor.as_json.symbolize_keys
          hash[:weight] = 1.0 / ((scale * neighbor.distance)**alpha)
          hash
        end
      end

      # The n nearest neighbors of every row.
      #
      # @return the find_by_sql result; rows expose i_id and j_id
      def self.knn(scope, column, n)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        klass.find_by_sql([<<-SQL, scope: scope, n: n])
          WITH scope as (:scope)
          SELECT neighbors.*
          FROM scope AS a
            CROSS JOIN LATERAL (
              SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id
              FROM scope as b
              WHERE a.#{primary_key} <> b.#{primary_key}
              ORDER BY a.#{column} <-> b.#{column}
              LIMIT :n
            ) AS neighbors
        SQL
      end

      # Every pair of distinct rows within +bandwidth+ of each other.
      #
      # @return the find_by_sql result; rows expose i_id, j_id and is_neighbor
      def self.distance_band_neighbors(scope, column, bandwidth)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        klass.find_by_sql([<<-SQL, scope: scope, distance: bandwidth])
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
            ST_DWithin(a.#{column}, b.#{column}, :distance) as is_neighbor
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't' AND i_id <> j_id
        SQL
      end

      # DE-9IM queen contiguity = F***T****
      def self.queen_contiguity_neighbors(scope, column)
        _contiguity_neighbors(scope, column, 'F***T****')
      end

      # Rook contiguity, using the DE-9IM pattern F***1****
      def self.rook_contiguity_neighbors(scope, column)
        _contiguity_neighbors(scope, column, 'F***1****')
      end

      # Pairs of rows whose geometries satisfy the given DE-9IM pattern.
      #
      # @param pattern [String] pattern handed to ST_RELATE; it is
      #   interpolated into the SQL text, so only the fixed patterns above
      #   should be passed
      # @return the find_by_sql result; rows expose i_id, j_id and is_neighbor
      def self._contiguity_neighbors(scope, column, pattern)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        klass.find_by_sql([<<-SQL, scope: scope])
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
            ST_RELATE(a.#{column}, b.#{column}, '#{pattern}') as is_neighbor
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't'
        SQL
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/queries/variables'
4
+ require 'spatial_stats/queries/weights'
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  # Rails::Railtie subclass with an empty body; it declares no initializers
  # or configuration of its own.
  class Railtie < ::Rails::Railtie
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+ module SpatialStats
5
+ module Utils
6
+ module Lag
7
+ # module for computing spatially lagged variables
8
+ # from a weights matrix and variable array
9
+ def self.neighbor_average(matrix, variables)
10
+ matrix = matrix.row_standardized
11
+ neighbor_sum(matrix, variables)
12
+ end
13
+
14
+ def self.neighbor_sum(matrix, variables)
15
+ matrix.dot(variables).to_a
16
+ end
17
+
18
+ def self.window_average(matrix, variables)
19
+ matrix = matrix.windowed.row_standardized
20
+ window_sum(matrix, variables)
21
+ end
22
+
23
+ def self.window_sum(matrix, variables)
24
+ matrix = matrix.windowed
25
+ matrix.dot(variables).to_a
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/utils/lag'
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  # Version string of the library.
  VERSION = '0.1.0'
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Weights
    # Builds binary contiguity weights matrices. Every neighbor pair returned
    # by the contiguity query gets a weight of 1.
    module Contiguous
      # Weights matrix from rook contiguity neighbors.
      #
      # @param scope must respond to +primary_key+ and +pluck+; also passed
      #   to the neighbor query
      # @param field geometry column name passed to the neighbor query
      # @return [SpatialStats::Weights::WeightsMatrix]
      def self.rook(scope, field)
        binary_matrix(scope, field, :rook_contiguity_neighbors)
      end

      # Weights matrix from queen contiguity neighbors.
      #
      # @param scope must respond to +primary_key+ and +pluck+; also passed
      #   to the neighbor query
      # @param field geometry column name passed to the neighbor query
      # @return [SpatialStats::Weights::WeightsMatrix]
      def self.queen(scope, field)
        binary_matrix(scope, field, :queen_contiguity_neighbors)
      end

      # Shared implementation of rook and queen.
      #
      # @param query [Symbol] name of the SpatialStats::Queries::Weights
      #   method that returns the neighbor rows
      def self.binary_matrix(scope, field, query)
        keys = scope.pluck(scope.primary_key).sort
        neighbors = SpatialStats::Queries::Weights.public_send(query, scope, field)

        # i_id => [{ j_id: ..., weight: 1 }, ...]
        weights = neighbors.group_by(&:i_id).transform_values do |rows|
          rows.map do |row|
            entry = row.as_json(only: [:j_id]).symbolize_keys
            entry[:weight] = 1
            entry
          end
        end
        SpatialStats::Weights::WeightsMatrix.new(keys, weights)
      end
      private_class_method :binary_matrix
    end
  end
end