spatial_stats 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
4
+ # For now, instead of doing neighbor's variance (Si**2), I'm going to use
5
+ # the total sample variance. This is how GeoDa does it, but is different
6
+ # than arcgis. This shouldn't affect the expectation and variance of I.
7
module SpatialStats
  module Local
    # Local Moran's I. Computes one I value per observation from the
    # standardized variable and its spatial lag, together with the analytic
    # expectation and variance used by Stat#z_score.
    #
    # The constructor is inherited from Stat (scope, field, weights); it
    # already stores all three values, so no override is needed here.
    class Moran < Stat
      # x and z_lag are writable so that Stat#mc can inject a permuted
      # variable and a precomputed lag into fresh instances.
      attr_writer :x, :z_lag

      # Local I for every observation.
      #
      # @return [Array] i_i(idx) for each index of z
      def i
        z.each_index.map { |idx| i_i(idx) }
      end

      # Local I at a single index. Permutation testing only needs the value
      # for one observation per shuffle, so this avoids recomputing the
      # whole vector.
      #
      # @param idx [Integer] position of the observation
      # @return [Numeric] (z[idx] / si2) * z_lag[idx]
      def i_i(idx)
        (z[idx] / si2) * z_lag[idx]
      end

      # Expected value of I under the null hypothesis. With row standardized
      # weights this is -1/(n-1) for every observation; otherwise the
      # numerator would be the per-row sum of weights.
      #
      # @return [Float]
      def expectation
        -1.0 / (@weights.n - 1)
      end

      # Variance of I for every observation, computed as A - B - (E[I])**2.
      #
      # @return [Array<Float>] one variance per row of the weights matrix
      def variance
        wt = w.row_standardized
        squared_expectation = expectation**2
        # b2i only depends on z, so compute it once and share it between
        # the A and B terms.
        b2i = b2i_calc

        a_calc(wt, b2i).zip(b_calc(wt, b2i)).map do |a_term, b_term|
          a_term - b_term - squared_expectation
        end
      end

      # Standardized variable queried from the scope; memoized unless a
      # value was injected through the writer.
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
                                               .standardize
      end
      alias z x

      # Spatial lag of z. Memoized; Stat#mc supplies its own value through
      # the writer on the fresh instance it builds for each permutation.
      # w is already row standardized, so neighbor_sum is used instead of
      # neighbor_average to save the extra standardization.
      def z_lag
        @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
      end

      private

      # Sample variance of z. The variable is standardized, so this is 1
      # (the total sample variance is used, as GeoDa does, rather than the
      # neighbor variance).
      def si2
        1.0
      end

      # A terms of the variance formula.
      # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
      #
      # @param wt row standardized weights matrix (indexed as wt[idx, true])
      # @param b2i [Numeric] value of b2i_calc; computed when omitted
      # @return [Array<Float>] one term per row of wt
      def a_calc(wt, b2i = b2i_calc)
        n = wt.shape[0]
        Array.new(n) do |idx|
          sigma_term = wt[idx, true].to_a.sum { |v| v**2 }
          (n - b2i) * sigma_term / (n - 1)
        end
      end

      # B terms of the variance formula. Technically the formula is
      # Sigma k (Sigma h (wik * wih)); with row standardized matrices that
      # is always 1 for each row, so every B term is identical.
      #
      # @param wt row standardized weights matrix (only its row count is used)
      # @param b2i [Numeric] value of b2i_calc; computed when omitted
      # @return [Array<Float>] the same term repeated once per row
      def b_calc(wt, b2i = b2i_calc)
        n = wt.shape[0]
        sigma_term = 1.0
        b_term = sigma_term * (2 * b2i - n) / ((n - 1) * (n - 2))
        Array.new(n, b_term)
      end

      # Ratio of the sum of fourth powers of z to the squared sum of
      # squares of z.
      def b2i_calc
        numerator = z.sum { |v| v**4 }
        denominator = z.sum { |v| v**2 }
        numerator / (denominator**2)
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Multivariate local Geary statistic: the per-observation average of
    # the univariate Geary statistic over several fields.
    class MultivariateGeary < Stat
      # @param scope query scope the fields are read from
      # @param fields [Array] names of the fields to combine
      # @param weights weights object shared by every field
      def initialize(scope, fields, weights)
        @scope = scope
        @fields = fields
        @weights = weights
      end
      attr_accessor :scope, :fields, :weights

      # Average of Geary#i across all fields, one value per observation.
      #
      # @return [Array]
      def i
        m = fields.size
        per_field = fields.map { |field| Geary.new(scope, field, weights).i }
        per_field.transpose.map { |values| values.reduce(:+) / m }
      end

      # Conditional permutation test.
      #
      # One tuple of values is held constant while the rest are shuffled,
      # so crand is given an array of indices and returns new orderings;
      # every field is then reordered with the same permuted indices.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed [Integer, nil] seed for the random number generator
      # @return [Array<Float>] pseudo p-value per observation
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        n = w.shape[0]
        indices = (0...n).to_a
        shuffles = crand(indices, permutations, rng)

        i_orig = i
        # the number of fields never changes inside the loops below
        m = fields.size
        rs = Array.new(i_orig.size, 0)

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          perms.each do |perm|
            # essentially reimplements i, but only evaluates i_i at idx
            gearys = fields.each_with_index.map do |field, field_idx|
              geary = Geary.new(scope, field, weights)
              geary.x = field_data[field_idx].values_at(*perm)
              geary.i_i(idx)
            end
            ii_new = gearys.sum { |value| value / m }

            # count samples that are equal to or more extreme than observed
            if ii_orig.positive?
              rs[idx] += 1 if ii_new >= ii_orig
            elsif ii_new <= ii_orig
              rs[idx] += 1
            end
          end
        end

        rs.map { |ri| (ri + 1.0) / (permutations + 1.0) }
      end

      private

      # Standardized values of every field, queried once and memoized.
      def field_data
        @field_data ||= fields.map do |field|
          SpatialStats::Queries::Variables.query_field(@scope, field)
                                          .standardize
        end
      end
    end
  end
end
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Base class for local stats. Subclasses implement i, i_i, expectation
    # and variance (and provide x / z, plus y for bivariate stats); this
    # class supplies z-scores, conditional permutation tests and quadrant
    # labels on top of them.
    class Stat
      # @param scope query scope the variable is read from
      # @param field name of the variable
      # @param weights weights object (must respond to standardized / full)
      def initialize(scope, field, weights)
        @scope = scope
        @field = field
        @weights = weights
      end
      attr_accessor :scope, :field, :weights

      # @raise [NotImplementedError] subclasses must override
      def i
        raise NotImplementedError, 'method i not defined'
      end

      # @raise [NotImplementedError] subclasses must override
      def i_i(_idx)
        raise NotImplementedError, 'method i_i not defined'
      end

      # @raise [NotImplementedError] subclasses must override
      def expectation
        raise NotImplementedError, 'method expectation not implemented'
      end

      # @raise [NotImplementedError] subclasses must override
      def variance
        raise NotImplementedError, 'method variance not implemented'
      end

      # Z-score of every local statistic:
      # (i - expectation) / sqrt(variance).
      #
      # @return [Array<Float>]
      def z_score
        # the expectation is the same for every element, so read it once
        expected = expectation
        i.zip(variance).map do |i_val, var|
          (i_val - expected) / Math.sqrt(var)
        end
      end

      # Conditional randomization. Generates an n x permutations array of
      # arrays: for each position, the value at that position is held in
      # place and all the other values are shuffled around it.
      #
      # @param arr [Array] values to permute (not mutated)
      # @param permutations [Integer] number of shuffles per position
      # @param rng [Random] random number generator
      # @return [Array<Array<Array>>]
      def crand(arr, permutations, rng)
        arr.each_with_index.map do |held, idx|
          others = arr.dup
          others.delete_at(idx)
          Array.new(permutations) do
            others.shuffle(random: rng).insert(idx, held)
          end
        end
      end

      # Unfinished draft kept from the original source:
      #
      # def crandi(arr, permutations, rng)
      #   n = @weights.n
      #   lisas = Numo::DFloat.zeros([n, permutations])

      #   ids = (0..n - 1).to_a
      #   rids = permutations.times.map do
      #     ids.shuffle(random: rng)
      #   end
      #   p rids

      #   (0..n - 1).each do |idx|
      #     idsi = ids.dup
      #     idsi.delete_at(idx)
      #     ids.shuffle!(random: rng)
      #     tmp = arr[idsi[rids[]]]
      #   end
      # end

      # Conditional permutation test for univariate stats.
      #
      # For each item its value is held in place and the other values are
      # shuffled; only that item's statistic is then recomputed.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed [Integer, nil] seed for the random number generator
      # @return [Array<Float>] pseudo p-value per observation
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(x, permutations, rng)

        # rs counts the samples that are equal to or more extreme
        i_orig = i
        n = i_orig.size
        rs = Array.new(n, 0)

        # Each shuffle only needs the lagged variable at one index, but it
        # must be an array of length n. Keep a zeros array to duplicate and
        # set only the entry at idx.
        lagged = Array.new(n, 0)

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          wi = w[idx, true] # current weight row
          perms.each do |perm|
            stat = self.class.new(scope, field, weights)
            stat.x = perm

            # avoids computing the lag for the entire data set when only
            # one entry matters
            z_lag = lagged.dup
            z_lag[idx] = wi.dot(perm)
            stat.z_lag = z_lag

            rs[idx] += 1 if as_extreme?(stat.i_i(idx), ii_orig)
          end
        end

        pseudo_p_values(rs, permutations)
      end

      # Conditional permutation test for bivariate stats: x is kept as is
      # and y is shuffled around each held observation. Relies on the
      # subclass providing x, y, their writers, and a four-argument
      # constructor (scope, x_field, y_field, weights).
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed [Integer, nil] seed for the random number generator
      # @return [Array<Float>] pseudo p-value per observation
      def mc_bv(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(y, permutations, rng)

        i_orig = i
        rs = Array.new(i_orig.size, 0)
        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          perms.each do |perm|
            stat = self.class.new(@scope, @x_field, @y_field, @weights)
            stat.x = x
            stat.y = perm

            rs[idx] += 1 if as_extreme?(stat.i_i(idx), ii_orig)
          end
        end

        pseudo_p_values(rs, permutations)
      end

      # Quadrant label for every observation, from the sign of z and the
      # sign of its neighbor average: 'HH', 'LH', 'LL' or 'HL'. Values that
      # are not strictly positive count as low.
      # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
      #
      # @return [Array<String>]
      def quads
        full_weights = @weights.full
        lagged = SpatialStats::Utils::Lag.neighbor_average(full_weights, z)

        z.zip(lagged).map do |z_val, lag_val|
          if z_val.positive?
            lag_val.positive? ? 'HH' : 'HL'
          else
            lag_val.positive? ? 'LH' : 'LL'
          end
        end
      end

      private

      # Standardized form of the weights.
      def w
        weights.standardized
      end

      # Random number generator, seeded when a seed is given.
      def gen_rng(seed = nil)
        seed ? Random.new(seed) : Random.new
      end

      # True when a permuted statistic is equal to or more extreme than the
      # observed one, in the direction of the observed sign.
      # https://geodacenter.github.io/glossary.html#ppvalue
      # NOTE: this is inconsistent with the output from GeoDa for local
      # permutation tests, which seems to use a strict comparison. The
      # glossary definition is followed here.
      def as_extreme?(ii_new, ii_orig)
        ii_orig.positive? ? ii_new >= ii_orig : ii_new <= ii_orig
      end

      # Pseudo p-values (r + 1) / (permutations + 1) for every count r.
      def pseudo_p_values(rs, permutations)
        rs.map { |ri| (ri + 1.0) / (permutations + 1.0) }
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/local/stat'
4
+ require 'spatial_stats/local/bivariate_moran'
5
+ require 'spatial_stats/local/g'
6
+ require 'spatial_stats/local/geary'
7
+ require 'spatial_stats/local/moran'
8
+ require 'spatial_stats/local/multivariate_geary'
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
module Numo
  # Extensions used to prepare weights matrices.
  class NArray
    # Returns a copy in which every row sums to 1. Rows that sum to 0 are
    # replaced by a row of zeros.
    def row_standardized
      rows = each_over_axis.map do |row|
        total = row.sum
        # A fresh zeros row is built instead of returning row itself:
        # returning row was observed to be cast incorrectly
        # (e.g. [0,0,0] => [0,1,0]).
        next self.class.zeros(row.size) if total.zero?

        row / total
      end
      self.class.cast(rows)
    end

    # In windowed calculations the diagonal is set to 1. When the trace
    # (sum of the diagonal) is 0 an identity matrix is added; otherwise
    # the receiver is returned unchanged.
    def windowed
      return self unless trace.zero?

      self + self.class.eye(shape[0])
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Queries
    # Queries a single variable from a scope, ordered by primary key so the
    # values line up with the keys used by the weights matrix.
    module Variables
      # @param scope scope to read from (must respond to klass)
      # @param field name of the column to fetch
      # @return [Array] column values ordered by ascending primary key
      def self.query_field(scope, field)
        model = scope.klass
        quoted_field = ActiveRecord::Base.connection.quote_column_name(field)
        quoted_key = model.quoted_primary_key
        sql = <<-SQL
          WITH scope as (:scope)
          SELECT scope.#{quoted_field} as field FROM scope
          ORDER BY scope.#{quoted_key} ASC
        SQL
        rows = model.find_by_sql([sql, { scope: scope }])
        rows.map(&:field)
      end
    end
  end
end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Queries
    # This provides PostGIS queries for calculating weights/neighbors
    # of spatial data sets.
    module Weights
      # Inverse distance weights for the n nearest neighbors of each row.
      #
      # @param scope scope to query (must respond to klass)
      # @param column name of the geometry column
      # @param n [Integer] number of neighbors per row
      # @param alpha [Numeric] distance exponent; weight is 1/(d^alpha)
      # @return [Array<Hash>] one hash per neighbor pair with a :weight key;
      #   empty when the query returns no rows
      def self.idw_knn(scope, column, n, alpha = 1)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        neighbors = klass.find_by_sql([<<-SQL, scope: scope, n: n])
          WITH scope as (:scope)
          SELECT neighbors.*
          FROM scope AS a
            CROSS JOIN LATERAL (
              SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
                     ST_Distance(a.#{column}, b.#{column}) as distance
              FROM scope as b
              WHERE a.#{primary_key} <> b.#{primary_key}
              ORDER BY a.#{column} <-> b.#{column}
              LIMIT :n
            ) AS neighbors
        SQL

        _idw_weights(neighbors, alpha)
      end

      # Inverse distance weights for all rows within bandwidth of each row.
      #
      # @param scope scope to query (must respond to klass)
      # @param column name of the geometry column
      # @param bandwidth [Numeric] maximum distance between neighbors
      # @param alpha [Numeric] distance exponent; weight is 1/(d^alpha)
      # @return [Array<Hash>] one hash per neighbor pair with a :weight key;
      #   empty when the query returns no rows
      def self.idw_band(scope, column, bandwidth, alpha = 1)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        neighbors = klass.find_by_sql([<<-SQL, scope: scope, bandwidth: bandwidth])
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
                   ST_DWithin(a.#{column}, b.#{column}, :bandwidth) as is_neighbor,
                   ST_Distance(a.#{column}, b.#{column}) as distance
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't' AND i_id <> j_id
        SQL

        _idw_weights(neighbors, alpha)
      end

      # The n nearest neighbors of each row.
      #
      # @return rows exposing i_id and j_id
      def self.knn(scope, column, n)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        klass.find_by_sql([<<-SQL, scope: scope, n: n])
          WITH scope as (:scope)
          SELECT neighbors.*
          FROM scope AS a
            CROSS JOIN LATERAL (
              SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id
              FROM scope as b
              WHERE a.#{primary_key} <> b.#{primary_key}
              ORDER BY a.#{column} <-> b.#{column}
              LIMIT :n
            ) AS neighbors
        SQL
      end

      # All pairs of distinct rows within bandwidth of each other.
      #
      # @return rows exposing i_id, j_id and is_neighbor
      def self.distance_band_neighbors(scope, column, bandwidth)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        klass.find_by_sql([<<-SQL, scope: scope, distance: bandwidth])
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
                   ST_DWithin(a.#{column}, b.#{column}, :distance) as is_neighbor
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't' AND i_id <> j_id
        SQL
      end

      # DE-9IM queen contiguity = F***T****
      def self.queen_contiguity_neighbors(scope, column)
        _contiguity_neighbors(scope, column, 'F***T****')
      end

      # DE-9IM rook contiguity = F***1****
      def self.rook_contiguity_neighbors(scope, column)
        _contiguity_neighbors(scope, column, 'F***1****')
      end

      # Pairs of rows whose geometries satisfy the given DE-9IM pattern.
      # The pattern is passed as a bind variable rather than interpolated
      # into the SQL text.
      #
      # @param pattern [String] DE-9IM intersection matrix pattern
      # @return rows exposing i_id, j_id and is_neighbor
      def self._contiguity_neighbors(scope, column, pattern)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key
        klass.find_by_sql([<<-SQL, scope: scope, pattern: pattern])
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
                   ST_RELATE(a.#{column}, b.#{column}, :pattern) as is_neighbor
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't'
        SQL
      end

      # Converts neighbor rows (each exposing distance and as_json) into
      # hashes carrying an inverse distance :weight.
      #
      # If the lowest distance is < 1, every distance is scaled by the
      # factor that makes the lowest distance 1 before weighting.
      # NOTE(review): a minimum distance of exactly 0 (coincident
      # geometries) still yields an infinite scale and NaN/zero weights;
      # decide how duplicates should be weighted.
      def self._idw_weights(neighbors, alpha)
        # nothing to scale or weight; avoids comparing nil with 1 below
        return [] if neighbors.empty?

        min_dist = neighbors.map(&:distance).min
        scale = min_dist < 1 ? 1.0 / min_dist : 1

        neighbors.map do |neighbor|
          # formula is 1/(d^alpha)
          hash = neighbor.as_json.symbolize_keys
          hash[:weight] = 1.0 / ((scale * neighbor.distance)**alpha)
          hash
        end
      end
      private_class_method :_idw_weights
    end
  end
end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/queries/variables'
4
+ require 'spatial_stats/queries/weights'
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  # Empty Railtie subclass; it declares no initializers or configuration.
  class Railtie < ::Rails::Railtie
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+ module SpatialStats
5
+ module Utils
6
+ module Lag
7
+ # module for computing spatially lagged variables
8
+ # from a weights matrix and variable array
9
+ def self.neighbor_average(matrix, variables)
10
+ matrix = matrix.row_standardized
11
+ neighbor_sum(matrix, variables)
12
+ end
13
+
14
+ def self.neighbor_sum(matrix, variables)
15
+ matrix.dot(variables).to_a
16
+ end
17
+
18
+ def self.window_average(matrix, variables)
19
+ matrix = matrix.windowed.row_standardized
20
+ window_sum(matrix, variables)
21
+ end
22
+
23
+ def self.window_sum(matrix, variables)
24
+ matrix = matrix.windowed
25
+ matrix.dot(variables).to_a
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/utils/lag'
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  # Version string of the gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Weights
    # Builds binary (weight = 1) weights matrices from contiguity queries.
    module Contiguous
      # Weights from rook contiguity neighbors.
      #
      # @param scope scope to query (responds to primary_key and pluck)
      # @param field name of the geometry column
      # @return [SpatialStats::Weights::WeightsMatrix]
      def self.rook(scope, field)
        contiguity_matrix(scope, field, :rook_contiguity_neighbors)
      end

      # Weights from queen contiguity neighbors.
      #
      # @param scope scope to query (responds to primary_key and pluck)
      # @param field name of the geometry column
      # @return [SpatialStats::Weights::WeightsMatrix]
      def self.queen(scope, field)
        contiguity_matrix(scope, field, :queen_contiguity_neighbors)
      end

      # Shared implementation of rook and queen: collects the sorted primary
      # keys, runs the named SpatialStats::Queries::Weights query, groups
      # the rows by i_id and gives every neighbor a weight of 1.
      #
      # @param query [Symbol] name of the neighbor query to run
      def self.contiguity_matrix(scope, field, query)
        keys = scope.pluck(scope.primary_key).sort

        neighbors = SpatialStats::Queries::Weights.public_send(query, scope, field)
        weights = neighbors.group_by(&:i_id).transform_values do |rows|
          rows.map do |neighbor|
            hash = neighbor.as_json(only: [:j_id]).symbolize_keys
            hash[:weight] = 1
            hash
          end
        end

        SpatialStats::Weights::WeightsMatrix.new(keys, weights)
      end
      private_class_method :contiguity_matrix
    end
  end
end