spatial_stats 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +96 -0
- data/Rakefile +30 -0
- data/lib/spatial_stats/enumerable_ext.rb +20 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +98 -0
- data/lib/spatial_stats/global/moran.rb +102 -0
- data/lib/spatial_stats/global/stat.rb +92 -0
- data/lib/spatial_stats/global.rb +5 -0
- data/lib/spatial_stats/local/bivariate_moran.rb +46 -0
- data/lib/spatial_stats/local/g.rb +75 -0
- data/lib/spatial_stats/local/geary.rb +76 -0
- data/lib/spatial_stats/local/moran.rb +112 -0
- data/lib/spatial_stats/local/multivariate_geary.rb +68 -0
- data/lib/spatial_stats/local/stat.rb +189 -0
- data/lib/spatial_stats/local.rb +8 -0
- data/lib/spatial_stats/narray_ext.rb +33 -0
- data/lib/spatial_stats/queries/variables.rb +22 -0
- data/lib/spatial_stats/queries/weights.rb +138 -0
- data/lib/spatial_stats/queries.rb +4 -0
- data/lib/spatial_stats/railtie.rb +6 -0
- data/lib/spatial_stats/utils/lag.rb +29 -0
- data/lib/spatial_stats/utils.rb +3 -0
- data/lib/spatial_stats/version.rb +5 -0
- data/lib/spatial_stats/weights/contiguous.rb +43 -0
- data/lib/spatial_stats/weights/distant.rb +77 -0
- data/lib/spatial_stats/weights/weights_matrix.rb +42 -0
- data/lib/spatial_stats/weights.rb +5 -0
- data/lib/spatial_stats.rb +18 -0
- data/lib/tasks/spatial_stats_tasks.rake +5 -0
- metadata +174 -0
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
|
4
|
+
# For now, instead of doing neighbor's variance (Si**2), I'm going to use
|
5
|
+
# the total sample variance. This is how GeoDa does it, but is different
|
6
|
+
# than arcgis. This shouldn't affect the expectation and variance of I.
|
7
|
+
module SpatialStats
  module Local
    # Local Moran's I computed on the standardized variable of a scope.
    #
    # The statistic at index i is (z[i] / si2) * z_lag[i], where z is the
    # standardized field and z_lag the spatially lagged z.
    class Moran < Stat
      # @param scope   scope the variable is queried from
      # @param field   name of the column holding the variable
      # @param weights weights matrix (must respond to +n+ and +standardized+)
      def initialize(scope, field, weights)
        # Stat#initialize stores scope, field and weights, so nothing else
        # needs to be assigned here.
        super(scope, field, weights)
      end
      attr_writer :x, :z_lag

      # Computes the statistic for every observation.
      #
      # @return [Array] one local I per entry of z
      def i
        z.each_with_index.map { |_value, idx| i_i(idx) }
      end

      # Computes the statistic at a single index. This matters for the
      # permutation test: each shuffle only needs the result at one index,
      # not the entire set, which saves a lot of computation.
      #
      # @param idx [Integer] position of the observation
      def i_i(idx)
        (z[idx] / si2) * z_lag[idx]
      end

      # Since row standardized weights are used, the expectation is simply
      # -1/(n-1) for all items. Otherwise it would be a vector whose
      # numerator is the sum of the weights of each row.
      def expectation
        -1.0 / (@weights.n - 1)
      end

      # Analytical variance of each local I; the formula is
      # A - B - (E[I])**2.
      #
      # @return [Array] one variance per row of the weights matrix
      def variance
        wt = w.row_standardized
        squared_expectation = expectation**2

        a_terms = a_calc(wt)
        b_terms = b_calc(wt)

        a_terms.each_with_index.map do |a_term, idx|
          a_term - b_terms[idx] - squared_expectation
        end
      end

      # The standardized variable, queried once and memoized. It can be
      # replaced through the +x=+ writer (the permutation test does this).
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
                                               .standardize
      end
      alias z x

      # Spatially lagged z, memoized. Stat#mc assigns its own lag through
      # the +z_lag=+ writer, so this full computation only runs when no lag
      # was injected.
      # w is already row standardized, so the neighbor sum is used instead
      # of the neighbor average to save cost.
      def z_lag
        @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
      end

      private

      # Would be z.sample_variance, but z is standardized so the sample
      # variance is 1.
      def si2
        1.0
      end

      # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
      #
      # A term of the variance for every row of +wt+:
      # (n - b2i) * sum(w_ij**2) / (n - 1).
      def a_calc(wt)
        n = wt.shape[0]
        b2i = b2i_calc

        (0...n).map do |idx|
          squared_weights = wt[idx, true].to_a.sum { |v| v**2 }
          (n - b2i) * squared_weights / (n - 1)
        end
      end

      # B term of the variance for every row of +wt+.
      #
      # Technically the formula is Sigma k (sigma h (wik * wih)); since row
      # standardized matrices are used this is always 1 for each row, which
      # also means that all b terms are the same.
      def b_calc(wt)
        n = wt.shape[0]
        sigma_term = 1.0
        b_term = sigma_term * (2 * b2i_calc - n) / ((n - 1) * (n - 2))

        Array.new(n, b_term)
      end

      # Ratio of the sum of fourth powers of z to the squared sum of squares.
      # NOTE(review): the ArcGIS formula referenced above appears to divide
      # both sums by the number of observations before taking this ratio;
      # confirm whether omitting that factor is intentional.
      def b2i_calc
        numerator = z.sum { |v| v**4 }
        denominator = z.sum { |v| v**2 }
        numerator / (denominator**2)
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
  module Local
    # Multivariate local Geary: for every observation, the mean of the
    # univariate local Geary statistics of several fields.
    class MultivariateGeary < Stat
      # @param scope   scope the variables are queried from
      # @param fields  list of field names to combine
      # @param weights weights matrix shared by all fields
      def initialize(scope, fields, weights)
        @scope = scope
        @fields = fields
        @weights = weights
      end
      attr_accessor :scope, :fields, :weights

      # Averages the per-field Geary statistics observation by observation.
      def i
        field_count = fields.size
        per_field = fields.map { |field| Geary.new(scope, field, weights).i }
        per_field.transpose.map { |values| values.reduce(:+) / field_count }
      end

      # Conditional permutation test.
      #
      # One tuple of values is held constant while the rest are shuffled, so
      # crand receives an array of indices and returns lists of new orders.
      # Every field is then rearranged according to the same new order.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed         seed handed to gen_rng (nil for an unseeded rng)
      # @return [Array<Float>] pseudo p-value per observation
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        row_count = w.shape[0]
        index_orders = crand((0...row_count).to_a, permutations, rng)

        observed = i
        extreme_counts = Array.new(observed.size, 0)

        index_orders.each_with_index do |orders, idx|
          reference = observed[idx]
          orders.each do |order|
            candidate = permuted_i_i(order, idx)
            as_extreme = if reference.positive?
                           candidate >= reference
                         else
                           candidate <= reference
                         end
            extreme_counts[idx] += 1 if as_extreme
          end
        end

        extreme_counts.map { |count| (count + 1.0) / (permutations + 1.0) }
      end

      private

      # Essentially reimplements i for one shuffled order, but only
      # evaluates i_i at +idx+ for each field.
      def permuted_i_i(order, idx)
        field_count = fields.size
        per_field = fields.each_with_index.map do |field, field_idx|
          geary = Geary.new(scope, field, weights)
          geary.x = field_data[field_idx].values_at(*order)
          geary.i_i(idx)
        end
        per_field.sum { |value| value / field_count }
      end

      # Standardized values of every field, queried once and memoized.
      def field_data
        @field_data ||= fields.map do |field|
          SpatialStats::Queries::Variables.query_field(@scope, field)
                                          .standardize
        end
      end
    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
  module Local
    # Base class for local stats.
    #
    # Subclasses implement i, i_i, expectation and variance. The permutation
    # tests additionally rely on the subclass providing x (mc), y (mc_bv),
    # z (quads) and the matching writers.
    class Stat
      # @param scope   scope the variable is queried from
      # @param field   name of the field under study
      # @param weights weights matrix for the scope
      def initialize(scope, field, weights)
        @scope = scope
        @field = field
        @weights = weights
      end
      attr_accessor :scope, :field, :weights

      # Statistic for every observation; to be provided by subclasses.
      def i
        raise NotImplementedError, 'method i not defined'
      end

      # Statistic at a single index; to be provided by subclasses.
      def i_i(_idx)
        raise NotImplementedError, 'method i_i not defined'
      end

      # Expected value of the statistic; to be provided by subclasses.
      def expectation
        raise NotImplementedError, 'method expectation not implemented'
      end

      # Variance of the statistic; to be provided by subclasses.
      def variance
        raise NotImplementedError, 'method variance not implemented'
      end

      # (i - expectation) / sqrt(variance) for every observation.
      #
      # @return [Array] one z-score per entry of i
      def z_score
        observed = i
        # expectation does not depend on the element, so compute it once.
        expected = expectation
        deviations = variance.map { |v| Math.sqrt(v) }

        observed.each_with_index.map do |value, idx|
          (value - expected) / deviations[idx]
        end
      end

      # Conditional randomization method.
      #
      # Generates an n x permutations array of arrays. For each position the
      # value at that position is held in place and the values around it
      # are permutated.
      #
      # @param arr          [Array] values to shuffle
      # @param permutations [Integer] number of shuffles per position
      # @param rng          [Random] random number generator used to shuffle
      def crand(arr, permutations, rng)
        arr.each_with_index.map do |xi, idx|
          others = arr.dup
          others.delete_at(idx)
          permutations.times.map do
            others.shuffle(random: rng).insert(idx, xi)
          end
        end
      end

      # Unfinished draft of an alternative randomization, kept commented out:
      #
      # def crandi(arr, permutations, rng)
      #   n = @weights.n
      #   lisas = Numo::DFloat.zeros([n, permutations])
      #
      #   ids = (0..n - 1).to_a
      #   rids = permutations.times.map do
      #     ids.shuffle(random: rng)
      #   end
      #   p rids
      #
      #   (0..n - 1).each do |idx|
      #     idsi = ids.dup
      #     idsi.delete_at(idx)
      #     ids.shuffle!(random: rng)
      #     tmp = arr[idsi[rids[]]]
      #   end
      # end

      # Conditional permutation test.
      #
      # For local tests the values are shuffled, but for each item its own
      # value is held in place and only the others move. Only that item is
      # then tested instead of the entire set; this is done for each item.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed         seed for the rng (nil for an unseeded one)
      # @return [Array<Float>] pseudo p-value per observation
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(x, permutations, rng)

        # rs holds the number of equal to or more extreme samples
        i_orig = i
        rs = Array.new(i_orig.size, 0)

        # For each shuffle only the spatially lagged variable at one index
        # is needed, but it has to be an array of length n. A zeros array is
        # duplicated and only the entry at idx is filled in.
        lagged = Array.new(i_orig.size, 0)

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          wi = w[idx, true] # current weight row
          perms.each do |perm|
            stat = self.class.new(scope, field, weights)
            stat.x = perm

            # avoids computing the lag for the entire data set when only
            # one entry matters
            z_lag = lagged.dup
            z_lag[idx] = wi.dot(perm)
            stat.z_lag = z_lag

            rs[idx] += 1 if as_extreme?(stat.i_i(idx), ii_orig)
          end
        end

        pseudo_p_values(rs, permutations)
      end

      # Bivariate conditional permutation test: x stays fixed while y is
      # shuffled around each observation.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed         seed for the rng (nil for an unseeded one)
      # @return [Array<Float>] pseudo p-value per observation
      def mc_bv(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(y, permutations, rng)

        # rs holds the number of equal to or more extreme samples
        i_orig = i
        rs = Array.new(i_orig.size, 0)

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          perms.each do |perm|
            stat = self.class.new(@scope, @x_field, @y_field, @weights)
            stat.x = x
            stat.y = perm

            rs[idx] += 1 if as_extreme?(stat.i_i(idx), ii_orig)
          end
        end

        pseudo_p_values(rs, permutations)
      end

      # Quadrant label of every observation, based on the sign of z and of
      # its neighbor average: 'HH', 'LH', 'LL' or 'HL' (first letter for z,
      # second for the lag; values that are not positive count as low).
      # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
      def quads
        lag = SpatialStats::Utils::Lag.neighbor_average(@weights.full, z)
        lag_high = lag.map(&:positive?)

        quad_terms = %w[HH LH LL HL]
        z.each_with_index.map do |value, idx|
          position = if value.positive?
                       lag_high[idx] ? 0 : 3
                     else
                       lag_high[idx] ? 1 : 2
                     end
          quad_terms[position]
        end
      end

      private

      # Standardized form of the weights, as returned by weights.standardized.
      def w
        weights.standardized
      end

      # Builds the random number generator, seeded when a seed is given.
      def gen_rng(seed = nil)
        seed ? Random.new(seed) : Random.new
      end

      # True when a permuted statistic is equal to or more extreme than the
      # observed one, in the direction of the observed sign.
      # https://geodacenter.github.io/glossary.html#ppvalue
      # NOTE: this is inconsistent with the output from GeoDa for local
      # permutation tests; they seem to use greater than, not greater than
      # or equal to. The glossary definition is followed for now.
      def as_extreme?(candidate, observed)
        observed.positive? ? candidate >= observed : candidate <= observed
      end

      # Turns counts of extreme samples into pseudo p-values (r + 1) / (p + 1).
      def pseudo_p_values(counts, permutations)
        counts.map { |count| (count + 1.0) / (permutations + 1.0) }
      end
    end
  end
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spatial_stats/local/stat'
|
4
|
+
require 'spatial_stats/local/bivariate_moran'
|
5
|
+
require 'spatial_stats/local/g'
|
6
|
+
require 'spatial_stats/local/geary'
|
7
|
+
require 'spatial_stats/local/moran'
|
8
|
+
require 'spatial_stats/local/multivariate_geary'
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Numo
  # Weights-matrix helpers added to Numo::NArray.
  class NArray
    # Returns a copy in which every row sums up to 1. Rows whose sum is 0
    # come back as all zeros.
    def row_standardized
      rows = each_over_axis.map do |row|
        total = row.sum
        # A zero row has to be rebuilt instead of being returned as is:
        # if the row itself is returned it ends up cast as
        # [0,0,0] => [0,1,0] for example.
        total.zero? ? self.class.zeros(row.size) : row / total
      end
      self.class.cast(rows)
    end

    # In windowed calculations the diagonal is set to 1. When the trace
    # (sum of the diagonal) is 0 an identity matrix is added, otherwise the
    # receiver is returned unchanged.
    def windowed
      return self unless trace.zero?

      self + self.class.eye(shape[0])
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
  module Queries
    # Module to query for the desired variable from the given scope,
    # ordered by primary key so that the values line up with the keys of
    # the weights matrix.
    module Variables
      # @param scope scope bound into the query as the `scope` CTE
      # @param field name of the column to read
      # @return [Array] the field values ordered by ascending primary key
      def self.query_field(scope, field)
        model = scope.klass
        quoted_field = ActiveRecord::Base.connection.quote_column_name(field)
        quoted_key = model.quoted_primary_key

        sql = <<-SQL
          WITH scope as (:scope)
          SELECT scope.#{quoted_field} as field FROM scope
          ORDER BY scope.#{quoted_key} ASC
        SQL

        model.find_by_sql([sql, { scope: scope }]).map(&:field)
      end
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
  module Queries
    # This provides PostGIS queries for calculating weights/neighbors
    # of spatial data sets
    module Weights
      # Inverse distance weights for the n nearest neighbors of every record.
      #
      # @param scope  scope bound into the query as the `scope` CTE
      # @param column name of the geometry column
      # @param n      number of neighbors kept per record
      # @param alpha  exponent applied to the scaled distance
      # @return [Array<Hash>] one hash per neighbor pair (the selected
      #   columns plus :weight); empty when the query finds no neighbors
      def self.idw_knn(scope, column, n, alpha = 1)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key

        sql = <<-SQL
          WITH scope as (:scope)
          SELECT neighbors.*
          FROM scope AS a
            CROSS JOIN LATERAL (
              SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
              ST_Distance(a.#{column}, b.#{column}) as distance
              FROM scope as b
              WHERE a.#{primary_key} <> b.#{primary_key}
              ORDER BY a.#{column} <-> b.#{column}
              LIMIT :n
            ) AS neighbors
        SQL
        neighbors = klass.find_by_sql([sql, { scope: scope, n: n }])

        _idw_weights(neighbors, alpha)
      end

      # Inverse distance weights for every pair of records that lie within
      # +bandwidth+ of each other.
      #
      # @param scope     scope bound into the query as the `scope` CTE
      # @param column    name of the geometry column
      # @param bandwidth distance passed to ST_DWithin
      # @param alpha     exponent applied to the scaled distance
      # @return [Array<Hash>] one hash per neighbor pair (the selected
      #   columns plus :weight); empty when no pair is within the bandwidth
      def self.idw_band(scope, column, bandwidth, alpha = 1)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key

        sql = <<-SQL
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
            ST_DWithin(a.#{column}, b.#{column}, :bandwidth) as is_neighbor,
            ST_Distance(a.#{column}, b.#{column}) as distance
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't' AND i_id <> j_id
        SQL
        neighbors = klass.find_by_sql([sql, { scope: scope, bandwidth: bandwidth }])

        _idw_weights(neighbors, alpha)
      end

      # The n nearest neighbors of every record, as returned by find_by_sql
      # (rows exposing i_id and j_id).
      def self.knn(scope, column, n)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key

        sql = <<-SQL
          WITH scope as (:scope)
          SELECT neighbors.*
          FROM scope AS a
            CROSS JOIN LATERAL (
              SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id
              FROM scope as b
              WHERE a.#{primary_key} <> b.#{primary_key}
              ORDER BY a.#{column} <-> b.#{column}
              LIMIT :n
            ) AS neighbors
        SQL
        klass.find_by_sql([sql, { scope: scope, n: n }])
      end

      # All pairs of distinct records within +bandwidth+ of each other, as
      # returned by find_by_sql (rows exposing i_id and j_id).
      def self.distance_band_neighbors(scope, column, bandwidth)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key

        sql = <<-SQL
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
            ST_DWithin(a.#{column}, b.#{column}, :distance) as is_neighbor
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't' AND i_id <> j_id
        SQL
        klass.find_by_sql([sql, { scope: scope, distance: bandwidth }])
      end

      # DE-9IM queen contiguity = F***T****
      def self.queen_contiguity_neighbors(scope, column)
        _contiguity_neighbors(scope, column, 'F***T****')
      end

      # DE-9IM rook contiguity = F***1****
      def self.rook_contiguity_neighbors(scope, column)
        _contiguity_neighbors(scope, column, 'F***1****')
      end

      # Pairs of records whose geometries satisfy the given DE-9IM pattern.
      # The pattern is interpolated into the SQL text, so it must only ever
      # come from trusted code (the two callers above pass constants).
      def self._contiguity_neighbors(scope, column, pattern)
        klass = scope.klass
        column = ActiveRecord::Base.connection.quote_column_name(column)
        primary_key = klass.quoted_primary_key

        sql = <<-SQL
          WITH neighbors AS (
            WITH scope AS (:scope)
            SELECT a.#{primary_key} as i_id, b.#{primary_key} as j_id,
            ST_RELATE(a.#{column}, b.#{column}, '#{pattern}') as is_neighbor
            FROM scope as a, scope as b
            ORDER BY i_id
          )
          SELECT * FROM neighbors WHERE is_neighbor = 't'
        SQL
        klass.find_by_sql([sql, { scope: scope }])
      end

      # Attaches the inverse distance weight 1/(d^alpha) to every neighbor.
      #
      # If the lowest distance is < 1, every distance is scaled by the
      # factor that makes the lowest one 1. An empty neighbor list yields an
      # empty result instead of failing on the missing minimum.
      def self._idw_weights(neighbors, alpha)
        return [] if neighbors.empty?

        min_dist = neighbors.map(&:distance).min
        scale = min_dist < 1 ? 1 / min_dist : 1

        neighbors.map do |neighbor|
          weight = 1.0 / ((scale * neighbor.distance)**alpha)
          neighbor.as_json.symbolize_keys.merge(weight: weight)
        end
      end
      private_class_method :_idw_weights
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
module SpatialStats
  module Utils
    # Module for computing spatially lagged variables
    # from a weights matrix and variable array.
    module Lag
      class << self
        # Lag using the row standardized form of +matrix+.
        def neighbor_average(matrix, variables)
          neighbor_sum(matrix.row_standardized, variables)
        end

        # Lag as the product of +matrix+ and +variables+, returned as an
        # array.
        def neighbor_sum(matrix, variables)
          matrix.dot(variables).to_a
        end

        # Lag using the windowed and then row standardized form of +matrix+.
        def window_average(matrix, variables)
          window_sum(matrix.windowed.row_standardized, variables)
        end

        # Lag as the product of the windowed +matrix+ and +variables+,
        # returned as an array.
        def window_sum(matrix, variables)
          matrix.windowed.dot(variables).to_a
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
  module Weights
    # Builds binary (weight 1) weights matrices from contiguity queries.
    module Contiguous
      # Weights matrix of rook contiguity neighbors.
      #
      # @param scope scope whose records are related to each other
      # @param field name of the geometry column
      # @return [SpatialStats::Weights::WeightsMatrix]
      def self.rook(scope, field)
        _binary_matrix(scope) do
          SpatialStats::Queries::Weights.rook_contiguity_neighbors(scope, field)
        end
      end

      # Weights matrix of queen contiguity neighbors.
      #
      # @param scope scope whose records are related to each other
      # @param field name of the geometry column
      # @return [SpatialStats::Weights::WeightsMatrix]
      def self.queen(scope, field)
        _binary_matrix(scope) do
          SpatialStats::Queries::Weights.queen_contiguity_neighbors(scope, field)
        end
      end

      # Shared builder for rook and queen: plucks the sorted primary keys of
      # the scope, then groups the neighbors yielded by the block by i_id
      # and gives every neighbor a weight of 1.
      def self._binary_matrix(scope)
        keys = scope.pluck(scope.primary_key).sort

        weights = yield.group_by(&:i_id).transform_values do |rows|
          rows.map do |row|
            row.as_json(only: [:j_id]).symbolize_keys.merge(weight: 1)
          end
        end

        SpatialStats::Weights::WeightsMatrix.new(keys, weights)
      end
      private_class_method :_binary_matrix
    end
  end
end
|