spatial_stats 0.1.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +185 -9
- data/lib/spatial_stats.rb +7 -4
- data/lib/spatial_stats/enumerable_ext.rb +29 -0
- data/lib/spatial_stats/global.rb +15 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +48 -4
- data/lib/spatial_stats/global/moran.rb +69 -19
- data/lib/spatial_stats/global/stat.rb +29 -17
- data/lib/spatial_stats/local.rb +16 -1
- data/lib/spatial_stats/local/bivariate_moran.rb +45 -4
- data/lib/spatial_stats/local/geary.rb +34 -47
- data/lib/spatial_stats/local/getis_ord.rb +109 -0
- data/lib/spatial_stats/local/moran.rb +55 -22
- data/lib/spatial_stats/local/multivariate_geary.rb +77 -22
- data/lib/spatial_stats/local/stat.rb +160 -88
- data/lib/spatial_stats/narray_ext.rb +27 -0
- data/lib/spatial_stats/queries.rb +6 -0
- data/lib/spatial_stats/queries/variables.rb +16 -3
- data/lib/spatial_stats/queries/weights.rb +91 -9
- data/lib/spatial_stats/utils.rb +7 -0
- data/lib/spatial_stats/utils/lag.rb +34 -2
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights.rb +9 -0
- data/lib/spatial_stats/weights/contiguous.rb +18 -0
- data/lib/spatial_stats/weights/distant.rb +41 -4
- data/lib/spatial_stats/weights/weights_matrix.rb +25 -0
- metadata +5 -4
- data/lib/spatial_stats/local/g.rb +0 -75
@@ -2,8 +2,12 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Global
|
5
|
+
##
|
6
|
+
# Stat is the abstract base class for global stats.
|
7
|
+
# It defines the methods that are common between all classes
|
8
|
+
# and will raise a NotImplementedError on those that are specific
|
9
|
+
# for each type of statistic.
|
5
10
|
class Stat
|
6
|
-
# Base class for global stats
|
7
11
|
def initialize(scope, field, weights)
|
8
12
|
@scope = scope
|
9
13
|
@field = field
|
@@ -11,10 +15,14 @@ module SpatialStats
|
|
11
15
|
end
|
12
16
|
attr_accessor :scope, :field, :weights
|
13
17
|
|
14
|
-
def
|
15
|
-
raise NotImplementedError, 'method
|
18
|
+
def stat
|
19
|
+
raise NotImplementedError, 'method stat not defined'
|
16
20
|
end
|
17
21
|
|
22
|
+
##
|
23
|
+
# The expected value of +#stat+
|
24
|
+
#
|
25
|
+
# @return [Float]
|
18
26
|
def expectation
|
19
27
|
raise NotImplementedError, 'method expectation not implemented'
|
20
28
|
end
|
@@ -23,8 +31,12 @@ module SpatialStats
|
|
23
31
|
raise NotImplementedError, 'method variance not implemented'
|
24
32
|
end
|
25
33
|
|
34
|
+
##
|
35
|
+
# Z-score of the statistic.
|
36
|
+
#
|
37
|
+
# @return [Float] the number of deviations from the mean
|
26
38
|
def z_score
|
27
|
-
(
|
39
|
+
(stat - expectation) / Math.sqrt(variance)
|
28
40
|
end
|
29
41
|
|
30
42
|
def mc(permutations, seed)
|
@@ -35,17 +47,17 @@ module SpatialStats
|
|
35
47
|
end
|
36
48
|
# r is the number of equal to or more extreme samples
|
37
49
|
# one sided
|
38
|
-
|
50
|
+
stat_orig = stat
|
39
51
|
r = 0
|
40
52
|
shuffles.each do |shuffle|
|
41
|
-
|
42
|
-
|
53
|
+
klass = self.class.new(@scope, @field, @weights)
|
54
|
+
klass.x = shuffle
|
43
55
|
|
44
56
|
# https://geodacenter.github.io/glossary.html#ppvalue
|
45
|
-
if
|
46
|
-
r += 1 if stat
|
57
|
+
if stat_orig.positive?
|
58
|
+
r += 1 if klass.stat >= stat_orig
|
47
59
|
else
|
48
|
-
r += 1 if stat
|
60
|
+
r += 1 if klass.stat <= stat_orig
|
49
61
|
end
|
50
62
|
end
|
51
63
|
|
@@ -61,17 +73,17 @@ module SpatialStats
|
|
61
73
|
end
|
62
74
|
|
63
75
|
# r is the number of equal to or more extreme samples
|
64
|
-
|
76
|
+
stat_orig = stat
|
65
77
|
r = 0
|
66
78
|
shuffles.each do |shuffle|
|
67
|
-
|
68
|
-
|
69
|
-
|
79
|
+
klass = self.class.new(@scope, @x_field, @y_field, @weights)
|
80
|
+
klass.x = x
|
81
|
+
klass.y = shuffle
|
70
82
|
|
71
|
-
if
|
72
|
-
r += 1 if stat
|
83
|
+
if stat_orig.positive?
|
84
|
+
r += 1 if klass.stat >= stat_orig
|
73
85
|
else
|
74
|
-
r += 1 if stat
|
86
|
+
r += 1 if klass.stat <= stat_orig
|
75
87
|
end
|
76
88
|
end
|
77
89
|
|
data/lib/spatial_stats/local.rb
CHANGED
@@ -2,7 +2,22 @@
|
|
2
2
|
|
3
3
|
require 'spatial_stats/local/stat'
|
4
4
|
require 'spatial_stats/local/bivariate_moran'
|
5
|
-
require 'spatial_stats/local/g'
|
6
5
|
require 'spatial_stats/local/geary'
|
6
|
+
require 'spatial_stats/local/getis_ord'
|
7
7
|
require 'spatial_stats/local/moran'
|
8
8
|
require 'spatial_stats/local/multivariate_geary'
|
9
|
+
|
10
|
+
module SpatialStats
|
11
|
+
##
|
12
|
+
# The Local module provides functionality for local spatial statistics.
|
13
|
+
# Local spatial statistics describe each location in the dataset with a value,
|
14
|
+
# like how similar or dissimilar each area is to its neighbors.
|
15
|
+
#
|
16
|
+
# All local classes define a +stat+ method that returns the described
|
17
|
+
# statistic and an +mc+ method that runs a permutation test to determine a
|
18
|
+
# pseudo p-value for the statistic. Some also define +variance+ and
|
19
|
+
# +z_score+ methods that can be used to calculate p-values if the
|
20
|
+
# distribution is known.
|
21
|
+
module Local
|
22
|
+
end
|
23
|
+
end
|
@@ -2,7 +2,19 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# BivariateMoran computes the local correlation between a variable x and
|
7
|
+
# spatially lagged variable y.
|
5
8
|
class BivariateMoran < Stat
|
9
|
+
##
|
10
|
+
# A new instance of BivariateMoran
|
11
|
+
#
|
12
|
+
# @param [ActiveRecord::Relation] scope
|
13
|
+
# @param [Symbol, String] x_field to query from scope
|
14
|
+
# @param [Symbol, String] y_field to query from scope
|
15
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
16
|
+
#
|
17
|
+
# @return [BivariateMoran]
|
6
18
|
def initialize(scope, x_field, y_field, weights)
|
7
19
|
@scope = scope
|
8
20
|
@x_field = x_field
|
@@ -10,18 +22,42 @@ module SpatialStats
|
|
10
22
|
@weights = weights
|
11
23
|
end
|
12
24
|
attr_accessor :scope, :x_field, :y_field, :weights
|
13
|
-
attr_writer :x, :y
|
14
25
|
|
15
|
-
|
26
|
+
##
|
27
|
+
# Computes the local indicator of spatial correlation for
|
28
|
+
# x against lagged y.
|
29
|
+
#
|
30
|
+
# @return [Array] of correlations for each observation.
|
31
|
+
def stat
|
16
32
|
x.each_with_index.map do |_xi, idx|
|
17
|
-
|
33
|
+
stat_i(idx)
|
18
34
|
end
|
19
35
|
end
|
36
|
+
alias i stat
|
20
37
|
|
21
|
-
|
38
|
+
##
|
39
|
+
# Computes Bivariate Moran's I at a single index. Multiplies x at
|
40
|
+
# this index by the lagged y value at this index.
|
41
|
+
#
|
42
|
+
# @param [Integer] idx to perform the calculation on
|
43
|
+
#
|
44
|
+
# @return [Float] correlation at idx
|
45
|
+
def stat_i(idx)
|
22
46
|
x[idx] * y_lag[idx]
|
23
47
|
end
|
24
48
|
|
49
|
+
##
|
50
|
+
# Permutation test to determine the pseudo p-values of the +#stat+ method.
|
51
|
+
# Shuffles y values, holds x values, recomputes +#stat+ for each variation,
|
52
|
+
# then compares to the computed one. The ratio of more extreme values to
|
53
|
+
# permutations is returned for each observation.
|
54
|
+
#
|
55
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
56
|
+
#
|
57
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
58
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
59
|
+
#
|
60
|
+
# @return [Array] of p-values
|
25
61
|
def mc(permutations = 99, seed = nil)
|
26
62
|
mc_bv(permutations, seed)
|
27
63
|
end
|
@@ -38,6 +74,11 @@ module SpatialStats
|
|
38
74
|
|
39
75
|
private
|
40
76
|
|
77
|
+
def mc_i(wi, perms, idx)
|
78
|
+
y_lag_i = (wi * perms).sum(1)
|
79
|
+
x[idx] * y_lag_i
|
80
|
+
end
|
81
|
+
|
41
82
|
def y_lag
|
42
83
|
@y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
|
43
84
|
end
|
@@ -2,71 +2,58 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# Geary's C statistic computes the spatial lag of the difference between
|
7
|
+
# variable zi and its neighbors squared, in the set z. The local version
|
8
|
+
# returns a value for each entry.
|
5
9
|
class Geary < Stat
|
10
|
+
##
|
11
|
+
# A new instance of Geary
|
12
|
+
#
|
13
|
+
# @param [ActiveRecord::Relation] scope
|
14
|
+
# @param [Symbol, String] field to query from scope
|
15
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
16
|
+
#
|
17
|
+
# @return [Geary]
|
6
18
|
def initialize(scope, field, weights)
|
7
19
|
super(scope, field, weights)
|
8
20
|
end
|
9
|
-
attr_writer :x
|
10
21
|
|
11
|
-
|
22
|
+
##
|
23
|
+
# Computes Geary's C for every observation in the +scope+.
|
24
|
+
# Geary's C is defined as the square distance between
|
25
|
+
# an observation and its neighbors, factored to their weights.
|
26
|
+
#
|
27
|
+
# @return [Array] the C value for each observation
|
28
|
+
def stat
|
12
29
|
z.each_with_index.map do |_zi, idx|
|
13
|
-
|
30
|
+
stat_i(idx)
|
14
31
|
end
|
15
32
|
end
|
33
|
+
alias c stat
|
16
34
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
sum += w[idx, j] * ((z[idx] - z[j])**2)
|
22
|
-
end
|
23
|
-
sum
|
24
|
-
end
|
25
|
-
|
35
|
+
##
|
36
|
+
# Values of the +field+ queried from the +scope+
|
37
|
+
#
|
38
|
+
# @return [Array]
|
26
39
|
def x
|
27
40
|
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
28
41
|
.standardize
|
29
42
|
end
|
30
43
|
alias z x
|
31
44
|
|
32
|
-
|
33
|
-
# For local tests, we need to shuffle the values
|
34
|
-
# but for each item, hold its value in place and shuffle
|
35
|
-
# its neighbors. Then we will only test for that item instead
|
36
|
-
# of the entire set. This will be done for each item.
|
37
|
-
rng = gen_rng(seed)
|
38
|
-
shuffles = crand(x, permutations, rng)
|
39
|
-
|
40
|
-
# r is the number of equal to or more extreme samples
|
41
|
-
i_orig = i
|
42
|
-
rs = [0] * i_orig.size
|
43
|
-
|
44
|
-
shuffles.each_with_index do |perms, idx|
|
45
|
-
ii_orig = i_orig[idx]
|
46
|
-
perms.each do |perm|
|
47
|
-
stat = self.class.new(scope, field, weights)
|
48
|
-
stat.x = perm
|
49
|
-
ii_new = stat.i_i(idx)
|
50
|
-
|
51
|
-
# https://geodacenter.github.io/glossary.html#ppvalue
|
52
|
-
# NOTE: this is inconsistent with the output from GeoDa
|
53
|
-
# for local permutation tests, they seem to use greater than
|
54
|
-
# not greater than or equal to. I'm going to go by the definition
|
55
|
-
# in the glossary for now.
|
56
|
-
if ii_orig.positive?
|
57
|
-
rs[idx] += 1 if ii_new >= ii_orig
|
58
|
-
else
|
59
|
-
rs[idx] += 1 if ii_new <= ii_orig
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
45
|
+
private
|
63
46
|
|
64
|
-
|
65
|
-
|
66
|
-
|
47
|
+
def stat_i(idx)
|
48
|
+
zs = Numo::DFloat.cast(z)
|
49
|
+
zi = (z[idx] - zs)**2
|
50
|
+
(w[idx, true] * zi).sum
|
67
51
|
end
|
68
52
|
|
69
|
-
|
53
|
+
def mc_i(wi, perms, idx)
|
54
|
+
zi = (z[idx] - perms)**2
|
55
|
+
(wi * zi).sum(1)
|
56
|
+
end
|
70
57
|
|
71
58
|
def w
|
72
59
|
@w ||= weights.full.row_standardized
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
|
4
|
+
module Local
|
5
|
+
##
|
6
|
+
# GetisOrd's G and G* statistics compute the spatial autocorrelation of a
|
7
|
+
# variable, x. G computes the ratio of spatially lagged x to the sum of all
|
8
|
+
# other x's except xi for every entry. G* does the same calculation but
|
9
|
+
# includes xi in the spatial lag and denominator.
|
10
|
+
class GetisOrd < Stat
|
11
|
+
##
|
12
|
+
# A new instance of GetisOrd
|
13
|
+
#
|
14
|
+
# @param [ActiveRecord::Relation] scope
|
15
|
+
# @param [Symbol, String] field to query from scope
|
16
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
17
|
+
#
|
18
|
+
# @return [GetisOrd]
|
19
|
+
def initialize(scope, field, weights, star = nil)
|
20
|
+
super(scope, field, weights)
|
21
|
+
@star = star
|
22
|
+
end
|
23
|
+
attr_accessor :star
|
24
|
+
|
25
|
+
##
|
26
|
+
# Computes the G or G* statistic for every observation in x.
|
27
|
+
#
|
28
|
+
# @return [Array] of autocorrelations for each observation.
|
29
|
+
def stat
|
30
|
+
x.each_with_index.map do |_x_val, idx|
|
31
|
+
stat_i(idx)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
alias g stat
|
35
|
+
|
36
|
+
##
|
37
|
+
# Values of the +field+ queried from the +scope+
|
38
|
+
#
|
39
|
+
# @return [Array]
|
40
|
+
def x
|
41
|
+
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
42
|
+
end
|
43
|
+
alias z x
|
44
|
+
|
45
|
+
##
|
46
|
+
# True if G* is being used, false if G is being used.
|
47
|
+
# If no value is passed in the constructor, it will be determined
|
48
|
+
# based off of the trace of the weights.
|
49
|
+
#
|
50
|
+
# @return [Boolean] of star
|
51
|
+
def star?
|
52
|
+
if @star.nil?
|
53
|
+
@star = weights.full.trace.positive?
|
54
|
+
else
|
55
|
+
@star
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def stat_i(idx)
|
62
|
+
x_lag[idx] / denominators[idx]
|
63
|
+
end
|
64
|
+
|
65
|
+
def mc_i(wi, perms, idx)
|
66
|
+
x_lag_i = (wi * perms).sum(1)
|
67
|
+
x_lag_i / denominators[idx]
|
68
|
+
end
|
69
|
+
|
70
|
+
def w
|
71
|
+
@w ||= begin
|
72
|
+
if star?
|
73
|
+
weights.full.windowed.row_standardized
|
74
|
+
else
|
75
|
+
weights.standardized
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def z_lag
|
81
|
+
# window if star is true
|
82
|
+
@z_lag ||= begin
|
83
|
+
if star?
|
84
|
+
SpatialStats::Utils::Lag.window_sum(w, x)
|
85
|
+
else
|
86
|
+
SpatialStats::Utils::Lag.neighbor_sum(w, x)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
alias x_lag z_lag
|
91
|
+
|
92
|
+
def denominators
|
93
|
+
@denominators ||= begin
|
94
|
+
n = w.shape[0]
|
95
|
+
if star?
|
96
|
+
[x.sum] * n
|
97
|
+
else
|
98
|
+
# add everything but i
|
99
|
+
(0..n - 1).each.map do |idx|
|
100
|
+
terms = x.dup
|
101
|
+
terms.delete_at(idx)
|
102
|
+
terms.sum
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -1,36 +1,44 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
|
4
|
-
# For now, instead of doing neighbor's variance (Si**2), I'm going to use
|
5
|
-
# the total sample variance. This is how GeoDa does it, but is different
|
6
|
-
# than arcgis. This shouldn't affect the expectation and variance of I.
|
7
3
|
module SpatialStats
|
8
4
|
module Local
|
5
|
+
##
|
6
|
+
# Moran's I statistic computes the spatial autocorrelation of variable x.
|
7
|
+
# It does this by computing a spatially lagged version of itself and
|
8
|
+
# comparing that with each observation based on the weights matrix.
|
9
|
+
# The local version returns the spatial autocorrelation for each
|
10
|
+
# observation in the dataset.
|
9
11
|
class Moran < Stat
|
12
|
+
##
|
13
|
+
# A new instance of Moran
|
14
|
+
#
|
15
|
+
# @param [ActiveRecord::Relation] scope
|
16
|
+
# @param [Symbol, String] field to query from scope
|
17
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
18
|
+
#
|
19
|
+
# @return [Moran]
|
10
20
|
def initialize(scope, field, weights)
|
11
21
|
super(scope, field, weights)
|
12
|
-
@scope = scope
|
13
|
-
@field = field
|
14
|
-
@weights = weights
|
15
22
|
end
|
16
|
-
attr_writer :x, :z_lag
|
17
23
|
|
18
|
-
|
24
|
+
##
|
25
|
+
# Computes the local indicator of spatial autocorrelation (lisa) for
|
26
|
+
# x against lagged x.
|
27
|
+
#
|
28
|
+
# @return [Array] of autocorrelations for each observation.
|
29
|
+
def stat
|
19
30
|
z.each_with_index.map do |_z_val, idx|
|
20
|
-
|
31
|
+
stat_i(idx)
|
21
32
|
end
|
22
33
|
end
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
(z[idx] / si2) * sum_term
|
32
|
-
end
|
33
|
-
|
34
|
+
alias i stat
|
35
|
+
|
36
|
+
##
|
37
|
+
# Expected value of I for each observation. Since the weights matrix
|
38
|
+
# is standardized during the calculation, the expectation is the same for
|
39
|
+
# each observation.
|
40
|
+
#
|
41
|
+
# @return [Float]
|
34
42
|
def expectation
|
35
43
|
# since we are using row standardized weights, the expectation
|
36
44
|
# will just be -1/(n-1) for all items. Otherwise, it would be
|
@@ -39,6 +47,12 @@ module SpatialStats
|
|
39
47
|
-1.0 / (@weights.n - 1)
|
40
48
|
end
|
41
49
|
|
50
|
+
##
|
51
|
+
# Variance of I for each observation.
|
52
|
+
#
|
53
|
+
# @see https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
|
54
|
+
#
|
55
|
+
# @return [Array] of variances for each observation
|
42
56
|
def variance
|
43
57
|
# formula is A - B - (E[I])**2
|
44
58
|
wt = w.row_standardized
|
@@ -54,14 +68,21 @@ module SpatialStats
|
|
54
68
|
vars
|
55
69
|
end
|
56
70
|
|
71
|
+
##
|
72
|
+
# Values of the +field+ queried from the +scope+
|
73
|
+
#
|
74
|
+
# @return [Array]
|
57
75
|
def x
|
58
76
|
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
59
77
|
.standardize
|
60
78
|
end
|
61
79
|
alias z x
|
62
80
|
|
81
|
+
##
|
82
|
+
# Spatially lagged x variable at each observation.
|
83
|
+
#
|
84
|
+
# @return [Array]
|
63
85
|
def z_lag
|
64
|
-
# can't memoize yet because of mc testing
|
65
86
|
# w is already row_standardized, so we are using
|
66
87
|
# neighbor sum instead of neighbor_average to save cost
|
67
88
|
@z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
|
@@ -69,6 +90,18 @@ module SpatialStats
|
|
69
90
|
|
70
91
|
private
|
71
92
|
|
93
|
+
def stat_i(idx)
|
94
|
+
sum_term = z_lag[idx]
|
95
|
+
(z[idx] / si2) * sum_term
|
96
|
+
end
|
97
|
+
|
98
|
+
def mc_i(wi, perms, idx)
|
99
|
+
# compute i for a single index given DFloat of neighbor weights
|
100
|
+
# and DFloat of neighbor z perms
|
101
|
+
z_lag_i = (wi * perms).sum(1)
|
102
|
+
z[idx] * z_lag_i
|
103
|
+
end
|
104
|
+
|
72
105
|
def si2
|
73
106
|
# @si2 ||= z.sample_variance
|
74
107
|
# we standardize so sample_variance is 1
|