spatial_stats 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +185 -9
- data/lib/spatial_stats.rb +7 -4
- data/lib/spatial_stats/enumerable_ext.rb +29 -0
- data/lib/spatial_stats/global.rb +15 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +48 -4
- data/lib/spatial_stats/global/moran.rb +69 -19
- data/lib/spatial_stats/global/stat.rb +29 -17
- data/lib/spatial_stats/local.rb +16 -1
- data/lib/spatial_stats/local/bivariate_moran.rb +45 -4
- data/lib/spatial_stats/local/geary.rb +34 -47
- data/lib/spatial_stats/local/getis_ord.rb +109 -0
- data/lib/spatial_stats/local/moran.rb +55 -22
- data/lib/spatial_stats/local/multivariate_geary.rb +77 -22
- data/lib/spatial_stats/local/stat.rb +160 -88
- data/lib/spatial_stats/narray_ext.rb +27 -0
- data/lib/spatial_stats/queries.rb +6 -0
- data/lib/spatial_stats/queries/variables.rb +16 -3
- data/lib/spatial_stats/queries/weights.rb +91 -9
- data/lib/spatial_stats/utils.rb +7 -0
- data/lib/spatial_stats/utils/lag.rb +34 -2
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights.rb +9 -0
- data/lib/spatial_stats/weights/contiguous.rb +18 -0
- data/lib/spatial_stats/weights/distant.rb +41 -4
- data/lib/spatial_stats/weights/weights_matrix.rb +25 -0
- metadata +5 -4
- data/lib/spatial_stats/local/g.rb +0 -75
@@ -2,8 +2,12 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Global
|
5
|
+
##
|
6
|
+
# Stat is the abstract base class for global stats.
|
7
|
+
# It defines the methods that are common between all classes
|
8
|
+
# and will raise a NotImplementedError on those that are specific
|
9
|
+
# for each type of statistic.
|
5
10
|
class Stat
|
6
|
-
# Base class for global stats
|
7
11
|
def initialize(scope, field, weights)
|
8
12
|
@scope = scope
|
9
13
|
@field = field
|
@@ -11,10 +15,14 @@ module SpatialStats
|
|
11
15
|
end
|
12
16
|
attr_accessor :scope, :field, :weights
|
13
17
|
|
14
|
-
def
|
15
|
-
raise NotImplementedError, 'method
|
18
|
+
def stat
|
19
|
+
raise NotImplementedError, 'method stat not defined'
|
16
20
|
end
|
17
21
|
|
22
|
+
##
|
23
|
+
# The expected value of +#stat+
|
24
|
+
#
|
25
|
+
# @return [Float]
|
18
26
|
def expectation
|
19
27
|
raise NotImplementedError, 'method expectation not implemented'
|
20
28
|
end
|
@@ -23,8 +31,12 @@ module SpatialStats
|
|
23
31
|
raise NotImplementedError, 'method variance not implemented'
|
24
32
|
end
|
25
33
|
|
34
|
+
##
|
35
|
+
# Z-score of the statistic.
|
36
|
+
#
|
37
|
+
# @return [Float] the number of deviations from the mean
|
26
38
|
def z_score
|
27
|
-
(
|
39
|
+
(stat - expectation) / Math.sqrt(variance)
|
28
40
|
end
|
29
41
|
|
30
42
|
def mc(permutations, seed)
|
@@ -35,17 +47,17 @@ module SpatialStats
|
|
35
47
|
end
|
36
48
|
# r is the number of equal to or more extreme samples
|
37
49
|
# one sided
|
38
|
-
|
50
|
+
stat_orig = stat
|
39
51
|
r = 0
|
40
52
|
shuffles.each do |shuffle|
|
41
|
-
|
42
|
-
|
53
|
+
klass = self.class.new(@scope, @field, @weights)
|
54
|
+
klass.x = shuffle
|
43
55
|
|
44
56
|
# https://geodacenter.github.io/glossary.html#ppvalue
|
45
|
-
if
|
46
|
-
r += 1 if stat
|
57
|
+
if stat_orig.positive?
|
58
|
+
r += 1 if klass.stat >= stat_orig
|
47
59
|
else
|
48
|
-
r += 1 if stat
|
60
|
+
r += 1 if klass.stat <= stat_orig
|
49
61
|
end
|
50
62
|
end
|
51
63
|
|
@@ -61,17 +73,17 @@ module SpatialStats
|
|
61
73
|
end
|
62
74
|
|
63
75
|
# r is the number of equal to or more extreme samples
|
64
|
-
|
76
|
+
stat_orig = stat
|
65
77
|
r = 0
|
66
78
|
shuffles.each do |shuffle|
|
67
|
-
|
68
|
-
|
69
|
-
|
79
|
+
klass = self.class.new(@scope, @x_field, @y_field, @weights)
|
80
|
+
klass.x = x
|
81
|
+
klass.y = shuffle
|
70
82
|
|
71
|
-
if
|
72
|
-
r += 1 if stat
|
83
|
+
if stat_orig.positive?
|
84
|
+
r += 1 if klass.stat >= stat_orig
|
73
85
|
else
|
74
|
-
r += 1 if stat
|
86
|
+
r += 1 if klass.stat <= stat_orig
|
75
87
|
end
|
76
88
|
end
|
77
89
|
|
data/lib/spatial_stats/local.rb
CHANGED
@@ -2,7 +2,22 @@
|
|
2
2
|
|
3
3
|
require 'spatial_stats/local/stat'
|
4
4
|
require 'spatial_stats/local/bivariate_moran'
|
5
|
-
require 'spatial_stats/local/g'
|
6
5
|
require 'spatial_stats/local/geary'
|
6
|
+
require 'spatial_stats/local/getis_ord'
|
7
7
|
require 'spatial_stats/local/moran'
|
8
8
|
require 'spatial_stats/local/multivariate_geary'
|
9
|
+
|
10
|
+
module SpatialStats
|
11
|
+
##
|
12
|
+
# The Local module provides functionality for local spatial statistics.
|
13
|
+
# Local spatial statistics describe each location in the dataset with a value,
|
14
|
+
# like how similar or dissimilar each area is to its neighbors.
|
15
|
+
#
|
16
|
+
# All local classes define a +stat+ method that returns the described
|
17
|
+
# statistic and an +mc+ method that runs a permutation test to determine a
|
18
|
+
# pseudo p-value for the statistic. Some also define +variance+ and
|
19
|
+
# +z_score+ methods that can be used to calculate p-values if the
|
20
|
+
# distribution is known.
|
21
|
+
module Local
|
22
|
+
end
|
23
|
+
end
|
@@ -2,7 +2,19 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# BivariateMoran computes the local correlation between a variable x and
|
7
|
+
# spatially lagged variable y.
|
5
8
|
class BivariateMoran < Stat
|
9
|
+
##
|
10
|
+
# A new instance of BivariateMoran
|
11
|
+
#
|
12
|
+
# @param [ActiveRecord::Relation] scope
|
13
|
+
# @param [Symbol, String] x_field to query from scope
|
14
|
+
# @param [Symbol, String] y_field to query from scope
|
15
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
16
|
+
#
|
17
|
+
# @return [BivariateMoran]
|
6
18
|
def initialize(scope, x_field, y_field, weights)
|
7
19
|
@scope = scope
|
8
20
|
@x_field = x_field
|
@@ -10,18 +22,42 @@ module SpatialStats
|
|
10
22
|
@weights = weights
|
11
23
|
end
|
12
24
|
attr_accessor :scope, :x_field, :y_field, :weights
|
13
|
-
attr_writer :x, :y
|
14
25
|
|
15
|
-
|
26
|
+
##
|
27
|
+
# Computes the local indicator of spatial correlation for
|
28
|
+
# x against lagged y.
|
29
|
+
#
|
30
|
+
# @return [Array] of correlations for each observation.
|
31
|
+
def stat
|
16
32
|
x.each_with_index.map do |_xi, idx|
|
17
|
-
|
33
|
+
stat_i(idx)
|
18
34
|
end
|
19
35
|
end
|
36
|
+
alias i stat
|
20
37
|
|
21
|
-
|
38
|
+
##
|
39
|
+
# Computes Bivariate Moran's I at a single index. Multiplies x at
|
40
|
+
# this index by the lagged y value at this index.
|
41
|
+
#
|
42
|
+
# @param [Integer] idx to perform the calculation on
|
43
|
+
#
|
44
|
+
# @return [Float] correlation at idx
|
45
|
+
def stat_i(idx)
|
22
46
|
x[idx] * y_lag[idx]
|
23
47
|
end
|
24
48
|
|
49
|
+
##
|
50
|
+
# Permutation test to determine pseudo p-values of the +#stat+ method.
|
51
|
+
# Shuffles y values, holds x values, recomputes +#stat+ for each variation,
|
52
|
+
# then compares to the computed one. The ratio of more extreme values to
|
53
|
+
# permutations is returned for each observation.
|
54
|
+
#
|
55
|
+
# @see https://geodacenter.github.io/glossary.html#perm
|
56
|
+
#
|
57
|
+
# @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
|
58
|
+
# @param [Integer] seed used in random number generator for shuffles.
|
59
|
+
#
|
60
|
+
# @return [Array] of p-values
|
25
61
|
def mc(permutations = 99, seed = nil)
|
26
62
|
mc_bv(permutations, seed)
|
27
63
|
end
|
@@ -38,6 +74,11 @@ module SpatialStats
|
|
38
74
|
|
39
75
|
private
|
40
76
|
|
77
|
+
def mc_i(wi, perms, idx)
|
78
|
+
y_lag_i = (wi * perms).sum(1)
|
79
|
+
x[idx] * y_lag_i
|
80
|
+
end
|
81
|
+
|
41
82
|
def y_lag
|
42
83
|
@y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
|
43
84
|
end
|
@@ -2,71 +2,58 @@
|
|
2
2
|
|
3
3
|
module SpatialStats
|
4
4
|
module Local
|
5
|
+
##
|
6
|
+
# Geary's C statistic computes the spatial lag of the difference between
|
7
|
+
# variable zi and its neighbors squared, in the set z. The local version
|
8
|
+
# returns a value for each entry.
|
5
9
|
class Geary < Stat
|
10
|
+
##
|
11
|
+
# A new instance of Geary
|
12
|
+
#
|
13
|
+
# @param [ActiveRecord::Relation] scope
|
14
|
+
# @param [Symbol, String] field to query from scope
|
15
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
16
|
+
#
|
17
|
+
# @return [Geary]
|
6
18
|
def initialize(scope, field, weights)
|
7
19
|
super(scope, field, weights)
|
8
20
|
end
|
9
|
-
attr_writer :x
|
10
21
|
|
11
|
-
|
22
|
+
##
|
23
|
+
# Computes Geary's C for every observation in the +scope+.
|
24
|
+
# Geary's C is defined as the square distance between
|
25
|
+
# an observation and its neighbors, weighted by their weights.
|
26
|
+
#
|
27
|
+
# @return [Array] the C value for each observation
|
28
|
+
def stat
|
12
29
|
z.each_with_index.map do |_zi, idx|
|
13
|
-
|
30
|
+
stat_i(idx)
|
14
31
|
end
|
15
32
|
end
|
33
|
+
alias c stat
|
16
34
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
sum += w[idx, j] * ((z[idx] - z[j])**2)
|
22
|
-
end
|
23
|
-
sum
|
24
|
-
end
|
25
|
-
|
35
|
+
##
|
36
|
+
# Values of the +field+ queried from the +scope+
|
37
|
+
#
|
38
|
+
# @return [Array]
|
26
39
|
def x
|
27
40
|
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
28
41
|
.standardize
|
29
42
|
end
|
30
43
|
alias z x
|
31
44
|
|
32
|
-
|
33
|
-
# For local tests, we need to shuffle the values
|
34
|
-
# but for each item, hold its value in place and shuffle
|
35
|
-
# its neighbors. Then we will only test for that item instead
|
36
|
-
# of the entire set. This will be done for each item.
|
37
|
-
rng = gen_rng(seed)
|
38
|
-
shuffles = crand(x, permutations, rng)
|
39
|
-
|
40
|
-
# r is the number of equal to or more extreme samples
|
41
|
-
i_orig = i
|
42
|
-
rs = [0] * i_orig.size
|
43
|
-
|
44
|
-
shuffles.each_with_index do |perms, idx|
|
45
|
-
ii_orig = i_orig[idx]
|
46
|
-
perms.each do |perm|
|
47
|
-
stat = self.class.new(scope, field, weights)
|
48
|
-
stat.x = perm
|
49
|
-
ii_new = stat.i_i(idx)
|
50
|
-
|
51
|
-
# https://geodacenter.github.io/glossary.html#ppvalue
|
52
|
-
# NOTE: this is inconsistent with the output from GeoDa
|
53
|
-
# for local permutation tests, they seem to use greater than
|
54
|
-
# not greater than or equal to. I'm going to go by the definition
|
55
|
-
# in the glossary for now.
|
56
|
-
if ii_orig.positive?
|
57
|
-
rs[idx] += 1 if ii_new >= ii_orig
|
58
|
-
else
|
59
|
-
rs[idx] += 1 if ii_new <= ii_orig
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
45
|
+
private
|
63
46
|
|
64
|
-
|
65
|
-
|
66
|
-
|
47
|
+
def stat_i(idx)
|
48
|
+
zs = Numo::DFloat.cast(z)
|
49
|
+
zi = (z[idx] - zs)**2
|
50
|
+
(w[idx, true] * zi).sum
|
67
51
|
end
|
68
52
|
|
69
|
-
|
53
|
+
def mc_i(wi, perms, idx)
|
54
|
+
zi = (z[idx] - perms)**2
|
55
|
+
(wi * zi).sum(1)
|
56
|
+
end
|
70
57
|
|
71
58
|
def w
|
72
59
|
@w ||= weights.full.row_standardized
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SpatialStats
|
4
|
+
module Local
|
5
|
+
##
|
6
|
+
# GetisOrd's G and G* statistics compute the spatial autocorrelation of a
|
7
|
+
# variable, x. G computes the ratio of spatially lagged x to the sum of all
|
8
|
+
# other x's except xi for every entry. G* does the same calculation but
|
9
|
+
# includes xi in the spatial lag and denominator.
|
10
|
+
class GetisOrd < Stat
|
11
|
+
##
|
12
|
+
# A new instance of GetisOrd
|
13
|
+
#
|
14
|
+
# @param [ActiveRecord::Relation] scope
|
15
|
+
# @param [Symbol, String] field to query from scope
|
16
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
17
|
+
#
|
18
|
+
# @return [GetisOrd]
|
19
|
+
def initialize(scope, field, weights, star = nil)
|
20
|
+
super(scope, field, weights)
|
21
|
+
@star = star
|
22
|
+
end
|
23
|
+
attr_accessor :star
|
24
|
+
|
25
|
+
##
|
26
|
+
# Computes the G or G* statistic for every observation in x.
|
27
|
+
#
|
28
|
+
# @return [Array] of autocorrelations for each observation.
|
29
|
+
def stat
|
30
|
+
x.each_with_index.map do |_x_val, idx|
|
31
|
+
stat_i(idx)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
alias g stat
|
35
|
+
|
36
|
+
##
|
37
|
+
# Values of the +field+ queried from the +scope+
|
38
|
+
#
|
39
|
+
# @return [Array]
|
40
|
+
def x
|
41
|
+
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
42
|
+
end
|
43
|
+
alias z x
|
44
|
+
|
45
|
+
##
|
46
|
+
# True if G* is being used, false if G is being used.
|
47
|
+
# If no value is passed in the constructor, it will be determined
|
48
|
+
# based off of the trace of the weights.
|
49
|
+
#
|
50
|
+
# @return [Boolean] of star
|
51
|
+
def star?
|
52
|
+
if @star.nil?
|
53
|
+
@star = weights.full.trace.positive?
|
54
|
+
else
|
55
|
+
@star
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def stat_i(idx)
|
62
|
+
x_lag[idx] / denominators[idx]
|
63
|
+
end
|
64
|
+
|
65
|
+
def mc_i(wi, perms, idx)
|
66
|
+
x_lag_i = (wi * perms).sum(1)
|
67
|
+
x_lag_i / denominators[idx]
|
68
|
+
end
|
69
|
+
|
70
|
+
def w
|
71
|
+
@w ||= begin
|
72
|
+
if star?
|
73
|
+
weights.full.windowed.row_standardized
|
74
|
+
else
|
75
|
+
weights.standardized
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def z_lag
|
81
|
+
# window if star is true
|
82
|
+
@z_lag ||= begin
|
83
|
+
if star?
|
84
|
+
SpatialStats::Utils::Lag.window_sum(w, x)
|
85
|
+
else
|
86
|
+
SpatialStats::Utils::Lag.neighbor_sum(w, x)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
alias x_lag z_lag
|
91
|
+
|
92
|
+
def denominators
|
93
|
+
@denominators ||= begin
|
94
|
+
n = w.shape[0]
|
95
|
+
if star?
|
96
|
+
[x.sum] * n
|
97
|
+
else
|
98
|
+
# add everything but i
|
99
|
+
(0..n - 1).each.map do |idx|
|
100
|
+
terms = x.dup
|
101
|
+
terms.delete_at(idx)
|
102
|
+
terms.sum
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -1,36 +1,44 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-cluster-and-outlier-analysis-anselin-local-m.htm
|
4
|
-
# For now, instead of doing neighbor's variance (Si**2), I'm going to use
|
5
|
-
# the total sample variance. This is how GeoDa does it, but is different
|
6
|
-
# than arcgis. This shouldn't affect the expectation and variance of I.
|
7
3
|
module SpatialStats
|
8
4
|
module Local
|
5
|
+
##
|
6
|
+
# Moran's I statistic computes the spatial autocorrelation of variable x.
|
7
|
+
# It does this by computing a spatially lagged version of itself and
|
8
|
+
# comparing that with each observation based on the weights matrix.
|
9
|
+
# The local version returns the spatial autocorrelation for each
|
10
|
+
# observation in the dataset.
|
9
11
|
class Moran < Stat
|
12
|
+
##
|
13
|
+
# A new instance of Moran
|
14
|
+
#
|
15
|
+
# @param [ActiveRecord::Relation] scope
|
16
|
+
# @param [Symbol, String] field to query from scope
|
17
|
+
# @param [WeightsMatrix] weights to define relationship between observations in scope
|
18
|
+
#
|
19
|
+
# @return [Moran]
|
10
20
|
def initialize(scope, field, weights)
|
11
21
|
super(scope, field, weights)
|
12
|
-
@scope = scope
|
13
|
-
@field = field
|
14
|
-
@weights = weights
|
15
22
|
end
|
16
|
-
attr_writer :x, :z_lag
|
17
23
|
|
18
|
-
|
24
|
+
##
|
25
|
+
# Computes the local indicator of spatial autocorrelation (lisa) for
|
26
|
+
# x against lagged x.
|
27
|
+
#
|
28
|
+
# @return [Array] of autocorrelations for each observation.
|
29
|
+
def stat
|
19
30
|
z.each_with_index.map do |_z_val, idx|
|
20
|
-
|
31
|
+
stat_i(idx)
|
21
32
|
end
|
22
33
|
end
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
(z[idx] / si2) * sum_term
|
32
|
-
end
|
33
|
-
|
34
|
+
alias i stat
|
35
|
+
|
36
|
+
##
|
37
|
+
# Expected value of I for each observation. Since the weights matrix
|
38
|
+
# is standardized during the calculation, the expectation is the same for
|
39
|
+
# each observation.
|
40
|
+
#
|
41
|
+
# @return [Float]
|
34
42
|
def expectation
|
35
43
|
# since we are using row standardized weights, the expectation
|
36
44
|
# will just be -1/(n-1) for all items. Otherwise, it would be
|
@@ -39,6 +47,12 @@ module SpatialStats
|
|
39
47
|
-1.0 / (@weights.n - 1)
|
40
48
|
end
|
41
49
|
|
50
|
+
##
|
51
|
+
# Variance of I for each observation.
|
52
|
+
#
|
53
|
+
# @see https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
|
54
|
+
#
|
55
|
+
# @return [Array] of variances for each observation
|
42
56
|
def variance
|
43
57
|
# formula is A - B - (E[I])**2
|
44
58
|
wt = w.row_standardized
|
@@ -54,14 +68,21 @@ module SpatialStats
|
|
54
68
|
vars
|
55
69
|
end
|
56
70
|
|
71
|
+
##
|
72
|
+
# Values of the +field+ queried from the +scope+
|
73
|
+
#
|
74
|
+
# @return [Array]
|
57
75
|
def x
|
58
76
|
@x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
|
59
77
|
.standardize
|
60
78
|
end
|
61
79
|
alias z x
|
62
80
|
|
81
|
+
##
|
82
|
+
# Spatially lagged x variable at each observation.
|
83
|
+
#
|
84
|
+
# @return [Array]
|
63
85
|
def z_lag
|
64
|
-
# can't memoize yet because of mc testing
|
65
86
|
# w is already row_standardized, so we are using
|
66
87
|
# neighbor sum instead of neighbor_average to save cost
|
67
88
|
@z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
|
@@ -69,6 +90,18 @@ module SpatialStats
|
|
69
90
|
|
70
91
|
private
|
71
92
|
|
93
|
+
def stat_i(idx)
|
94
|
+
sum_term = z_lag[idx]
|
95
|
+
(z[idx] / si2) * sum_term
|
96
|
+
end
|
97
|
+
|
98
|
+
def mc_i(wi, perms, idx)
|
99
|
+
# compute i for a single index given DFloat of neighbor weights
|
100
|
+
# and DFloat of neighbor z perms
|
101
|
+
z_lag_i = (wi * perms).sum(1)
|
102
|
+
z[idx] * z_lag_i
|
103
|
+
end
|
104
|
+
|
72
105
|
def si2
|
73
106
|
# @si2 ||= z.sample_variance
|
74
107
|
# we standardize so sample_variance is 1
|