spatial_stats 0.2.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,10 +19,28 @@ module SpatialStats
19
19
  @scope = scope
20
20
  @x_field = x_field
21
21
  @y_field = y_field
22
- @weights = weights
22
+ @weights = weights.standardize
23
23
  end
24
24
  attr_accessor :scope, :x_field, :y_field, :weights
25
25
 
26
+ ##
27
+ # A new instance of BivariateMoran, built from two observation arrays and weights.
28
+ #
29
+ # @param [Array] x observations of dataset
30
+ # @param [Array] y observations of dataset
31
+ # @param [WeightsMatrix] weights to define relationships between observations
32
+ #
33
+ # @return [BivariateMoran]
34
+ def self.from_observations(x, y, weights)
35
+ n = weights.n
36
+ raise ArgumentError, 'Data size != weights.n' if x.size != n || y.size != n
37
+
38
+ instance = new(nil, nil, nil, weights.standardize)
39
+ instance.x = x
40
+ instance.y = y
41
+ instance
42
+ end
43
+
26
44
  ##
27
45
  # Computes the local indicator of spatial correlation for
28
46
  # x against lagged y.
@@ -62,6 +80,61 @@ module SpatialStats
62
80
  mc_bv(permutations, seed)
63
81
  end
64
82
 
83
+ ##
84
+ # Determines what quadrant an observation is in. Based on its value
85
+ # compared to its neighbors. This does not work for all stats, since
86
+ # it requires that values be negative.
87
+ #
88
+ # In a standardized array of z, high values are values greater than 0
89
+ # and its neighbors are determined by the spatial lag and if that is
90
+ # positive then its neighbors would be high, low otherwise.
91
+ #
92
+ # Quadrants are:
93
+ # [HH] a high value surrounded by other high values
94
+ # [LH] a low value surrounded by high values
95
+ # [LL] a low value surrounded by low values
96
+ # [HL] a high value surrounded by low values
97
+ #
98
+ # @return [Array] of labels
99
+ def quads
100
+ # https://github.com/pysal/esda/blob/master/esda/moran.py#L925
101
+ z_lag = SpatialStats::Utils::Lag.neighbor_average(weights, y)
102
+ zp = x.map(&:positive?)
103
+ lp = z_lag.map(&:positive?)
104
+
105
+ # hh = zp & lp
106
+ # lh = zp ^ true & lp
107
+ # ll = zp ^ true & lp ^ true
108
+ # hl = zp next to lp ^ true
109
+ hh = zp.each_with_index.map { |v, idx| v & lp[idx] }
110
+ lh = zp.each_with_index.map { |v, idx| (v ^ true) & lp[idx] }
111
+ ll = zp.each_with_index.map { |v, idx| (v ^ true) & (lp[idx] ^ true) }
112
+ hl = zp.each_with_index.map { |v, idx| v & (lp[idx] ^ true) }
113
+
114
+ # now zip lists and map them to proper terms
115
+ quad_terms = %w[HH LH LL HL]
116
+ hh.zip(lh, ll, hl).map do |feature|
117
+ quad_terms[feature.index(true)]
118
+ end
119
+ end
120
+ alias groups quads
121
+
122
+ ##
123
+ # Summary of the statistic. Computes +stat+, +mc+, and +groups+ then returns the values
124
+ # in an array of hashes, one per observation.
125
+ #
126
+ # @param [Integer] permutations to run. Last digit should be 9 to produce round numbers.
127
+ # @param [Integer] seed used in random number generator for shuffles.
128
+ #
129
+ # @return [Array]
130
+ def summary(permutations = 99, seed = nil)
131
+ p_vals = mc(permutations, seed)
132
+ data = weights.keys.zip(stat, p_vals, groups)
133
+ data.map do |row|
134
+ { key: row[0], stat: row[1], p: row[2], group: row[3] }
135
+ end
136
+ end
137
+
65
138
  def x
66
139
  @x ||= SpatialStats::Queries::Variables.query_field(@scope, @x_field)
67
140
  .standardize
@@ -79,8 +152,17 @@ module SpatialStats
79
152
  x[idx] * y_lag_i
80
153
  end
81
154
 
155
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
156
+ # Moran's I can be positive or negative, so count the permuted values that are at least as extreme on the same side as the observed value
157
+ if stat_i_orig.positive?
158
+ (stat_i_new >= stat_i_orig).count
159
+ else
160
+ (stat_i_new <= stat_i_orig).count
161
+ end
162
+ end
163
+
82
164
  def y_lag
83
- @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
165
+ @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, y)
84
166
  end
85
167
  end
86
168
  end
@@ -32,6 +32,25 @@ module SpatialStats
32
32
  end
33
33
  alias c stat
34
34
 
35
+ ##
36
+ # Computes the groups each observation belongs to.
37
+ # Potential groups for Geary's C are:
38
+ # [HH] High-High
39
+ # [LL] Low-Low
40
+ # [N] Negative - Group traditionally for HL and LH, but since the difference is squared they are in the same group.
41
+ #
42
+ #
43
+ # @return [Array] groups for each observation
44
+ def groups
45
+ quads.map do |quad|
46
+ if %w[HL LH].include?(quad)
47
+ 'N'
48
+ else
49
+ quad
50
+ end
51
+ end
52
+ end
53
+
35
54
  ##
36
55
  # Values of the +field+ queried from the +scope+
37
56
  #
@@ -45,9 +64,11 @@ module SpatialStats
45
64
  private
46
65
 
47
66
  def stat_i(idx)
48
- zs = Numo::DFloat.cast(z)
49
- zi = (z[idx] - zs)**2
50
- (w[idx, true] * zi).sum
67
+ # TODO: maybe don't even use stat_i
68
+ # just form all of the modified zs and then
69
+ # pass it to a loop of mulvec all implemented in c ext
70
+ zi = z.map { |val| (z[idx] - val)**2 }
71
+ weights.sparse.dot_row(zi, idx)
51
72
  end
52
73
 
53
74
  def mc_i(wi, perms, idx)
@@ -55,8 +76,17 @@ module SpatialStats
55
76
  (wi * zi).sum(1)
56
77
  end
57
78
 
58
- def w
59
- @w ||= weights.full.row_standardized
79
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
80
+ # Geary cannot be negative, so we have to use this technique from
81
+ # GeoDa to determine p values. Note I slightly modified it to be inclusive
82
+ # on both tails not just the lower tail.
83
+ # https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
84
+ mean = stat_i_new.mean
85
+ if stat_i_orig <= mean
86
+ (stat_i_new <= stat_i_orig).count
87
+ else
88
+ (stat_i_new >= stat_i_orig).count
89
+ end
60
90
  end
61
91
  end
62
92
  end
@@ -14,13 +14,18 @@ module SpatialStats
14
14
  # @param [ActiveRecord::Relation] scope
15
15
  # @param [Symbol, String] field to query from scope
16
16
  # @param [WeightsMatrix] weights to define relationship between observations in scope
17
+ # @param [Boolean] star to preset whether star is true or false. When nil, it is calculated from the weights.
17
18
  #
18
19
  # @return [GetisOrd]
19
20
  def initialize(scope, field, weights, star = nil)
20
- super(scope, field, weights)
21
+ @scope = scope
22
+ @field = field
23
+ @weights = weights
21
24
  @star = star
25
+ calc_weights
22
26
  end
23
27
  attr_accessor :star
28
+ attr_writer :x
24
29
 
25
30
  ##
26
31
  # Computes the G or G* statistic for every observation in x.
@@ -33,6 +38,25 @@ module SpatialStats
33
38
  end
34
39
  alias g stat
35
40
 
41
+ ##
42
+ # Computes the groups each observation belongs to.
43
+ # Potential groups for G are:
44
+ # [H] High
45
+ # [L] Low
46
+ #
47
+ # Group is high when standardized z is positive, low otherwise.
48
+ #
49
+ # @return [Array] groups for each observation
50
+ def groups
51
+ z.standardize.map do |val|
52
+ if val.positive?
53
+ 'H'
54
+ else
55
+ 'L'
56
+ end
57
+ end
58
+ end
59
+
36
60
  ##
37
61
  # Values of the +field+ queried from the +scope+
38
62
  #
@@ -50,7 +74,7 @@ module SpatialStats
50
74
  # @return [Boolean] of star
51
75
  def star?
52
76
  if @star.nil?
53
- @star = weights.full.trace.positive?
77
+ @star = weights.dense.trace.positive?
54
78
  else
55
79
  @star
56
80
  end
@@ -67,25 +91,29 @@ module SpatialStats
67
91
  x_lag_i / denominators[idx]
68
92
  end
69
93
 
70
- def w
71
- @w ||= begin
72
- if star?
73
- weights.full.windowed.row_standardized
74
- else
75
- weights.standardized
76
- end
94
+ def mc_observation_calc(stat_i_orig, stat_i_new, permutations)
95
+ # GetisOrd cannot be negative, so we have to use this technique from
96
+ # ESDA to determine if we should select p or 1-p.
97
+ # https://github.com/pysal/esda/blob/master/esda/getisord.py#L388
98
+ num_larger = (stat_i_new >= stat_i_orig).count
99
+ is_low = (permutations - num_larger) < num_larger
100
+ if is_low
101
+ permutations - num_larger
102
+ else
103
+ num_larger
77
104
  end
78
105
  end
79
106
 
107
+ def calc_weights
108
+ @weights = if star?
109
+ weights.window.standardize
110
+ else
111
+ weights.standardize
112
+ end
113
+ end
114
+
80
115
  def z_lag
81
- # window if star is true
82
- @z_lag ||= begin
83
- if star?
84
- SpatialStats::Utils::Lag.window_sum(w, x)
85
- else
86
- SpatialStats::Utils::Lag.neighbor_sum(w, x)
87
- end
88
- end
116
+ @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, x)
89
117
  end
90
118
  alias x_lag z_lag
91
119
 
@@ -55,12 +55,11 @@ module SpatialStats
55
55
  # @return [Array] of variances for each observation
56
56
  def variance
57
57
  # formula is A - B - (E[I])**2
58
- wt = w.row_standardized
59
58
  exp = expectation
60
59
 
61
60
  vars = []
62
- a_terms = a_calc(wt)
63
- b_terms = b_calc(wt)
61
+ a_terms = a_calc
62
+ b_terms = b_calc
64
63
 
65
64
  a_terms.each_with_index do |a_term, idx|
66
65
  vars << (a_term - b_terms[idx] - (exp**2))
@@ -68,6 +67,21 @@ module SpatialStats
68
67
  vars
69
68
  end
70
69
 
70
+ ##
71
+ # Computes the groups each observation belongs to.
72
+ # Potential groups for Moran's I are:
73
+ # [HH] High-High
74
+ # [HL] High-Low
75
+ # [LH] Low-High
76
+ # [LL] Low-Low
77
+ #
78
+ # This is the same as the +#quads+ method in the +Stat+ class.
79
+ #
80
+ # @return [Array] groups for each observation
81
+ def groups
82
+ quads
83
+ end
84
+
71
85
  ##
72
86
  # Values of the +field+ queried from the +scope+
73
87
  #
@@ -85,7 +99,7 @@ module SpatialStats
85
99
  def z_lag
86
100
  # w is already row_standardized, so we are using
87
101
  # neighbor sum instead of neighbor_average to save cost
88
- @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, z)
102
+ @z_lag ||= SpatialStats::Utils::Lag.neighbor_sum(weights, z)
89
103
  end
90
104
 
91
105
  private
@@ -102,6 +116,15 @@ module SpatialStats
102
116
  z[idx] * z_lag_i
103
117
  end
104
118
 
119
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
120
+ # Moran's I can be positive or negative, so count the permuted values that are at least as extreme on the same side as the observed value
121
+ if stat_i_orig.positive?
122
+ (stat_i_new >= stat_i_orig).count
123
+ else
124
+ (stat_i_new <= stat_i_orig).count
125
+ end
126
+ end
127
+
105
128
  def si2
106
129
  # @si2 ||= z.sample_variance
107
130
  # we standardize so sample_variance is 1
@@ -109,20 +132,27 @@ module SpatialStats
109
132
  end
110
133
 
111
134
  # https://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-local-morans-i-additional-math.htm
112
- def a_calc(wt)
113
- n = wt.shape[0]
135
+ # TODO: sparse
136
+ def a_calc
137
+ n = weights.n
114
138
  b2i = b2i_calc
139
+
140
+ wts = weights.sparse.values
141
+ row_index = weights.sparse.row_index
142
+
115
143
  a_terms = []
116
144
 
117
145
  (0..n - 1).each do |idx|
118
- sigma_term = wt[idx, true].to_a.sum { |v| v**2 }
146
+ row_range = row_index[idx]..(row_index[idx + 1] - 1)
147
+ wt = wts[row_range]
148
+ sigma_term = wt.sum { |v| v**2 }
119
149
  a_terms << (n - b2i) * sigma_term / (n - 1)
120
150
  end
121
151
  a_terms
122
152
  end
123
153
 
124
- def b_calc(wt)
125
- n = wt.shape[0]
154
+ def b_calc
155
+ n = weights.n
126
156
  b2i = b2i_calc
127
157
  b_terms = []
128
158
 
@@ -23,7 +23,7 @@ module SpatialStats
23
23
  def initialize(scope, fields, weights)
24
24
  @scope = scope
25
25
  @fields = fields
26
- @weights = weights
26
+ @weights = weights.standardize
27
27
  end
28
28
  attr_accessor :scope, :fields, :weights
29
29
 
@@ -60,37 +60,47 @@ module SpatialStats
60
60
  # of indices, which will return a list of new orders for the fields.
61
61
  # They will then be shuffled corresponding to the new indices.
62
62
  rng = gen_rng(seed)
63
- n = w.shape[0]
64
- indices = (0..(n - 1)).to_a
65
- shuffles = crand(indices, permutations, rng)
63
+ rids = crand(permutations, rng)
66
64
 
65
+ n_1 = weights.n - 1
66
+ sparse = weights.sparse
67
+ row_index = sparse.row_index
68
+ ws = sparse.values
69
+ wc = weights.wc
67
70
  stat_orig = stat
68
- rs = [0] * n
69
71
 
70
- ws = neighbor_weights
71
-
72
- idx = 0
73
- while idx < n
72
+ ids = (0..n_1).to_a
73
+ observations = Array.new(weights.n)
74
+ (0..n_1).each do |idx|
75
+ idsi = ids.dup
76
+ idsi.delete_at(idx)
77
+ idsi.shuffle!(random: rng)
78
+ idsi = Numo::Int32.cast(idsi)
79
+ sample = rids[idsi[rids[true, 0..wc[idx] - 1]]]
80
+
81
+ # account for case where there are no neighbors
82
+ row_range = row_index[idx]..(row_index[idx + 1] - 1)
83
+ if row_range.size.zero?
84
+ observations[idx] = permutations
85
+ next
86
+ end
87
+
88
+ wi = Numo::DFloat.cast(ws[row_range])
89
+ stat_i_new = mc_i(wi, sample, idx)
74
90
  stat_i_orig = stat_orig[idx]
75
- wi = Numo::DFloat.cast(ws[idx])
76
-
77
- # for each field, compute the C value at that index.
78
- stat_i_new = mc_i(wi, shuffles[idx], idx)
79
-
80
- rs[idx] = if stat_i_orig.positive?
81
- (stat_i_new >= stat_i_orig).count
82
- else
83
- (stat_i_new <= stat_i_orig).count
84
- end
85
-
86
- idx += 1
91
+ observations[idx] = mc_observation_calc(stat_i_orig, stat_i_new,
92
+ permutations)
87
93
  end
88
94
 
89
- rs.map do |ri|
95
+ observations.map do |ri|
90
96
  (ri + 1.0) / (permutations + 1.0)
91
97
  end
92
98
  end
93
99
 
100
+ def groups
101
+ raise NotImplementedError, 'groups not implemented'
102
+ end
103
+
94
104
  private
95
105
 
96
106
  def mc_i(wi, perms, idx)
@@ -108,6 +118,19 @@ module SpatialStats
108
118
  cs.mean(0)
109
119
  end
110
120
 
121
+ def mc_observation_calc(stat_i_orig, stat_i_new, _permutations)
122
+ # Geary cannot be negative, so we have to use this technique from
123
+ # GeoDa to determine p values. Note I slightly modified it to be inclusive
124
+ # on both tails not just the lower tail.
125
+ # https://github.com/GeoDaCenter/geoda/blob/master/Explore/LocalGearyCoordinator.cpp#L981
126
+ mean = stat_i_new.mean
127
+ if stat_i_orig <= mean
128
+ (stat_i_new <= stat_i_orig).count
129
+ else
130
+ (stat_i_new >= stat_i_orig).count
131
+ end
132
+ end
133
+
111
134
  def field_data
112
135
  @field_data ||= fields.map do |field|
113
136
  SpatialStats::Queries::Variables.query_field(@scope, field)