spatial_stats 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 229947cadddad49771285a051c87a3797166fd4407a24fb27c9b2e3ccabe709c
4
+ data.tar.gz: a4e255eea9ceb986ae3078939cac4f7bc766fc3304801531095883091ceb1c79
5
+ SHA512:
6
+ metadata.gz: d8dff7610402b8867f463d5e0c6a1b4d4ca00ce3fffc0047c2f470f71d71c4856e1534ead05ceaca3fef6d060a46e0e2151e7ac27d988625255bba3063f4bb35
7
+ data.tar.gz: ab930c04d462cc5ff9da4479078ee14ffdfe4e657430176d9d4583322077f87b433115b98edcfbed6268bd1a24204fdd13ef8808e0d31bfa485e9a3790a1d116
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2020 Keith Doggett
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,96 @@
1
+ # SpatialStats
2
+
3
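+ SpatialStats provides spatial autocorrelation statistics for Ruby: global and local Moran's I (univariate and bivariate), the local Getis-Ord G statistic, and local Geary's C, each with permutation-based pseudo p-values.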
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'spatial_stats'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```bash
16
+ $ bundle
17
+ ```
18
+
19
+ Or install it yourself as:
20
+
21
+ ```bash
22
+ $ gem install spatial_stats
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ How to use my plugin.
28
+
29
+ ## Contributing
30
+
31
+ Once cloned, run the following commands to set up the test database.
32
+
33
+ ```sh
34
+ cd ./spatial_stats
35
+ bundle install
36
+ cd test/dummy
37
+ rake db:create
38
+ rake db:migrate
39
+ ```
40
+
41
+ If you are getting an error, you may need to set the following environment variables.
42
+
43
+ ```
44
+ $PGUSER # default "postgres"
45
+ $PGPASSWORD # default ""
46
+ $PGHOST # default "127.0.0.1"
47
+ $PGPORT # default "5432"
48
+ $PGDATABASE # default "spatial_stats_test"
49
+ ```
50
+
51
+ If the dummy app is set up correctly, run the following:
52
+
53
+ ```
54
+ cd ../..
55
+ rake
56
+ ```
57
+
58
+ This will run the tests. If they all pass, then your environment is set up correctly.
59
+
60
+ Note: It is recommended to have GEOS installed and linked to RGeo. You can test this by running the following:
61
+
62
+ ```
63
+ cd test/dummy
64
+ rails c
65
+
66
+ RGeo::Geos.supported?
67
+ # => true
68
+ ```
69
+
70
+ ## TODO
71
+
72
+ - Memoize expensive functions within classes
73
+ - Make star a parameter to getis-ord class
74
+
75
+ ## Future Work
76
+
77
+ #### General
78
+
79
+ - Refactor stats to inherit an abstract class.
80
+
81
+ #### Weights
82
+
83
+ - Add Kernel based weighting.
84
+
85
+ #### Utils
86
+
87
+ - Rate smoothing
88
+ - Bayes smoothing
89
+
90
+ #### Local
91
+
92
+ - Join Count Statistic
93
+
94
+ ## License
95
+
96
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,30 @@
1
# frozen_string_literal: true

# Activate the bundle. When Bundler is missing, only a hint is printed and
# loading continues with the remaining requires.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc`: generate RDoc for the README and everything under lib/ into
# the rdoc directory.
RDoc::Task.new(:rdoc) do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = 'SpatialStats'
  doc.options.push('--line-numbers')
  doc.rdoc_files.include('README.md', 'lib/**/*.rb')
end

require 'bundler/gem_tasks'

require 'rake/testtask'

# `rake test`: run every *_test.rb file below test/.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = false
  test_task.warning = false # keep Ruby warnings out of the test output
end

# Plain `rake` runs the test suite.
task default: :test
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
# Descriptive-statistics helpers added to every Enumerable.
#
# The helpers are written in terms of +sum+, +count+ and +map+ only, so they
# work for arrays as well as for collections that define +each+ but not +size+.
module Enumerable
  # Converts the values to z-scores: (value - mean) / sample standard deviation.
  #
  # @return [Array<Float>] one z-score per element, in iteration order. The
  #   entries are NaN or +/-Infinity when the sample variance is zero or the
  #   collection has fewer than two elements (float division by zero).
  def standardize
    center = mean
    spread = Math.sqrt(sample_variance)
    map { |value| (value - center) / spread }
  end

  # Arithmetic mean of the values.
  #
  # @return [Float] sum divided by the number of elements; NaN when empty.
  def mean
    # +count+ instead of +size+: Enumerable itself does not provide +size+.
    sum / count.to_f
  end

  # Unbiased sample variance (divides by n - 1).
  #
  # @return [Float] sum of squared deviations from the mean over (n - 1);
  #   NaN for a single element.
  def sample_variance
    center = mean
    squared_deviations = sum { |value| (value - center)**2 }
    squared_deviations / (count - 1).to_f
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://geodacenter.github.io/workbook/5b_global_adv/lab5b.html
4
module SpatialStats
  module Global
    # Global bivariate Moran's I: relates a standardized variable x to the
    # spatial lag of a second standardized variable y.
    class BivariateMoran < Stat
      attr_writer :x, :y

      # @param scope    passed through to the variable query
      # @param x_field  name of the first variable
      # @param y_field  name of the second (spatially lagged) variable
      # @param weights  weights object; must respond to +standardized+,
      #                 +full+ and +n+
      def initialize(scope, x_field, y_field, weights)
        @scope = scope
        @x_field = x_field
        @y_field = y_field
        @weights = weights
      end

      # The statistic: sum of x_i * lag(y)_i divided by the sum of x_i**2.
      def i
        lagged_y = SpatialStats::Utils::Lag.neighbor_sum(@weights.standardized, y)
        cross_product = x.each_with_index.inject(0) do |acc, (xi, idx)|
          acc + xi * lagged_y[idx]
        end

        cross_product / x.sum { |xi| xi**2 }
      end

      # Expected value of the statistic: -1 / (n - 1).
      def expectation
        -1.0 / (@weights.n - 1)
      end

      # Variance of the statistic, computed from x and the full weights matrix.
      # Formula reference: https://en.wikipedia.org/wiki/Moran%27s_I#Expected_value
      def variance
        n = @weights.n
        wij = @weights.full
        w = wij.sum
        expected = expectation

        s1 = s1_calc(n, wij)
        s2 = s2_calc(n, wij)
        s3 = s3_calc(n, x)

        s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
        s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)

        first_term = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
        first_term - expected**2
      end

      # Permutation test; delegates to the bivariate routine (+mc_bv+), which
      # holds x fixed and shuffles y.
      def mc(permutations = 99, seed = nil)
        mc_bv(permutations, seed)
      end

      # Standardized values of the x field (queried once, then cached).
      def x
        @x ||= standardized_field(@x_field)
      end

      # Standardized values of the y field (queried once, then cached).
      def y
        @y ||= standardized_field(@y_field)
      end

      private

      # Queries one field for the scope and converts it to z-scores.
      def standardized_field(name)
        SpatialStats::Queries::Variables.query_field(@scope, name).standardize
      end

      # Ratio of the mean fourth power to the squared mean second power of zs.
      def s3_calc(n, zs)
        fourth_moment = (1.0 / n) * zs.sum { |v| v**4 }
        second_moment_sq = ((1.0 / n) * zs.sum { |v| v**2 })**2
        fourth_moment / second_moment_sq
      end

      # Sum over k of (row sum k + column sum k) squared.
      def s2_calc(n, wij)
        (0...n).inject(0) do |total, row|
          row_sum = (0...n).inject(0) { |acc, col| acc + wij[row, col] }
          col_sum = (0...n).inject(0) { |acc, col| acc + wij[col, row] }
          total + (row_sum + col_sum)**2
        end
      end

      # Half the sum over all pairs of (w_ij + w_ji) squared.
      def s1_calc(n, wij)
        doubled = (0...n).inject(0) do |total, row|
          (0...n).inject(total) do |acc, col|
            acc + (wij[row, col] + wij[col, row])**2
          end
        end
        doubled / 2
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Global
    # Global Moran's I for a single variable.
    #
    # The constructor is inherited from Stat: (scope, field, weights).
    class Moran < Stat
      attr_writer :x

      # Moran's I: the sum of z_i * (spatial lag of z)_i divided by the sum
      # of z_i**2, using the row-standardized weights.
      #
      # The value is memoized in @i; assigning a new x afterwards does not
      # reset it.
      def i
        @i ||= begin
          w = @weights.standardized
          deviations = z # build the deviations once instead of three times
          z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, deviations)

          numerator = 0
          deviations.each_with_index do |zi, idx|
            numerator += zi * z_lag[idx]
          end

          numerator / deviations.sum { |zi| zi**2 }
        end
      end

      # Expected value of the statistic: -1 / (n - 1).
      def expectation
        -1.0 / (@weights.n - 1)
      end

      # Variance of the statistic, computed from the deviations z and the
      # full weights matrix.
      # Formula reference: https://en.wikipedia.org/wiki/Moran%27s_I#Expected_value
      def variance
        n = @weights.n
        wij = @weights.full
        w = wij.sum
        e = expectation

        s1 = s1_calc(n, wij)
        s2 = s2_calc(n, wij)
        s3 = s3_calc(n, z)

        s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
        s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)

        var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
        var_left - e**2
      end

      # Permutation test on shuffled x values; see Stat#mc.
      #
      # @param permutations [Integer] number of random shuffles
      # @param seed [Integer, nil] seed for a reproducible run
      def mc(permutations = 99, seed = nil)
        super(permutations, seed)
      end

      # Raw values of the field (queried once, then cached).
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
      end

      # Mean of x. Float division keeps the mean exact for integer-valued
      # fields, where `sum / size` would truncate.
      def zbar
        x.sum / x.size.to_f
      end

      # Deviations of x from its mean.
      def z
        center = zbar # computed once, not once per element
        x.map { |val| val - center }
      end

      private

      # Ratio of the mean fourth power to the squared mean second power of zs.
      def s3_calc(n, zs)
        numerator = (1.0 / n) * zs.sum { |v| v**4 }
        denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
        numerator / denominator
      end

      # Sum over k of (row sum k + column sum k) squared.
      def s2_calc(n, wij)
        s2 = 0
        n.times do |row|
          row_sum = 0
          col_sum = 0
          n.times do |col|
            row_sum += wij[row, col]
            col_sum += wij[col, row]
          end
          s2 += (row_sum + col_sum)**2
        end
        s2
      end

      # Half the sum over all pairs of (w_ij + w_ji) squared.
      def s1_calc(n, wij)
        s1 = 0
        n.times do |row|
          n.times do |col|
            s1 += (wij[row, col] + wij[col, row])**2
          end
        end
        s1 / 2
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Global
    # Base class for global stats.
    #
    # Subclasses implement +i+, +expectation+ and +variance+. The permutation
    # tests additionally need an +x+ reader and writer on the subclass, and a
    # +y+ reader and writer for the bivariate variant.
    class Stat
      # @param scope    stored as-is for subclasses to query
      # @param field    stored as-is for subclasses to query
      # @param weights  stored as-is for subclasses to use
      def initialize(scope, field, weights)
        @scope = scope
        @field = field
        @weights = weights
      end
      attr_accessor :scope, :field, :weights

      # The statistic itself; must be overridden.
      # @raise [NotImplementedError] always, in the base class
      def i
        raise NotImplementedError, 'method i not defined'
      end

      # Expected value of the statistic; must be overridden.
      # @raise [NotImplementedError] always, in the base class
      def expectation
        raise NotImplementedError, 'method expectation not implemented'
      end

      # Variance of the statistic; must be overridden.
      # @raise [NotImplementedError] always, in the base class
      def variance
        raise NotImplementedError, 'method variance not implemented'
      end

      # Standard score of the statistic: (i - expectation) / sqrt(variance).
      def z_score
        (i - expectation) / Math.sqrt(variance)
      end

      # Univariate permutation test: shuffles x and recomputes the statistic
      # on a fresh instance for every shuffle.
      #
      # @param permutations [Integer] number of random shuffles
      # @param seed [Integer, nil] seed for a reproducible run; nil lets
      #   Random pick its own seed
      # @return [Float] pseudo p-value (r + 1) / (permutations + 1)
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = permutations.times.map { x.shuffle(random: rng) }

        pseudo_p_value(shuffles, permutations) do |shuffle|
          self.class.new(@scope, @field, @weights).tap { |stat| stat.x = shuffle }
        end
      end

      # Bivariate permutation test: holds x fixed and shuffles y. Expects the
      # subclass to take (scope, x_field, y_field, weights) in its constructor.
      #
      # @param permutations [Integer] number of random shuffles
      # @param seed [Integer, nil] seed for a reproducible run
      # @return [Float] pseudo p-value (r + 1) / (permutations + 1)
      def mc_bv(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = permutations.times.map { y.shuffle(random: rng) }

        pseudo_p_value(shuffles, permutations) do |shuffle|
          self.class.new(@scope, @x_field, @y_field, @weights).tap do |stat|
            stat.x = x
            stat.y = shuffle
          end
        end
      end

      private

      # Counts the shuffles whose statistic is at least as extreme as the
      # observed one (one-sided, in the direction of the observed sign) and
      # turns the count into a pseudo p-value.
      # https://geodacenter.github.io/glossary.html#ppvalue
      #
      # Yields each shuffle and expects a stat instance prepared for it.
      def pseudo_p_value(shuffles, permutations)
        observed = i
        extreme = shuffles.count do |shuffle|
          simulated = yield(shuffle).i
          observed.positive? ? simulated >= observed : simulated <= observed
        end

        (extreme + 1.0) / (permutations + 1.0)
      end

      # Random number generator; seeded only when a seed is given.
      def gen_rng(seed)
        seed ? Random.new(seed) : Random.new
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/global/stat'
4
+ require 'spatial_stats/global/bivariate_moran'
5
+ require 'spatial_stats/global/moran'
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Local bivariate Moran statistic: for every observation, the product of
    # its standardized x value and the spatial lag of standardized y.
    class BivariateMoran < Stat
      attr_accessor :scope, :x_field, :y_field, :weights
      attr_writer :x, :y

      # @param scope    passed through to the variable query
      # @param x_field  name of the first variable
      # @param y_field  name of the second (spatially lagged) variable
      # @param weights  weights object
      def initialize(scope, x_field, y_field, weights)
        @scope = scope
        @x_field = x_field
        @y_field = y_field
        @weights = weights
      end

      # All local statistics, one per observation.
      def i
        x.each_with_index.map { |_value, position| i_i(position) }
      end

      # Local statistic of the observation at +idx+.
      def i_i(idx)
        x[idx] * y_lag[idx]
      end

      # Permutation test; delegates to +mc_bv+, which is not defined in this
      # class (presumably inherited from Stat - verify).
      def mc(permutations = 99, seed = nil)
        mc_bv(permutations, seed)
      end

      # Standardized values of the x field (queried once, then cached).
      def x
        @x ||= standardized_field(@x_field)
      end

      # Standardized values of the y field (queried once, then cached).
      def y
        @y ||= standardized_field(@y_field)
      end

      private

      # Queries one field for the scope and converts it to z-scores.
      def standardized_field(name)
        SpatialStats::Queries::Variables.query_field(@scope, name).standardize
      end

      # Spatial lag of y, cached.
      # NOTE(review): +w+ is not defined in this class; it is expected to come
      # from the Stat superclass - confirm.
      def y_lag
        @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Local Getis-Ord G statistic.
    #
    # With +star+ enabled the lag is a window sum over the windowed weights
    # and every denominator is the sum of all values; otherwise the neighbor
    # sum is used and each denominator leaves out the observation's own value.
    class G < Stat
      # @param scope    passed through to the variable query
      # @param field    name of the variable
      # @param weights  weights object
      # @param star [Boolean] force the star variant; when false it is still
      #   enabled automatically if the full weights matrix has a positive trace
      def initialize(scope, field, weights, star = false)
        super(scope, field, weights)
        @star = star
      end
      attr_accessor :star
      attr_writer :x, :z_lag

      # All local statistics, one per observation.
      def i
        x.each_with_index.map { |_value, idx| i_i(idx) }
      end

      # Local statistic of the observation at +idx+: lag / denominator.
      def i_i(idx)
        x_lag[idx] / denominators[idx]
      end

      # Raw values of the field (queried once, then cached).
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
      end
      alias z x

      # Whether the star variant is in effect: either requested explicitly or
      # implied by a positive trace of the full weights matrix.
      def star?
        @star ||= diagonal_weights?
      end

      private

      # Trace check, evaluated once. `@star ||= ...` alone never caches a
      # false result, which rebuilt the full matrix on every star? call.
      def diagonal_weights?
        return @diagonal_weights if defined?(@diagonal_weights)

        @diagonal_weights = weights.full.trace.positive?
      end

      # Weights used for the lag, cached.
      def w
        @w ||= begin
          if star?
            # TODO: try to fix this because it will still likely be a
            # bottleneck in mc testing
            weights.full.windowed.row_standardized
          else
            weights.standardized
          end
        end
      end

      # Spatial lag of x, cached; a window sum when star is in effect.
      def z_lag
        @z_lag ||= begin
          if star?
            SpatialStats::Utils::Lag.window_sum(w, x)
          else
            SpatialStats::Utils::Lag.neighbor_sum(w, x)
          end
        end
      end
      alias x_lag z_lag

      # Denominator per observation, cached: the total of x for the star
      # variant, otherwise the total of x without the observation itself.
      def denominators
        @denominators ||= begin
          n = w.shape[0]
          total = x.sum
          if star?
            [total] * n
          else
            # total minus own value; avoids copying the array n times
            (0...n).map { |idx| total - x[idx] }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Local Geary statistic: for observation i, the weighted sum over j of
    # (z_i - z_j)**2 with row-standardized weights and standardized values.
    class Geary < Stat
      def initialize(scope, field, weights)
        super(scope, field, weights)
      end
      attr_writer :x

      # All local statistics, one per observation.
      def i
        z.each_with_index.map { |_zi, idx| i_i(idx) }
      end

      # Local statistic of the observation at +idx+.
      def i_i(idx)
        local_geary(z, idx)
      end

      # Standardized values of the field (queried once, then cached).
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
                                               .standardize
      end
      alias z x

      # Conditional permutation test. For each observation its own value is
      # held in place while the other values are shuffled, and only that
      # observation's statistic is compared against the observed one.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed [Integer, nil] seed for a reproducible run
      # @return [Array<Float>] pseudo p-value (r + 1) / (permutations + 1)
      #   per observation
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(x, permutations, rng)

        # rs[idx] counts the equal-or-more-extreme samples for observation idx
        i_orig = i
        rs = Array.new(i_orig.size, 0)

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          perms.each do |perm|
            # Evaluated against this instance's cached weights; a new instance
            # per permutation would row-standardize the full matrix each time.
            ii_new = local_geary(perm, idx)

            # https://geodacenter.github.io/glossary.html#ppvalue
            # NOTE: this is inconsistent with the output from GeoDa
            # for local permutation tests, they seem to use greater than
            # not greater than or equal to. I'm going to go by the definition
            # in the glossary for now.
            if ii_orig.positive?
              rs[idx] += 1 if ii_new >= ii_orig
            else
              rs[idx] += 1 if ii_new <= ii_orig
            end
          end
        end

        rs.map { |ri| (ri + 1.0) / (permutations + 1.0) }
      end

      private

      # Weighted sum of squared differences between values[idx] and every
      # other entry of +values+.
      def local_geary(values, idx)
        n = w.shape[0]
        sum = 0
        n.times do |j|
          sum += w[idx, j] * ((values[idx] - values[j])**2)
        end
        sum
      end

      # Row-standardized full weights matrix, cached.
      def w
        @w ||= weights.full.row_standardized
      end
    end
  end
end