spatial_stats 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 229947cadddad49771285a051c87a3797166fd4407a24fb27c9b2e3ccabe709c
4
+ data.tar.gz: a4e255eea9ceb986ae3078939cac4f7bc766fc3304801531095883091ceb1c79
5
+ SHA512:
6
+ metadata.gz: d8dff7610402b8867f463d5e0c6a1b4d4ca00ce3fffc0047c2f470f71d71c4856e1534ead05ceaca3fef6d060a46e0e2151e7ac27d988625255bba3063f4bb35
7
+ data.tar.gz: ab930c04d462cc5ff9da4479078ee14ffdfe4e657430176d9d4583322077f87b433115b98edcfbed6268bd1a24204fdd13ef8808e0d31bfa485e9a3790a1d116
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2020 Keith Doggett
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,96 @@
1
+ # SpatialStats
2
+
3
+ Short description and motivation.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'spatial_stats'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```bash
16
+ $ bundle
17
+ ```
18
+
19
+ Or install it yourself as:
20
+
21
+ ```bash
22
+ $ gem install spatial_stats
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ How to use my plugin.
28
+
29
+ ## Contributing
30
+
31
+ Once cloned, run the following commands to set up the test database.
32
+
33
+ ```sh
34
+ cd ./spatial_stats
35
+ bundle install
36
+ cd test/dummy
37
+ rake db:create
38
+ rake db:migrate
39
+ ```
40
+
41
+ If you are getting an error, you may need to set the following environment variables.
42
+
43
+ ```
44
+ $PGUSER # default "postgres"
45
+ $PGPASSWORD # default ""
46
+ $PGHOST # default "127.0.0.1"
47
+ $PGPORT # default "5432"
48
+ $PGDATABASE # default "spatial_stats_test"
49
+ ```
50
+
51
+ If the dummy app is set up correctly, run the following:
52
+
53
+ ```
54
+ cd ../..
55
+ rake
56
+ ```
57
+
58
+ This will run the tests. If they all pass, then your environment is set up correctly.
59
+
60
+ Note: It is recommended to have GEOS installed and linked to RGeo. You can test this by running the following:
61
+
62
+ ```
63
+ cd test/dummy
64
+ rails c
65
+
66
+ RGeo::Geos.supported?
67
+ # => true
68
+ ```
69
+
70
+ ## TODO
71
+
72
+ - Memoize expensive functions within classes
73
+ - Make star a parameter to getis-ord class
74
+
75
+ ## Future Work
76
+
77
+ #### General
78
+
79
+ - Refactor stats to inherit an abstract class.
80
+
81
+ #### Weights
82
+
83
+ - Add Kernel based weighting.
84
+
85
+ #### Utils
86
+
87
+ - Rate smoothing
88
+ - Bayes smoothing
89
+
90
+ #### Local
91
+
92
+ - Join Count Statistic
93
+
94
+ ## License
95
+
96
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
# Load the bundle if Bundler is available; otherwise print a hint and carry on.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc`: API documentation built from the README and everything in lib/.
RDoc::Task.new(:rdoc) do |doc_task|
  doc_task.rdoc_dir = 'rdoc'
  doc_task.title = 'SpatialStats'
  doc_task.options.push('--line-numbers')
  doc_task.rdoc_files.include('README.md')
  doc_task.rdoc_files.include('lib/**/*.rb')
end

require 'bundler/gem_tasks'

require 'rake/testtask'

# `rake test`: runs every *_test.rb file found under test/.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = false
  test_task.warning = false # shut up annoying warnings
end

# Plain `rake` runs the test suite.
task default: :test
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
# Descriptive-statistics helpers added to every Enumerable.
#
# Element counts are taken with +count+ rather than +size+: +size+ is not part
# of Enumerable, and on an Enumerator whose length cannot be computed lazily
# it returns nil, which +to_f+ turns into 0.0 and silently corrupts the result.
module Enumerable
  # Rescales each element to (value - mean) / sqrt(sample_variance).
  #
  # There is no guard for a zero variance or for fewer than two elements; the
  # division is performed as-is in those cases.
  #
  # @return [Array] one rescaled value per element, in iteration order
  def standardize
    m = mean
    std = Math.sqrt(sample_variance)
    map { |v| (v - m) / std }
  end

  # Arithmetic mean: the sum of the elements divided by their count (as Float).
  #
  # @return [Numeric]
  def mean
    sum / count.to_f
  end

  # Sample variance: the sum of squared deviations from the mean divided by
  # (count - 1).
  #
  # @return [Numeric]
  def sample_variance
    m = mean
    squared_deviations = sum { |v| (v - m)**2 }
    squared_deviations / (count - 1).to_f
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://geodacenter.github.io/workbook/5b_global_adv/lab5b.html
4
module SpatialStats
  module Global
    # Global bivariate Moran statistic relating one field (x) to the
    # neighbor sum of a second field (y).
    #
    # Both variables are standardized (via Enumerable#standardize) when they
    # are loaded from the scope.
    class BivariateMoran < Stat
      # @param scope   passed unchanged to Queries::Variables.query_field
      # @param x_field field queried for the x variable
      # @param y_field field queried for the y variable
      # @param weights weights object; must respond to +standardized+, +full+
      #   and +n+ (these are the only messages this class sends to it)
      def initialize(scope, x_field, y_field, weights)
        @scope = scope
        @x_field = x_field
        @y_field = y_field
        @weights = weights
      end
      # Field names are exposed the same way Local::BivariateMoran exposes them.
      attr_accessor :x_field, :y_field
      # Writers let the permutation test inject pre-computed/shuffled values.
      attr_writer :x, :y

      # Sum over observations of x[idx] * y_lag[idx], divided by the sum of
      # squared x values. y_lag is whatever Lag.neighbor_sum returns for the
      # standardized weights and y.
      def i
        w = @weights.standardized
        y_lag = SpatialStats::Utils::Lag.neighbor_sum(w, y)
        # Plain left fold: same additions, in the same order, as a += loop.
        numerator = x.each_with_index.inject(0) do |acc, (xi, idx)|
          acc + xi * y_lag[idx]
        end

        denominator = x.sum { |xi| xi**2 }
        numerator / denominator
      end

      # Expected value of the statistic: -1 / (n - 1).
      def expectation
        -1.0 / (@weights.n - 1)
      end

      # Variance of the statistic, built from the s1..s5 terms.
      # https://en.wikipedia.org/wiki/Moran%27s_I#Expected_value
      #
      # NOTE(review): only x enters the s3 term; y is not used here.
      # Confirm that this is the intended variance for the bivariate case.
      def variance
        n = @weights.n
        wij = @weights.full
        w = wij.sum
        e = expectation

        s1 = s1_calc(n, wij)
        s2 = s2_calc(n, wij)
        s3 = s3_calc(n, x)

        s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
        s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)

        var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
        var_right = e**2
        var_left - var_right
      end

      # Permutation test; delegates to Stat#mc_bv, which holds x fixed and
      # shuffles y.
      #
      # @param permutations [Integer] number of shuffles
      # @param seed [Integer, nil] RNG seed; nil for a non-reproducible run
      def mc(permutations = 99, seed = nil)
        mc_bv(permutations, seed)
      end

      # Standardized x variable, queried once unless set through the writer.
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @x_field)
                                               .standardize
      end

      # Standardized y variable, queried once unless set through the writer.
      def y
        @y ||= SpatialStats::Queries::Variables.query_field(@scope, @y_field)
                                               .standardize
      end

      private

      # Mean fourth power of zs divided by the squared mean second power.
      def s3_calc(n, zs)
        numerator = (1.0 / n) * zs.sum { |v| v**4 }
        denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
        numerator / denominator
      end

      # Sum over rows of (row sum + matching column sum) squared.
      def s2_calc(n, wij)
        n.times.inject(0) do |total, row|
          row_sum = n.times.inject(0) { |acc, col| acc + wij[row, col] }
          col_sum = n.times.inject(0) { |acc, col| acc + wij[col, row] }
          total + (row_sum + col_sum)**2
        end
      end

      # Half the sum over all (row, col) pairs of (w[row, col] + w[col, row])**2.
      def s1_calc(n, wij)
        # The running total is threaded through both folds so the additions
        # happen in the same order as a single nested accumulation loop.
        total = n.times.inject(0) do |outer, row|
          n.times.inject(outer) do |acc, col|
            acc + (wij[row, col] + wij[col, row])**2
          end
        end
        total / 2
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Global
    # Global Moran's I for a single field.
    #
    # The constructor is inherited from Stat: (scope, field, weights).
    class Moran < Stat
      # Writer lets the permutation test inject shuffled values.
      attr_writer :x

      # Computes Moran's I: the numerator is the sum of zi * spatial lag of
      # zi, the denominator is the sum of zi**2. Row-standardized weights
      # must be used. The result is memoized.
      def i
        @i ||= begin
          w = @weights.standardized
          deviations = z # computed once; z is rebuilt on every call
          z_lag = SpatialStats::Utils::Lag.neighbor_sum(w, deviations)
          # Plain left fold: same additions, in the same order, as a += loop.
          numerator = deviations.each_with_index.inject(0) do |acc, (zi, j)|
            acc + zi * z_lag[j]
          end

          denominator = deviations.sum { |zi| zi**2 }
          numerator / denominator
        end
      end

      # Expected value of the statistic: -1 / (n - 1).
      def expectation
        -1.0 / (@weights.n - 1)
      end

      # Variance of the statistic, built from the s1..s5 terms.
      # https://en.wikipedia.org/wiki/Moran%27s_I#Expected_value
      def variance
        n = @weights.n
        wij = @weights.full
        w = wij.sum
        e = expectation

        s1 = s1_calc(n, wij)
        s2 = s2_calc(n, wij)
        s3 = s3_calc(n, z)

        s4 = (n**2 - 3 * n + 3) * s1 - n * s2 + 3 * (w**2)
        s5 = (n**2 - n) * s1 - 2 * n * s2 + 6 * (w**2)

        var_left = (n * s4 - s3 * s5) / ((n - 1) * (n - 2) * (n - 3) * w**2)
        var_right = e**2
        var_left - var_right
      end

      # Permutation test; supplies defaults and defers to Stat#mc.
      #
      # @param permutations [Integer] number of shuffles
      # @param seed [Integer, nil] RNG seed; nil for a non-reproducible run
      def mc(permutations = 99, seed = nil)
        super(permutations, seed)
      end

      # Raw variable, queried once unless set through the writer.
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
      end

      # Mean of x. The count is converted to Float so Integer-valued data is
      # not truncated by integer division.
      def zbar
        x.sum / x.size.to_f
      end

      # Deviations of x from its mean. The mean is computed once up front
      # rather than once per element.
      def z
        center = zbar
        x.map { |val| val - center }
      end

      private

      # Mean fourth power of zs divided by the squared mean second power.
      def s3_calc(n, zs)
        numerator = (1.0 / n) * zs.sum { |v| v**4 }
        denominator = ((1.0 / n) * zs.sum { |v| v**2 })**2
        numerator / denominator
      end

      # Sum over rows of (row sum + matching column sum) squared.
      def s2_calc(n, wij)
        n.times.inject(0) do |total, row|
          row_sum = n.times.inject(0) { |acc, col| acc + wij[row, col] }
          col_sum = n.times.inject(0) { |acc, col| acc + wij[col, row] }
          total + (row_sum + col_sum)**2
        end
      end

      # Half the sum over all (row, col) pairs of (w[row, col] + w[col, row])**2.
      def s1_calc(n, wij)
        # The running total is threaded through both folds so the additions
        # happen in the same order as a single nested accumulation loop.
        total = n.times.inject(0) do |outer, row|
          n.times.inject(outer) do |acc, col|
            acc + (wij[row, col] + wij[col, row])**2
          end
        end
        total / 2
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Global
    # Base class for global stats.
    #
    # Subclasses implement +i+, +expectation+ and +variance+, and provide an
    # +x+ reader/writer (plus +y+ for bivariate statistics); the permutation
    # tests below rely on those.
    class Stat
      attr_accessor :scope, :field, :weights

      def initialize(scope, field, weights)
        @scope = scope
        @field = field
        @weights = weights
      end

      # @raise [NotImplementedError] unless overridden
      def i
        raise NotImplementedError, 'method i not defined'
      end

      # @raise [NotImplementedError] unless overridden
      def expectation
        raise NotImplementedError, 'method expectation not implemented'
      end

      # @raise [NotImplementedError] unless overridden
      def variance
        raise NotImplementedError, 'method variance not implemented'
      end

      # (i - expectation) / sqrt(variance)
      def z_score
        (i - expectation) / Math.sqrt(variance)
      end

      # Univariate permutation test: x is shuffled +permutations+ times and
      # the statistic is recomputed for each shuffle.
      #
      # The defaults match the ones the subclasses in this library declare.
      #
      # @param permutations [Integer] number of shuffles
      # @param seed [Integer, nil] RNG seed; nil for a non-reproducible run
      # @return [Float] one-sided pseudo p-value, (r + 1) / (permutations + 1)
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        # All shuffles are drawn before anything else touches the data.
        shuffles = Array.new(permutations) { x.shuffle(random: rng) }

        pseudo_p_value(shuffles) do |shuffle|
          stat = self.class.new(@scope, @field, @weights)
          stat.x = shuffle
          stat
        end
      end

      # Bivariate permutation test: x is held fixed and y is shuffled.
      #
      # Builds new instances with (scope, x_field, y_field, weights), so it is
      # only usable from subclasses with that constructor.
      #
      # @param permutations [Integer] number of shuffles
      # @param seed [Integer, nil] RNG seed; nil for a non-reproducible run
      # @return [Float] one-sided pseudo p-value, (r + 1) / (permutations + 1)
      def mc_bv(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = Array.new(permutations) { y.shuffle(random: rng) }

        pseudo_p_value(shuffles) do |shuffle|
          stat = self.class.new(@scope, @x_field, @y_field, @weights)
          stat.x = x
          stat.y = shuffle
          stat
        end
      end

      private

      # Counts the shuffles whose statistic is equal to or more extreme than
      # the observed one and turns the count into a pseudo p-value. The test
      # is one sided: the upper tail is used when the observed value is
      # positive, the lower tail otherwise.
      # https://geodacenter.github.io/glossary.html#ppvalue
      #
      # Yields each shuffle and expects a stat object (responding to +i+) back.
      def pseudo_p_value(shuffles)
        i_orig = i
        upper_tail = i_orig.positive?

        r = shuffles.count do |shuffle|
          i_new = yield(shuffle).i
          upper_tail ? i_new >= i_orig : i_new <= i_orig
        end

        (r + 1.0) / (shuffles.size + 1.0)
      end

      # Seeded generator when a seed is given, a fresh one otherwise.
      def gen_rng(seed)
        seed ? Random.new(seed) : Random.new
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spatial_stats/global/stat'
4
+ require 'spatial_stats/global/bivariate_moran'
5
+ require 'spatial_stats/global/moran'
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Local bivariate Moran statistic: one value per observation, each being
    # the observation's standardized x times the matching entry of the
    # neighbor sum of standardized y.
    class BivariateMoran < Stat
      attr_accessor :scope, :x_field, :y_field, :weights
      attr_writer :x, :y

      # @param scope   passed unchanged to Queries::Variables.query_field
      # @param x_field field queried for the x variable
      # @param y_field field queried for the y variable
      # @param weights weights object
      def initialize(scope, x_field, y_field, weights)
        @scope = scope
        @x_field = x_field
        @y_field = y_field
        @weights = weights
      end

      # Statistic for every observation, in the order of x.
      def i
        x.each_with_index.map { |_value, position| i_i(position) }
      end

      # Statistic for the single observation at +idx+.
      def i_i(idx)
        own_value = x[idx]
        neighbor_value = y_lag[idx]
        own_value * neighbor_value
      end

      # Permutation test; hands off to mc_bv.
      # NOTE(review): mc_bv is not defined in this class - presumably it is
      # inherited from Local::Stat; confirm.
      def mc(permutations = 99, seed = nil)
        mc_bv(permutations, seed)
      end

      # Standardized x variable, queried once unless set through the writer.
      def x
        @x ||= begin
          raw = SpatialStats::Queries::Variables.query_field(@scope, @x_field)
          raw.standardize
        end
      end

      # Standardized y variable, queried once unless set through the writer.
      def y
        @y ||= begin
          raw = SpatialStats::Queries::Variables.query_field(@scope, @y_field)
          raw.standardize
        end
      end

      private

      # Memoized result of Lag.neighbor_sum for w and y.
      # NOTE(review): w is not defined in this class - presumably it is
      # supplied by Local::Stat; confirm.
      def y_lag
        @y_lag ||= SpatialStats::Utils::Lag.neighbor_sum(w, y)
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Local G statistic (see the README TODO "Make star a parameter to
    # getis-ord class"). One value per observation: the lagged value divided
    # by a denominator built from the sum of x.
    class G < Stat
      # @param scope   passed unchanged to Queries::Variables.query_field
      # @param field   field queried for the variable
      # @param weights weights object; must respond to +full+ and
      #   +standardized+
      # @param star [Boolean] request the "star" variant; when falsy the
      #   variant is still switched on if the full weights matrix has a
      #   positive trace (see #star?)
      def initialize(scope, field, weights, star = false)
        super(scope, field, weights)
        @star = star
      end
      attr_accessor :star
      attr_writer :x, :z_lag

      # Statistic for every observation, in the order of x.
      def i
        x.each_with_index.map { |_x_val, idx| i_i(idx) }
      end

      # Statistic for the single observation at +idx+.
      def i_i(idx)
        x_lag[idx] / denominators[idx]
      end

      # Raw variable, queried once unless set through the writer.
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
      end
      alias z x

      # True when star was requested, or when the full weights matrix has a
      # positive trace.
      #
      # The trace check is cached in its own variable: caching it in @star
      # with ||= would repeat the check on every call whenever the answer is
      # false.
      def star?
        return true if @star

        @positive_trace = weights.full.trace.positive? if @positive_trace.nil?
        @star = @positive_trace
      end

      private

      # Weights used for the lag: windowed and row-standardized for the star
      # variant, otherwise the standardized weights.
      def w
        @w ||= begin
          if star?
            # TODO: try to fix this because it will still likely be a
            # bottleneck in mc testing
            weights.full.windowed.row_standardized
          else
            weights.standardized
          end
        end
      end

      # Lag of x: Lag.window_sum for the star variant, Lag.neighbor_sum
      # otherwise.
      def z_lag
        @z_lag ||= begin
          if star?
            SpatialStats::Utils::Lag.window_sum(w, x)
          else
            SpatialStats::Utils::Lag.neighbor_sum(w, x)
          end
        end
      end
      alias x_lag z_lag

      # Per-observation denominators.
      #
      # Star variant: the sum of all x, repeated once per row of w.
      # Otherwise: the sum of every x except the observation's own value,
      # obtained as (total - own value) instead of re-summing a copy of the
      # array for every observation.
      def denominators
        @denominators ||= begin
          total = x.sum
          if star?
            Array.new(w.shape[0], total)
          else
            x.map { |value| total - value }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
module SpatialStats
  module Local
    # Local Geary statistic: for each observation, the weighted sum of squared
    # differences between its standardized value and every other value.
    class Geary < Stat
      def initialize(scope, field, weights)
        super(scope, field, weights)
      end
      # Writer lets callers inject pre-computed values.
      attr_writer :x

      # Statistic for every observation, in the order of z.
      def i
        z.each_with_index.map { |_zi, idx| i_i(idx) }
      end

      # Statistic for the single observation at +idx+.
      def i_i(idx)
        local_geary(z, idx)
      end

      # Standardized variable, queried once unless set through the writer.
      def x
        @x ||= SpatialStats::Queries::Variables.query_field(@scope, @field)
                                               .standardize
      end
      alias z x

      # Conditional permutation test.
      #
      # For local tests, we need to shuffle the values but for each item,
      # hold its value in place and shuffle its neighbors. Then we only test
      # for that item instead of the entire set. This is done for each item.
      # The shuffles come from crand (not defined in this class; presumably
      # inherited from Local::Stat), indexed by observation.
      #
      # Each permutation is scored against this instance's memoized weights
      # matrix. Building a new stat object per permutation would re-derive
      # weights.full.row_standardized every time for the same result; this
      # assumes that derivation depends only on +weights+, which memoizing
      # the matrix already assumes.
      #
      # @param permutations [Integer] number of shuffles per observation
      # @param seed [Integer, nil] RNG seed
      # @return [Array<Float>] pseudo p-value per observation,
      #   (r + 1) / (permutations + 1)
      def mc(permutations = 99, seed = nil)
        rng = gen_rng(seed)
        shuffles = crand(x, permutations, rng)

        # r is the number of equal to or more extreme samples
        i_orig = i
        rs = Array.new(i_orig.size, 0)

        shuffles.each_with_index do |perms, idx|
          ii_orig = i_orig[idx]
          upper_tail = ii_orig.positive?

          perms.each do |perm|
            ii_new = local_geary(perm, idx)

            # https://geodacenter.github.io/glossary.html#ppvalue
            # NOTE: this is inconsistent with the output from GeoDa
            # for local permutation tests, they seem to use greater than
            # not greater than or equal to. I'm going to go by the definition
            # in the glossary for now.
            extreme = upper_tail ? ii_new >= ii_orig : ii_new <= ii_orig
            rs[idx] += 1 if extreme
          end
        end

        rs.map { |ri| (ri + 1.0) / (permutations + 1.0) }
      end

      private

      # Row-standardized full weights matrix, memoized.
      def w
        @w ||= weights.full.row_standardized
      end

      # Sum over j of w[idx, j] * (values[idx] - values[j])**2, accumulated
      # left to right starting from 0.
      def local_geary(values, idx)
        matrix = w
        (0..matrix.shape[0] - 1).inject(0) do |acc, j|
          acc + matrix[idx, j] * ((values[idx] - values[j])**2)
        end
      end
    end
  end
end