spatial_stats 0.2.2 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +126 -55
- data/Rakefile +7 -0
- data/ext/spatial_stats/csr_matrix.c +380 -0
- data/ext/spatial_stats/csr_matrix.h +34 -0
- data/ext/spatial_stats/extconf.rb +6 -0
- data/ext/spatial_stats/spatial_stats.c +32 -0
- data/lib/spatial_stats.rb +1 -0
- data/lib/spatial_stats/global/bivariate_moran.rb +60 -22
- data/lib/spatial_stats/global/moran.rb +43 -36
- data/lib/spatial_stats/global/stat.rb +55 -27
- data/lib/spatial_stats/local/bivariate_moran.rb +84 -2
- data/lib/spatial_stats/local/geary.rb +35 -5
- data/lib/spatial_stats/local/getis_ord.rb +45 -17
- data/lib/spatial_stats/local/moran.rb +39 -9
- data/lib/spatial_stats/local/multivariate_geary.rb +45 -22
- data/lib/spatial_stats/local/stat.rb +112 -80
- data/lib/spatial_stats/narray_ext.rb +5 -5
- data/lib/spatial_stats/spatial_stats.so +0 -0
- data/lib/spatial_stats/utils.rb +25 -0
- data/lib/spatial_stats/utils/lag.rb +10 -10
- data/lib/spatial_stats/version.rb +1 -1
- data/lib/spatial_stats/weights/contiguous.rb +20 -10
- data/lib/spatial_stats/weights/distant.rb +38 -20
- data/lib/spatial_stats/weights/weights_matrix.rb +83 -26
- metadata +33 -11
- data/MIT-LICENSE +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 053a1b234d54d2231e35c7eb74547b086f7415fc96190bc7d3afb25f51dd879e
|
4
|
+
data.tar.gz: a85d682009559812fb20695c5bea8f2978acf49a12d0b3df2506d58ced8c5e77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 868ec3b50f4b1b9fc261d6dd94439bba62a08d4c496a48d164c41590d1cd5a088c87de682a7cb96bcf4dbe0d534b5cf35e136e39d42c505ba8cc1f4c3569a3d4
|
7
|
+
data.tar.gz: b767cc6ed7f22ce4096965a6172f77d21a348d51bd84dc3b96f0d51bf16e2f4f92617e21e9bcda1a36eeda2cd5c07c51452dd55d9e5a62567e206bf7a06c8fe0
|
data/README.md
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
![Spatial Stats](/assets/ruby.svg)
|
2
|
+
|
1
3
|
[![Build Status](https://travis-ci.com/keithdoggett/spatial_stats.svg?branch=master)](https://travis-ci.com/keithdoggett/spatial_stats)
|
2
4
|
|
5
|
+
[Docs](https://keithdoggett.github.io/spatial_stats)
|
6
|
+
|
3
7
|
# SpatialStats
|
4
8
|
|
5
|
-
SpatialStats is an ActiveRecord plugin that utilizes PostGIS
|
9
|
+
SpatialStats is an ActiveRecord/Rails plugin that utilizes PostGIS to compute weights/statistics of spatial data sets in Rails Apps.
|
6
10
|
|
7
11
|
## Installation
|
8
12
|
|
@@ -58,42 +62,50 @@ weights = SpatialStats::Weights::Distant.idw_knn(scope, :geom, 5)
|
|
58
62
|
|
59
63
|
Weight matrices can be defined by a hash that describes each key's neighbor and weight.
|
60
64
|
|
61
|
-
Note: Currently, the keys must be numeric.
|
62
|
-
|
63
65
|
Example: Define WeightsMatrix and get the matrix in row_standardized format.
|
64
66
|
|
65
67
|
```ruby
|
66
68
|
weights = {
|
67
|
-
1 => [{
|
68
|
-
2 => [{
|
69
|
-
3 => [{
|
70
|
-
4 => [{
|
69
|
+
1 => [{ id: 2, weight: 1 }, { id: 4, weight: 1 }],
|
70
|
+
2 => [{ id: 1, weight: 1 }],
|
71
|
+
3 => [{ id: 4, weight: 1 }],
|
72
|
+
4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
|
71
73
|
}
|
72
74
|
keys = weights.keys
|
73
|
-
wm = SpatialStats::Weights::WeightsMatrix.new(
|
74
|
-
# => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:
|
75
|
-
|
76
|
-
wm.
|
77
|
-
# =>
|
78
|
-
|
79
|
-
|
80
|
-
# [
|
81
|
-
#
|
75
|
+
wm = SpatialStats::Weights::WeightsMatrix.new(weights)
|
76
|
+
# => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>1}, {:id=>4, :weight=>1}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>1}, {:id=>3, :weight=>1}]}, @n=4>
|
77
|
+
|
78
|
+
wm = wm.standardize
|
79
|
+
# => #<SpatialStats::Weights::WeightsMatrix:0x0000561e205677c0 @keys=[1, 2, 3, 4], @weights={1=>[{:id=>2, :weight=>0.5}, {:id=>4, :weight=>0.5}], 2=>[{:id=>1, :weight=>1}], 3=>[{:id=>4, :weight=>1}], 4=>[{:id=>1, :weight=>0.5}, {:id=>3, :weight=>0.5}]}, @n=4>
|
80
|
+
|
81
|
+
wm.dense
|
82
|
+
# => Numo::DFloat[
|
83
|
+
# [0, 0.5, 0, 0.5],
|
84
|
+
# [1, 0, 0, 0],
|
85
|
+
# [0, 0, 0, 1],
|
86
|
+
# [0.5, 0, 0.5, 0]
|
87
|
+
# ]
|
88
|
+
|
89
|
+
wm.sparse
|
90
|
+
# => #<SpatialStats::Weights::CSRMatrix @m=4, @n=4, @nnz=6>
|
82
91
|
```
|
83
92
|
|
84
93
|
### Lagged Variables
|
85
94
|
|
86
|
-
Spatially lagged variables can be computed with
|
95
|
+
Spatially lagged variables can be computed with a weights matrix and a 1-D vector (`Array`).
|
87
96
|
|
88
97
|
#### Compute a Lagged Variable
|
89
98
|
|
90
99
|
```ruby
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
100
|
+
weights = {
|
101
|
+
1 => [{ id: 2, weight: 1 }, { id: 4, weight: 1 }],
|
102
|
+
2 => [{ id: 1, weight: 1 }],
|
103
|
+
3 => [{ id: 4, weight: 1 }],
|
104
|
+
4 => [{ id: 1, weight: 1 }, { id: 3, weight: 1 }]
|
105
|
+
}
|
106
|
+
wm = SpatialStats::Weights::WeightsMatrix.new(weights).standardize
|
95
107
|
vec = [1, 2, 3, 4]
|
96
|
-
lagged_var = SpatialStats::Utils::Lag.neighbor_sum(
|
108
|
+
lagged_var = SpatialStats::Utils::Lag.neighbor_sum(wm, vec)
|
97
109
|
# => [3.0, 1.0, 4.0, 2.0]
|
98
110
|
```
|
99
111
|
|
@@ -118,6 +130,26 @@ moran.i
|
|
118
130
|
# => 0.834
|
119
131
|
```
|
120
132
|
|
133
|
+
#### Compute Moran's I without Querying Data
|
134
|
+
|
135
|
+
To calculate the statistic from an array of data instead of querying a database field, pass `nil` as the field and assign the data to `x`. The order of the data must correspond to the order of `weights.keys`.
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
scope = County.all
|
139
|
+
weights = SpatialStats::Weights::Contiguous.rook(scope, :geom)
|
140
|
+
|
141
|
+
field = nil
|
142
|
+
moran = SpatialStats::Global::Moran.new(scope, field, weights)
|
143
|
+
# => <SpatialStats::Global::Moran>
|
144
|
+
|
145
|
+
# data is automatically standardized on input
|
146
|
+
data = [1,2,3,4,5,6]
|
147
|
+
moran.x = data
|
148
|
+
|
149
|
+
moran.stat
|
150
|
+
# => 0.521
|
151
|
+
```
|
152
|
+
|
121
153
|
#### Compute Moran's I Z-Score
|
122
154
|
|
123
155
|
```ruby
|
@@ -144,6 +176,20 @@ moran.mc(999, 123_456)
|
|
144
176
|
# => 0.003
|
145
177
|
```
|
146
178
|
|
179
|
+
#### Get Summary of Permutation Test
|
180
|
+
|
181
|
+
All stat classes have the `summary` method which takes `permutations` and `seed` as its parameters. `summary` runs `stat` and `mc` then combines the results into a hash.
|
182
|
+
|
183
|
+
```ruby
|
184
|
+
scope = County.all
|
185
|
+
weights = SpatialStats::Weights::Contiguous.rook(scope, :geom)
|
186
|
+
moran = SpatialStats::Global::Moran.new(scope, :avg_income, weights)
|
187
|
+
# => <SpatialStats::Global::Moran>
|
188
|
+
|
189
|
+
moran.summary(999, 123_456)
|
190
|
+
# => {stat: 0.834, p: 0.003}
|
191
|
+
```
|
192
|
+
|
147
193
|
### Local Stats
|
148
194
|
|
149
195
|
Local stats compute a value for each observation in the dataset, like how similar its neighbors are to itself. Local stats operate similarly to global stats, except that almost every operation will return an array of length `n` where `n` is the number of observations in the dataset.
|
@@ -165,6 +211,26 @@ moran.i
|
|
165
211
|
# => [0.888, 0.675, 0.2345, -0.987, -0.42, ...]
|
166
212
|
```
|
167
213
|
|
214
|
+
#### Compute Moran's I without Querying Data
|
215
|
+
|
216
|
+
To calculate the statistic from an array of data instead of querying a database field, pass `nil` as the field and assign the data to `x`. The order of the data must correspond to the order of `weights.keys`.
|
217
|
+
|
218
|
+
```ruby
|
219
|
+
scope = County.all
|
220
|
+
weights = SpatialStats::Weights::Contiguous.rook(scope, :geom)
|
221
|
+
|
222
|
+
field = nil
|
223
|
+
moran = SpatialStats::Local::Moran.new(scope, field, weights)
|
224
|
+
# => <SpatialStats::Local::Moran>
|
225
|
+
|
226
|
+
# data is automatically standardized on input
|
227
|
+
data = [1,2,3,4,5,6]
|
228
|
+
moran.x = data
|
229
|
+
|
230
|
+
moran.stat
|
231
|
+
# => [0.521, 0.123, -0.432, -0.56, ...]
|
232
|
+
```
|
233
|
+
|
168
234
|
#### Compute Moran's I Z-Scores
|
169
235
|
|
170
236
|
Note: Many classes do not have a variance or expectation method implemented and this will raise a `NotImplementedError`.
|
@@ -193,6 +259,20 @@ moran.mc(999, 123_456)
|
|
193
259
|
# => [0.24, 0.13, 0.53, 0.023, 0.65, ...]
|
194
260
|
```
|
195
261
|
|
262
|
+
#### Get Summary of Permutation Test
|
263
|
+
|
264
|
+
All stat classes have the `summary` method which takes `permutations` and `seed` as its parameters. `summary` runs `stat`, `mc`, and `groups` then combines the results into an array of hashes, one per key in `weights.keys`.
|
265
|
+
|
266
|
+
```ruby
|
267
|
+
scope = County.all
|
268
|
+
weights = SpatialStats::Weights::Contiguous.rook(scope, :geom)
|
269
|
+
moran = SpatialStats::Local::Moran.new(scope, :avg_income, weights)
|
270
|
+
# => <SpatialStats::Local::Moran>
|
271
|
+
|
272
|
+
moran.summary(999, 123_456)
|
273
|
+
# => [{key: 1, stat: 0.521, p: 0.24, group: 'HH'}, ...]
|
274
|
+
```
|
275
|
+
|
196
276
|
## Contributing
|
197
277
|
|
198
278
|
Once cloned, run the following commands to setup the test database.
|
@@ -234,46 +314,37 @@ RGeo::Geos.supported?
|
|
234
314
|
# => true
|
235
315
|
```
|
236
316
|
|
237
|
-
##
|
238
|
-
|
239
|
-
- ~~Memoize expensive functions within classes~~
|
240
|
-
- ~~Make star a parameter to getis-ord class~~
|
241
|
-
- ~~Add examples/usage to docs~~
|
242
|
-
- ~~Create RDocs~~
|
243
|
-
- Refactor Global Moran and BVMoran
|
244
|
-
- Support non-numeric keys in WeightsMatrix/General refactor
|
245
|
-
- Write SparseMatrix C ext
|
246
|
-
|
247
|
-
## Future Work
|
248
|
-
|
249
|
-
#### General
|
250
|
-
|
251
|
-
- ~~Refactor stats to inherit an abstract class.~~
|
252
|
-
- Change WeightsMatrix class and Stat classes to utilize sparse matrix methods.
|
253
|
-
- Split into two separate gems spatial_stats and spatial_stats-activerecord
|
254
|
-
|
255
|
-
#### Weights
|
256
|
-
|
257
|
-
- Add Kernel based weighting
|
258
|
-
|
259
|
-
#### Utils
|
317
|
+
## Path Forward
|
260
318
|
|
261
|
-
|
262
|
-
- Bayes smoothing
|
319
|
+
Summaries of milestones for v1.x and v2.0. These lists are subject to change. If you have an additional feature you want to see for either milestone, open up an issue or PR.
|
263
320
|
|
264
|
-
###
|
321
|
+
### v1.x
|
265
322
|
|
266
|
-
|
267
|
-
-
|
323
|
+
1. Global Measurements
|
324
|
+
- `Geary`'s C
|
325
|
+
- `GetisOrd`
|
326
|
+
2. Local Measurements
|
327
|
+
- `Join Count`
|
328
|
+
3. Utilities
|
329
|
+
- Add support for .gal/.swm file imports
|
330
|
+
- Add support for Rate variables
|
331
|
+
- Add support for Bayes smoothing
|
332
|
+
- ~Add support for Bonferroni Bounds and FDR~
|
333
|
+
4. General
|
334
|
+
- ~Add new stat constructors that only rely on a weights matrix and data vector~
|
335
|
+
- Point Pattern Analysis Module
|
336
|
+
- Regression Module
|
268
337
|
|
269
|
-
|
338
|
+
### v2.0
|
270
339
|
|
271
|
-
-
|
340
|
+
- Break gem into core `spatial_stats` that will not include queries module and `spatial_stats-activerecord`. This will remove the dependency on rails for the core gem.
|
341
|
+
- Create `spatial_stats-import/geojson/shp` gem that will allow importing files and generating a `WeightsMatrix`. Will likely rely on `RGeo` or another spatial lib.
|
272
342
|
|
273
|
-
###
|
343
|
+
### Other TODOs
|
274
344
|
|
275
|
-
-
|
345
|
+
- Update Docs to show `from_observations` when version is bumped
|
346
|
+
- Refactor `MultivariateGeary` so that it can be used without `activerecord` by adding `from_observations` and supporting methods.
|
276
347
|
|
277
348
|
## License
|
278
349
|
|
279
|
-
The gem is available as open source under the terms of the [
|
350
|
+
The gem is available as open source under the terms of the [BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause).
|
data/Rakefile
CHANGED
@@ -18,6 +18,12 @@ end
|
|
18
18
|
|
19
19
|
require 'bundler/gem_tasks'
|
20
20
|
|
21
|
+
require 'rake/extensiontask'
|
22
|
+
|
23
|
+
Rake::ExtensionTask.new 'spatial_stats' do |ext|
|
24
|
+
ext.lib_dir = 'lib/spatial_stats'
|
25
|
+
end
|
26
|
+
|
21
27
|
require 'rake/testtask'
|
22
28
|
|
23
29
|
Rake::TestTask.new(:test) do |t|
|
@@ -27,4 +33,5 @@ Rake::TestTask.new(:test) do |t|
|
|
27
33
|
t.warning = false # shut up annoying warnings
|
28
34
|
end
|
29
35
|
|
36
|
+
task test: :compile
|
30
37
|
task default: :test
|
@@ -0,0 +1,380 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include "csr_matrix.h"
|
5
|
+
|
6
|
+
void csr_matrix_free(void *mat)
|
7
|
+
{
|
8
|
+
csr_matrix *csr = (csr_matrix *)mat;
|
9
|
+
|
10
|
+
if (csr->init == 1)
|
11
|
+
{
|
12
|
+
free(csr->values);
|
13
|
+
free(csr->col_index);
|
14
|
+
free(csr->row_index);
|
15
|
+
}
|
16
|
+
free(mat);
|
17
|
+
}
|
18
|
+
|
19
|
+
size_t csr_matrix_memsize(const void *ptr)
|
20
|
+
{
|
21
|
+
const csr_matrix *csr = (const csr_matrix *)ptr;
|
22
|
+
return sizeof(*csr);
|
23
|
+
}
|
24
|
+
|
25
|
+
VALUE csr_matrix_alloc(VALUE self)
|
26
|
+
{
|
27
|
+
csr_matrix *csr = malloc(sizeof(csr_matrix));
|
28
|
+
return TypedData_Wrap_Struct(self, &csr_matrix_type, csr);
|
29
|
+
}
|
30
|
+
|
31
|
+
void mat_to_sparse(csr_matrix *csr, VALUE data, VALUE keys, VALUE num_rows)
|
32
|
+
{
|
33
|
+
|
34
|
+
int nnz = 0;
|
35
|
+
int n = NUM2INT(num_rows);
|
36
|
+
int m;
|
37
|
+
|
38
|
+
VALUE key;
|
39
|
+
VALUE row;
|
40
|
+
VALUE entry;
|
41
|
+
VALUE key_lookup = rb_hash_new();
|
42
|
+
VALUE weight_sym = ID2SYM(rb_intern("weight"));
|
43
|
+
VALUE id_sym = ID2SYM(rb_intern("id"));
|
44
|
+
|
45
|
+
double *values;
|
46
|
+
int *col_index;
|
47
|
+
int *row_index;
|
48
|
+
|
49
|
+
int nz_idx;
|
50
|
+
double weight;
|
51
|
+
|
52
|
+
int i;
|
53
|
+
int j;
|
54
|
+
|
55
|
+
// first count the non-zero entries so we can alloc values and col_index
|
56
|
+
for (i = 0; i < n; i++)
|
57
|
+
{
|
58
|
+
key = rb_ary_entry(keys, i);
|
59
|
+
|
60
|
+
// set lookup index for this key
|
61
|
+
rb_hash_aset(key_lookup, key, INT2NUM(i));
|
62
|
+
|
63
|
+
// check the value of this row is actually an array
|
64
|
+
// if it is, add array len to nnz
|
65
|
+
row = rb_hash_aref(data, key);
|
66
|
+
Check_Type(row, T_ARRAY);
|
67
|
+
nnz += rb_array_len(row);
|
68
|
+
}
|
69
|
+
|
70
|
+
values = malloc(sizeof(double) * nnz);
|
71
|
+
col_index = malloc(sizeof(int) * nnz);
|
72
|
+
row_index = malloc(sizeof(int) * (n + 1));
|
73
|
+
|
74
|
+
// for every row, work through each hash
|
75
|
+
// in each hash, add the weight to values and get col_index
|
76
|
+
// by looking at the key_lookup of id.
|
77
|
+
// Row index will be computed by adding len of each row and updating array.
|
78
|
+
nz_idx = 0;
|
79
|
+
for (i = 0; i < n; i++)
|
80
|
+
{
|
81
|
+
row_index[i] = nz_idx;
|
82
|
+
|
83
|
+
key = rb_ary_entry(keys, i);
|
84
|
+
row = rb_hash_aref(data, key);
|
85
|
+
m = rb_array_len(row);
|
86
|
+
|
87
|
+
for (j = 0; j < m; j++)
|
88
|
+
{
|
89
|
+
entry = rb_ary_entry(row, j);
|
90
|
+
Check_Type(entry, T_HASH);
|
91
|
+
|
92
|
+
key = rb_hash_aref(entry, id_sym);
|
93
|
+
weight = NUM2DBL(rb_hash_aref(entry, weight_sym));
|
94
|
+
|
95
|
+
// assign the nnz the weight
|
96
|
+
// get index in the keys array of key from lookup table
|
97
|
+
values[nz_idx] = weight;
|
98
|
+
col_index[nz_idx] = NUM2INT(rb_hash_aref(key_lookup, key));
|
99
|
+
nz_idx++;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
row_index[n] = nnz;
|
103
|
+
|
104
|
+
csr->n = n;
|
105
|
+
csr->nnz = nnz;
|
106
|
+
csr->values = values;
|
107
|
+
csr->col_index = col_index;
|
108
|
+
csr->row_index = row_index;
|
109
|
+
csr->init = 1;
|
110
|
+
}
|
111
|
+
|
112
|
+
/**
|
113
|
+
* A new instance of CSRMatrix.
|
114
|
+
* Uses a Dictionary of Keys (DOK) as input to represent a square matrix.
|
115
|
+
* @example
|
116
|
+
* weights = {
|
117
|
+
* 'a' => [{ id: 'c', weight: 1 }],
|
118
|
+
* 'b' => [{ id: 'b', weight: 1 }],
|
119
|
+
* 'c' => [{ id: 'a', weight: 1 }]
|
120
|
+
* }
|
121
|
+
* num_rows = 3
|
122
|
+
*
|
123
|
+
* csr = CSRMatrix.new(weights, num_rows)
|
124
|
+
*
|
125
|
+
* @param [Hash] data mapping each key to an Array of { id:, weight: } hashes
|
126
|
+
* @param [Integer] num_rows in the 2-D representation
|
127
|
+
*
|
128
|
+
* @return [CSRMatrix]
|
129
|
+
*/
|
130
|
+
VALUE csr_matrix_initialize(VALUE self, VALUE data, VALUE num_rows)
|
131
|
+
{
|
132
|
+
VALUE keys;
|
133
|
+
csr_matrix *csr;
|
134
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
135
|
+
csr->init = 0;
|
136
|
+
|
137
|
+
Check_Type(data, T_HASH);
|
138
|
+
Check_Type(num_rows, T_FIXNUM);
|
139
|
+
|
140
|
+
keys = rb_funcall(data, rb_intern("keys"), 0);
|
141
|
+
|
142
|
+
// check dimensions are correct
|
143
|
+
if (NUM2INT(num_rows) != rb_array_len(keys))
|
144
|
+
{
|
145
|
+
rb_raise(rb_eArgError, "n_rows != keys.size, check your dimensions");
|
146
|
+
}
|
147
|
+
|
148
|
+
mat_to_sparse(csr, data, keys, num_rows);
|
149
|
+
|
150
|
+
rb_iv_set(self, "@n", num_rows);
|
151
|
+
rb_iv_set(self, "@nnz", INT2NUM(csr->nnz));
|
152
|
+
|
153
|
+
return self;
|
154
|
+
}
|
155
|
+
|
156
|
+
/**
|
157
|
+
* Non-zero values in the matrix.
|
158
|
+
*
|
159
|
+
* @return [Array] of the non-zero values.
|
160
|
+
*/
|
161
|
+
VALUE csr_matrix_values(VALUE self)
|
162
|
+
{
|
163
|
+
csr_matrix *csr;
|
164
|
+
VALUE result;
|
165
|
+
|
166
|
+
int i;
|
167
|
+
|
168
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
169
|
+
|
170
|
+
result = rb_ary_new_capa(csr->nnz);
|
171
|
+
for (i = 0; i < csr->nnz; i++)
|
172
|
+
{
|
173
|
+
rb_ary_store(result, i, DBL2NUM(csr->values[i]));
|
174
|
+
}
|
175
|
+
|
176
|
+
return result;
|
177
|
+
}
|
178
|
+
|
179
|
+
/**
|
180
|
+
* Column indices of the non-zero values.
|
181
|
+
*
|
182
|
+
* @return [Array] of the column indices.
|
183
|
+
*/
|
184
|
+
VALUE csr_matrix_col_index(VALUE self)
|
185
|
+
{
|
186
|
+
csr_matrix *csr;
|
187
|
+
VALUE result;
|
188
|
+
|
189
|
+
int i;
|
190
|
+
|
191
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
192
|
+
|
193
|
+
result = rb_ary_new_capa(csr->nnz);
|
194
|
+
for (i = 0; i < csr->nnz; i++)
|
195
|
+
{
|
196
|
+
rb_ary_store(result, i, INT2NUM(csr->col_index[i]));
|
197
|
+
}
|
198
|
+
|
199
|
+
return result;
|
200
|
+
}
|
201
|
+
|
202
|
+
/**
|
203
|
+
* Row indices of the non-zero values. Represents the start index
|
204
|
+
* of values in a row. For example [0,2,3] would represent a matrix
|
205
|
+
* with 2 rows, the first containing 2 non-zero values and the second
|
206
|
+
* containing 1. Length is num_rows + 1.
|
207
|
+
*
|
208
|
+
* Used for row slicing operations.
|
209
|
+
*
|
210
|
+
* @return [Array] of the row indices.
|
211
|
+
*/
|
212
|
+
VALUE csr_matrix_row_index(VALUE self)
|
213
|
+
{
|
214
|
+
csr_matrix *csr;
|
215
|
+
VALUE result;
|
216
|
+
|
217
|
+
int i;
|
218
|
+
|
219
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
220
|
+
|
221
|
+
result = rb_ary_new_capa(csr->n + 1);
|
222
|
+
for (i = 0; i <= csr->n; i++)
|
223
|
+
{
|
224
|
+
rb_ary_store(result, i, INT2NUM(csr->row_index[i]));
|
225
|
+
}
|
226
|
+
|
227
|
+
return result;
|
228
|
+
}
|
229
|
+
|
230
|
+
/**
|
231
|
+
* Multiply matrix by the input vector.
|
232
|
+
*
|
233
|
+
* @see https://github.com/scipy/scipy/blob/53fac7a1d8a81d48be757632ad285b6fc76529ba/scipy/sparse/sparsetools/csr.h#L1120
|
234
|
+
*
|
235
|
+
* @param [Array] vec of length n.
|
236
|
+
*
|
237
|
+
* @return [Array] of the result of the multiplication.
|
238
|
+
*/
|
239
|
+
VALUE csr_matrix_mulvec(VALUE self, VALUE vec)
|
240
|
+
{
|
241
|
+
csr_matrix *csr;
|
242
|
+
VALUE result;
|
243
|
+
|
244
|
+
int i;
|
245
|
+
int jj;
|
246
|
+
double tmp;
|
247
|
+
|
248
|
+
Check_Type(vec, T_ARRAY);
|
249
|
+
|
250
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
251
|
+
|
252
|
+
if (rb_array_len(vec) != csr->n)
|
253
|
+
{
|
254
|
+
rb_raise(rb_eArgError, "Dimension Mismatch CSRMatrix.n != vec.size");
|
255
|
+
}
|
256
|
+
|
257
|
+
result = rb_ary_new_capa(csr->n);
|
258
|
+
|
259
|
+
// float *vals = (float *)DATA_PTR(result);
|
260
|
+
|
261
|
+
for (i = 0; i < csr->n; i++)
|
262
|
+
{
|
263
|
+
tmp = 0;
|
264
|
+
for (jj = csr->row_index[i]; jj < csr->row_index[i + 1]; jj++)
|
265
|
+
{
|
266
|
+
tmp += csr->values[jj] * NUM2DBL(rb_ary_entry(vec, csr->col_index[jj]));
|
267
|
+
}
|
268
|
+
rb_ary_store(result, i, DBL2NUM(tmp));
|
269
|
+
}
|
270
|
+
|
271
|
+
return result;
|
272
|
+
}
|
273
|
+
|
274
|
+
/**
|
275
|
+
* Compute the dot product of the given row with the input vector.
|
276
|
+
* Equivalent to +mulvec(vec)[row]+.
|
277
|
+
*
|
278
|
+
* @param [Array] vec of length n.
|
279
|
+
* @param [Integer] row of the dot product.
|
280
|
+
*
|
281
|
+
* @return [Float] of the result of the dot product.
|
282
|
+
*/
|
283
|
+
VALUE csr_matrix_dot_row(VALUE self, VALUE vec, VALUE row)
|
284
|
+
{
|
285
|
+
csr_matrix *csr;
|
286
|
+
VALUE result;
|
287
|
+
|
288
|
+
int i;
|
289
|
+
int jj;
|
290
|
+
double tmp;
|
291
|
+
|
292
|
+
Check_Type(vec, T_ARRAY);
|
293
|
+
Check_Type(row, T_FIXNUM);
|
294
|
+
|
295
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
296
|
+
|
297
|
+
if (rb_array_len(vec) != csr->n)
|
298
|
+
{
|
299
|
+
rb_raise(rb_eArgError, "Dimension Mismatch CSRMatrix.n != vec.size");
|
300
|
+
}
|
301
|
+
|
302
|
+
i = NUM2INT(row);
|
303
|
+
if (!(i >= 0 && i < csr->n))
|
304
|
+
{
|
305
|
+
rb_raise(rb_eArgError, "Index Error row_idx >= m or idx < 0");
|
306
|
+
}
|
307
|
+
|
308
|
+
tmp = 0;
|
309
|
+
for (jj = csr->row_index[i]; jj < csr->row_index[i + 1]; jj++)
|
310
|
+
{
|
311
|
+
tmp += csr->values[jj] * NUM2DBL(rb_ary_entry(vec, csr->col_index[jj]));
|
312
|
+
}
|
313
|
+
|
314
|
+
result = DBL2NUM(tmp);
|
315
|
+
return result;
|
316
|
+
}
|
317
|
+
|
318
|
+
/**
|
319
|
+
* A hash representation of the matrix with coordinates as keys.
|
320
|
+
* @example
|
321
|
+
* data = [
|
322
|
+
* [0, 1, 0],
|
323
|
+
* [0, 0, 0],
|
324
|
+
* [1, 0, 1]
|
325
|
+
* ]
|
326
|
+
* num_rows = 3
|
327
|
+
* num_cols = 3
|
328
|
+
* data = data.flatten!
|
329
|
+
* csr = CSRMatrix.new(data, num_rows, num_cols)
|
330
|
+
*
|
331
|
+
* csr.coordinates
|
332
|
+
* # => {
|
333
|
+
* [0,1] => 1,
|
334
|
+
* [2,0] => 1,
|
335
|
+
* [2,2] => 1
|
336
|
+
* }
|
337
|
+
*
|
338
|
+
* @return [Hash]
|
339
|
+
*/
|
340
|
+
VALUE csr_matrix_coordinates(VALUE self)
|
341
|
+
{
|
342
|
+
csr_matrix *csr;
|
343
|
+
VALUE result;
|
344
|
+
|
345
|
+
int i;
|
346
|
+
int k;
|
347
|
+
|
348
|
+
VALUE key;
|
349
|
+
VALUE val;
|
350
|
+
int row_end;
|
351
|
+
|
352
|
+
TypedData_Get_Struct(self, csr_matrix, &csr_matrix_type, csr);
|
353
|
+
|
354
|
+
result = rb_hash_new();
|
355
|
+
|
356
|
+
// iterate through every value in the matrix and assign its coordinates
|
357
|
+
// [x,y] as the key to the hash, with the value as the value.
|
358
|
+
// Use i to keep track of what row we are on.
|
359
|
+
i = 0;
|
360
|
+
row_end = csr->row_index[1];
|
361
|
+
for (k = 0; k < csr->nnz; k++)
|
362
|
+
{
|
363
|
+
if (k == row_end)
|
364
|
+
{
|
365
|
+
i++;
|
366
|
+
row_end = csr->row_index[i + 1];
|
367
|
+
}
|
368
|
+
|
369
|
+
// store i,j coordinates j is col_index[k]
|
370
|
+
key = rb_ary_new_capa(2);
|
371
|
+
rb_ary_store(key, 0, INT2NUM(i));
|
372
|
+
rb_ary_store(key, 1, INT2NUM(csr->col_index[k]));
|
373
|
+
|
374
|
+
val = DBL2NUM(csr->values[k]);
|
375
|
+
|
376
|
+
rb_hash_aset(result, key, val);
|
377
|
+
}
|
378
|
+
|
379
|
+
return result;
|
380
|
+
}
|