stat_c 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +48 -4
- data/benchmark/benchmark.rb +25 -7
- data/ext/stat_c/stat_c.c +178 -30
- data/lib/stat_c/version.rb +2 -1
- data/stat_c.gemspec +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0e5819680b725e08009314b625cd71e806fd2bc
|
4
|
+
data.tar.gz: 707e936d379f538b9ac30661c3d7b8aa99a8d0ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a2338631efc5343c4181147bf65aa90e0306fb679772c676bbeeada9de57998f24467a012fe2d39cfde2ec1bc6e58ba2b419aa8adf853fd932dbbe7521bbd77
|
7
|
+
data.tar.gz: 70702b3acb197b3b35d6de33d733c00ac86ee11891f6994bb64f8751c0ffa0545f652b6e1e58f50a794848020d3c7358038a4afe7d9d48693cdf59226b3e28b0
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# StatC
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Fast, well documented C stats extension for Ruby.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,7 +20,53 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
23
|
+
```ruby
|
24
|
+
require "stat_c"
|
25
|
+
|
26
|
+
ary = [-1.4, 0, 1, 2, 3.0]
|
27
|
+
|
28
|
+
StatC::Array.mean(ary).round(2) #=> 0.92
|
29
|
+
|
30
|
+
# Stats based on sample variance
|
31
|
+
StatC::Array.var(ary).round(2) #=> 2.93
|
32
|
+
StatC::Array.sd(ary).round(2) #=> 1.71
|
33
|
+
StatC::Array.se(ary).round(2) #=> 0.77
|
34
|
+
|
35
|
+
# Stats based on population variance
|
36
|
+
StatC::Array.var(ary, pop=true).round(2) #=> 2.35
|
37
|
+
StatC::Array.sd(ary, pop=true).round(2) #=> 1.53
|
38
|
+
StatC::Array.se(ary, pop=true).round(2) #=> 0.68
|
39
|
+
```
|
40
|
+
|
41
|
+
## Benchmark ##
|
42
|
+
|
43
|
+
StatC is faster than pure Ruby (duh, it's a C extension {^_^} ). See `benchmark/benchmark.rb` for more info.
|
44
|
+
|
45
|
+
$ ruby benchmark/benchmark.rb
|
46
|
+
|
47
|
+
Rehearsal ----------------------------------------------
|
48
|
+
Ruby mean 0.090000 0.000000 0.090000 ( 0.085029)
|
49
|
+
StatC mean 0.010000 0.000000 0.010000 ( 0.009604)
|
50
|
+
Ruby var 0.350000 0.010000 0.360000 ( 0.357243)
|
51
|
+
StatC var 0.020000 0.000000 0.020000 ( 0.020343)
|
52
|
+
Ruby sd 0.350000 0.000000 0.350000 ( 0.355273)
|
53
|
+
StatC sd 0.020000 0.000000 0.020000 ( 0.018590)
|
54
|
+
Ruby se 0.340000 0.000000 0.340000 ( 0.353170)
|
55
|
+
StatC se 0.030000 0.000000 0.030000 ( 0.025813)
|
56
|
+
------------------------------------- total: 1.220000sec
|
57
|
+
|
58
|
+
user system total real
|
59
|
+
Ruby mean 0.080000 0.000000 0.080000 ( 0.079849)
|
60
|
+
StatC mean 0.000000 0.000000 0.000000 ( 0.009006)
|
61
|
+
|
62
|
+
Ruby var 0.320000 0.010000 0.330000 ( 0.322538)
|
63
|
+
StatC var 0.020000 0.000000 0.020000 ( 0.018962)
|
64
|
+
|
65
|
+
Ruby sd 0.330000 0.000000 0.330000 ( 0.329038)
|
66
|
+
StatC sd 0.020000 0.000000 0.020000 ( 0.020783)
|
67
|
+
|
68
|
+
Ruby se 0.310000 0.000000 0.310000 ( 0.319696)
|
69
|
+
StatC se 0.020000 0.000000 0.020000 ( 0.019259)
|
26
70
|
|
27
71
|
## Development
|
28
72
|
|
data/benchmark/benchmark.rb
CHANGED
@@ -1,18 +1,36 @@
|
|
1
1
|
require "stat_c"
|
2
2
|
require "benchmark"
|
3
3
|
|
4
|
-
def
|
4
|
+
def ary_mean ary
|
5
5
|
ary.reduce(:+) / ary.length.to_f
|
6
6
|
end
|
7
7
|
|
8
|
+
# sample variance
|
9
|
+
def ary_var ary
|
10
|
+
mean = ary_mean ary
|
11
|
+
ary.map { |num| (num - mean) ** 2 }.reduce(:+) / (ary.length - 1)
|
12
|
+
end
|
13
|
+
|
14
|
+
def ary_sd ary
|
15
|
+
Math.sqrt(ary_var ary)
|
16
|
+
end
|
17
|
+
|
18
|
+
def ary_se ary
|
19
|
+
ary_sd(ary) / Math.sqrt(ary.length)
|
20
|
+
end
|
21
|
+
|
8
22
|
ary = (1..1_000_000).map(&:itself)
|
9
23
|
|
10
24
|
Benchmark.bmbm do |x|
|
11
|
-
x.report("
|
12
|
-
|
13
|
-
|
25
|
+
x.report("Ruby mean") { ary_mean ary }
|
26
|
+
x.report("StatC mean") { StatC::Array.mean ary }
|
27
|
+
|
28
|
+
x.report("Ruby var") { ary_var ary }
|
29
|
+
x.report("StatC var") { StatC::Array.var ary }
|
30
|
+
|
31
|
+
x.report("Ruby sd") { ary_sd ary }
|
32
|
+
x.report("StatC sd") { StatC::Array.sd ary }
|
14
33
|
|
15
|
-
x.report("
|
16
|
-
|
17
|
-
end
|
34
|
+
x.report("Ruby se") { ary_se ary }
|
35
|
+
x.report("StatC se") { StatC::Array.se ary }
|
18
36
|
end
|
data/ext/stat_c/stat_c.c
CHANGED
@@ -21,6 +21,10 @@ along with StatC. If not, see <http://www.gnu.org/licenses/>.
|
|
21
21
|
*********************************************************************/
|
22
22
|
|
23
23
|
#include <ruby.h>
|
24
|
+
#include <math.h>
|
25
|
+
|
26
|
+
/* based on NIL_P in ruby.h */
|
27
|
+
#define FALSE_P(v) !((VALUE)(v) != Qfalse)
|
24
28
|
|
25
29
|
/* classes and modules */
|
26
30
|
VALUE sc_mStatC;
|
@@ -28,16 +32,42 @@ VALUE sc_mArray;
|
|
28
32
|
VALUE sc_mError;
|
29
33
|
VALUE sc_eError;
|
30
34
|
|
31
|
-
|
35
|
+
/* @private */
|
36
|
+
static size_t assert_array_not_empty(VALUE ary)
|
32
37
|
{
|
33
|
-
|
34
|
-
unsigned long i = 0;
|
35
|
-
long double sum = 0;
|
36
38
|
size_t len = RARRAY_LEN(ary);
|
37
39
|
|
38
40
|
if (len <= 0) {
|
39
41
|
rb_raise(sc_eError, "Array cannot be empty");
|
42
|
+
} else {
|
43
|
+
return len;
|
40
44
|
}
|
45
|
+
}
|
46
|
+
|
47
|
+
/* @private */
|
48
|
+
static long double sc_ary_entry(VALUE ary, long idx)
|
49
|
+
{
|
50
|
+
return NUM2DBL(rb_ary_entry(ary, idx));
|
51
|
+
}
|
52
|
+
|
53
|
+
/* Calculate the mean of values in the given array.
|
54
|
+
|
55
|
+
@param ary [Array<Numeric>] an array of Numerics
|
56
|
+
|
57
|
+
@example Get mean of array
|
58
|
+
StatC::Array.mean([-1.4, 0, 1, 2, 3.0]).round(2) #=> 0.92
|
59
|
+
|
60
|
+
@raise [StatC::Error::Error] if array length is zero
|
61
|
+
|
62
|
+
@return [Numeric] mean of values in the array
|
63
|
+
|
64
|
+
*/
|
65
|
+
static VALUE sc_mean(VALUE obj, VALUE ary)
|
66
|
+
{
|
67
|
+
|
68
|
+
unsigned long i = 0;
|
69
|
+
long double sum = 0;
|
70
|
+
size_t len = assert_array_not_empty(ary);
|
41
71
|
|
42
72
|
for (i = 0; i < len; ++i) {
|
43
73
|
sum += NUM2DBL(rb_ary_entry(ary, i));
|
@@ -46,44 +76,162 @@ static VALUE sc_mean(VALUE self, VALUE ary)
|
|
46
76
|
return DBL2NUM(sum / len);
|
47
77
|
}
|
48
78
|
|
49
|
-
/*
|
50
|
-
/* { */
|
79
|
+
/* Calculate the variance of values in the given array.
|
51
80
|
|
52
|
-
|
81
|
+
If pop param is set to true, calculates the population variance of
|
82
|
+
values in the array. Otherwise, the sample variance is calculated
|
83
|
+
(default).
|
53
84
|
|
54
|
-
|
55
|
-
|
56
|
-
|
85
|
+
@param ary [Array<Numeric>] an array of Numerics
|
86
|
+
@param pop [Bool] pass true to calculate population variance,
|
87
|
+
default: false
|
57
88
|
|
58
|
-
|
59
|
-
|
89
|
+
@example Get sample variance of array
|
90
|
+
StatC::Array.var([-1.4, 0, 1, 2, 3.0]).round(2) #=> 2.93
|
91
|
+
@example Get population variance of array
|
92
|
+
StatC::Array.var([-1.4, 0, 1, 2, 3.0], pop=true).round(2) #=> 2.35
|
60
93
|
|
61
|
-
|
62
|
-
/* if (NIL_P(sample)) { sample = 1; } */
|
63
|
-
/* } */
|
94
|
+
@raise [StatC::Error::Error] if array length is zero
|
64
95
|
|
65
|
-
|
66
|
-
/* { */
|
67
|
-
/* VALUE ary, sample; */
|
96
|
+
@return [Numeric] variance of values in the array
|
68
97
|
|
69
|
-
|
70
|
-
|
98
|
+
*/
|
99
|
+
static VALUE sc_var(int argc, VALUE* argv, VALUE obj)
|
100
|
+
{
|
101
|
+
VALUE ary, calc_pop_var;
|
71
102
|
|
72
|
-
/*
|
73
|
-
|
74
|
-
/* } */
|
103
|
+
/* one required and one optional argument */
|
104
|
+
rb_scan_args(argc, argv, "11", &ary, &calc_pop_var);
|
75
105
|
|
76
|
-
|
106
|
+
unsigned long i = 0;
|
107
|
+
long double sum = 0;
|
108
|
+
size_t len = assert_array_not_empty(ary);
|
109
|
+
|
110
|
+
long double mean = NUM2DBL(sc_mean(obj, ary));
|
111
|
+
|
112
|
+
for (i = 0; i < len; ++i) {
|
113
|
+
sum += pow(sc_ary_entry(ary, i) - mean, 2);
|
114
|
+
}
|
115
|
+
|
116
|
+
if (NIL_P(calc_pop_var) || FALSE_P(calc_pop_var)) { /* sample variance */
|
117
|
+
return DBL2NUM(sum / (len - 1));
|
118
|
+
} else { /* population variance */
|
119
|
+
return DBL2NUM(sum / len);
|
120
|
+
}
|
121
|
+
}
|
122
|
+
|
123
|
+
/* Calculate the standard deviation of values in the given array.
|
124
|
+
|
125
|
+
If pop param is set to true, the standard deviation is based on
|
126
|
+
population variance. Otherwise, sample variance is used (default).
|
127
|
+
|
128
|
+
@param ary [Array<Numeric>] an array of Numerics
|
129
|
+
@param pop [Bool] pass true to calculate population standard
|
130
|
+
deviation, default: false
|
131
|
+
|
132
|
+
@example Get sample standard deviation of array
|
133
|
+
StatC::Array.sd([-1.4, 0, 1, 2, 3.0]).round(2) #=> 1.71
|
134
|
+
@example Get population standard deviation of array
|
135
|
+
StatC::Array.sd([-1.4, 0, 1, 2, 3.0], pop=true).round(2) #=> 1.53
|
136
|
+
|
137
|
+
@raise [StatC::Error::Error] if array length is zero
|
138
|
+
|
139
|
+
@return [Numeric] standard deviation of values in the array
|
140
|
+
|
141
|
+
*/
|
142
|
+
static VALUE sc_sd(int argc, VALUE* argv, VALUE obj)
|
77
143
|
{
|
78
|
-
|
144
|
+
VALUE ary, calc_pop_var;
|
79
145
|
|
146
|
+
/* one required and one optional argument */
|
147
|
+
rb_scan_args(argc, argv, "11", &ary, &calc_pop_var);
|
148
|
+
|
149
|
+
return DBL2NUM(sqrt(NUM2DBL(sc_var(argc, argv, obj))));
|
150
|
+
}
|
151
|
+
|
152
|
+
/* Calculate the standard error of the mean of values in the given array.
|
153
|
+
|
154
|
+
If pop param is set to true, the standard error of the mean is based
|
155
|
+
on population variance. Otherwise, sample variance is used (default).
|
156
|
+
|
157
|
+
@param ary [Array<Numeric>] an array of Numerics
|
158
|
+
@param pop [Bool] pass true to calculate population standard
|
159
|
+
error of the mean, default: false
|
160
|
+
|
161
|
+
@example Get sample standard error of array
|
162
|
+
StatC::Array.se([-1.4, 0, 1, 2, 3.0]).round(2) #=> 0.77
|
163
|
+
@example Get population standard error of array
|
164
|
+
StatC::Array.se([-1.4, 0, 1, 2, 3.0], pop=true).round(2) #=> 0.68
|
165
|
+
|
166
|
+
@raise [StatC::Error::Error] if array length is zero
|
167
|
+
|
168
|
+
@return [Numeric] standard error of the mean for values in the array
|
169
|
+
|
170
|
+
*/
|
171
|
+
static VALUE sc_se(int argc, VALUE* argv, VALUE obj)
|
172
|
+
{
|
173
|
+
VALUE ary, calc_pop_var;
|
174
|
+
|
175
|
+
/* one required and one optional argument */
|
176
|
+
rb_scan_args(argc, argv, "11", &ary, &calc_pop_var);
|
177
|
+
|
178
|
+
long double sd = NUM2DBL(sc_sd(argc, argv, obj));
|
179
|
+
|
180
|
+
size_t len = assert_array_not_empty(ary);
|
181
|
+
|
182
|
+
return DBL2NUM(sd / sqrt(len));
|
183
|
+
}
|
184
|
+
|
185
|
+
/* Document-module: StatC::Array
|
186
|
+
|
187
|
+
Statistical methods operating on the values of an array
|
188
|
+
|
189
|
+
*/
|
190
|
+
static void sc_init_mArray(void)
|
191
|
+
{
|
80
192
|
sc_mArray = rb_define_module_under(sc_mStatC, "Array");
|
193
|
+
|
194
|
+
rb_define_singleton_method(sc_mArray, "mean", sc_mean, 1);
|
195
|
+
rb_define_singleton_method(sc_mArray, "var", sc_var, -1);
|
196
|
+
rb_define_singleton_method(sc_mArray, "sd", sc_sd, -1);
|
197
|
+
rb_define_singleton_method(sc_mArray, "se", sc_se, -1);
|
198
|
+
}
|
199
|
+
|
200
|
+
/* Document-module: StatC::Error
|
201
|
+
|
202
|
+
Module containing all error classes of the StatC module.
|
203
|
+
|
204
|
+
*/
|
205
|
+
static void sc_init_mError(void)
|
206
|
+
{
|
81
207
|
sc_mError = rb_define_module_under(sc_mStatC, "Error");
|
208
|
+
}
|
82
209
|
|
83
|
-
sc_eError = rb_define_class_under(sc_mError, "Error", rb_eStandardError);
|
84
210
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
211
|
+
/* Document-class: StatC::Error::Error
|
212
|
+
|
213
|
+
Error class from which all errors raised by StatC inherit. Thus, you
|
214
|
+
can rescue from StatC::Error::Error to catch all errors specific to
|
215
|
+
StatC.
|
216
|
+
|
217
|
+
@note Inherits from StandardError
|
218
|
+
|
219
|
+
*/
|
220
|
+
static void sc_init_eError(void)
|
221
|
+
{
|
222
|
+
sc_eError = rb_define_class_under(sc_mError, "Error", rb_eStandardError);
|
223
|
+
}
|
224
|
+
|
225
|
+
/* Document-module: StatC
|
226
|
+
|
227
|
+
C stats module for Ruby.
|
228
|
+
|
229
|
+
*/
|
230
|
+
void Init_stat_c(void)
|
231
|
+
{
|
232
|
+
sc_mStatC = rb_define_module("StatC");
|
233
|
+
|
234
|
+
sc_init_mArray();
|
235
|
+
sc_init_mError();
|
236
|
+
sc_init_eError();
|
89
237
|
}
|
data/lib/stat_c/version.rb
CHANGED
data/stat_c.gemspec
CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["moorer@udel.edu"]
|
11
11
|
spec.license = "GPLv3"
|
12
12
|
|
13
|
-
spec.summary = %q{Fast
|
14
|
-
spec.description = %q{Fast
|
13
|
+
spec.summary = %q{Fast, well documented C stats extension for Ruby.}
|
14
|
+
spec.description = %q{Fast, well documented C stats extension for Ruby.}
|
15
15
|
spec.homepage = "https://github.com/mooreryan/stat_c"
|
16
16
|
|
17
17
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stat_c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
@@ -114,7 +114,7 @@ dependencies:
|
|
114
114
|
- - "~>"
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 0.8.11
|
117
|
-
description: Fast
|
117
|
+
description: Fast, well documented C stats extension for Ruby.
|
118
118
|
email:
|
119
119
|
- moorer@udel.edu
|
120
120
|
executables: []
|
@@ -164,6 +164,6 @@ rubyforge_project:
|
|
164
164
|
rubygems_version: 2.5.1
|
165
165
|
signing_key:
|
166
166
|
specification_version: 4
|
167
|
-
summary: Fast
|
167
|
+
summary: Fast, well documented C stats extension for Ruby.
|
168
168
|
test_files: []
|
169
169
|
has_rdoc:
|