stat_c 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f63ee4a4e26bb9031d4eed9ea02cf02b63fb629
4
- data.tar.gz: 4cb7709b9c16ddff13113495d0758533b6e96ec3
3
+ metadata.gz: e0e5819680b725e08009314b625cd71e806fd2bc
4
+ data.tar.gz: 707e936d379f538b9ac30661c3d7b8aa99a8d0ab
5
5
  SHA512:
6
- metadata.gz: b2fe2a4aed50969143e634271e133e3459dc51390c45ee983372a818b46e388e6c4c8b34752c24ebcf832c1cc9606c841caeec32a189a6d92ce82ddfd8763f46
7
- data.tar.gz: aa2763572a0a0111ac0a6a2c451573aee9432e6c1f2115b7e428be055f33c21f15b7b1c5bef843596003ded7fb201082651e68f94488e4ad6ec1bebcddd9f7bb
6
+ metadata.gz: 4a2338631efc5343c4181147bf65aa90e0306fb679772c676bbeeada9de57998f24467a012fe2d39cfde2ec1bc6e58ba2b419aa8adf853fd932dbbe7521bbd77
7
+ data.tar.gz: 70702b3acb197b3b35d6de33d733c00ac86ee11891f6994bb64f8751c0ffa0545f652b6e1e58f50a794848020d3c7358038a4afe7d9d48693cdf59226b3e28b0
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # StatC
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/stat_c`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ Fast, well documented C stats extension for Ruby.
6
4
 
7
5
  ## Installation
8
6
 
@@ -22,7 +20,53 @@ Or install it yourself as:
22
20
 
23
21
  ## Usage
24
22
 
25
- TODO: Write usage instructions here
23
+ ```ruby
24
+ require "stat_c"
25
+
26
+ ary = [-1.4, 0, 1, 2, 3.0]
27
+
28
+ StatC::Array.mean(ary).round(2) #=> 0.92
29
+
30
+ # Stats based on sample variance
31
+ StatC::Array.var(ary).round(2) #=> 2.93
32
+ StatC::Array.sd(ary).round(2) #=> 1.71
33
+ StatC::Array.se(ary).round(2) #=> 0.77
34
+
35
+ # Stats based on population variance
36
+ StatC::Array.var(ary, pop=true).round(2) #=> 2.35
37
+ StatC::Array.sd(ary, pop=true).round(2) #=> 1.53
38
+ StatC::Array.se(ary, pop=true).round(2) #=> 0.68
39
+ ```
40
+
41
+ ## Benchmark ##
42
+
43
+ StatC is faster than pure Ruby (duh, it's a C extension {^_^} ). See `benchmark/benchmark.rb` for more info.
44
+
45
+ $ ruby benchmark/benchmark.rb
46
+
47
+ Rehearsal ----------------------------------------------
48
+ Ruby mean 0.090000 0.000000 0.090000 ( 0.085029)
49
+ StatC mean 0.010000 0.000000 0.010000 ( 0.009604)
50
+ Ruby var 0.350000 0.010000 0.360000 ( 0.357243)
51
+ StatC var 0.020000 0.000000 0.020000 ( 0.020343)
52
+ Ruby sd 0.350000 0.000000 0.350000 ( 0.355273)
53
+ StatC sd 0.020000 0.000000 0.020000 ( 0.018590)
54
+ Ruby se 0.340000 0.000000 0.340000 ( 0.353170)
55
+ StatC se 0.030000 0.000000 0.030000 ( 0.025813)
56
+ ------------------------------------- total: 1.220000sec
57
+
58
+ user system total real
59
+ Ruby mean 0.080000 0.000000 0.080000 ( 0.079849)
60
+ StatC mean 0.000000 0.000000 0.000000 ( 0.009006)
61
+
62
+ Ruby var 0.320000 0.010000 0.330000 ( 0.322538)
63
+ StatC var 0.020000 0.000000 0.020000 ( 0.018962)
64
+
65
+ Ruby sd 0.330000 0.000000 0.330000 ( 0.329038)
66
+ StatC sd 0.020000 0.000000 0.020000 ( 0.020783)
67
+
68
+ Ruby se 0.310000 0.000000 0.310000 ( 0.319696)
69
+ StatC se 0.020000 0.000000 0.020000 ( 0.019259)
26
70
 
27
71
  ## Development
28
72
 
@@ -1,18 +1,36 @@
1
1
  require "stat_c"
2
2
  require "benchmark"
3
3
 
4
- def mean ary
4
+ def ary_mean ary
5
5
  ary.reduce(:+) / ary.length.to_f
6
6
  end
7
7
 
8
+ # sample variance
9
+ def ary_var ary
10
+ mean = ary_mean ary
11
+ ary.map { |num| (num - mean) ** 2 }.reduce(:+) / (ary.length - 1)
12
+ end
13
+
14
+ def ary_sd ary
15
+ Math.sqrt(ary_var ary)
16
+ end
17
+
18
+ def ary_se ary
19
+ ary_sd(ary) / Math.sqrt(ary.length)
20
+ end
21
+
8
22
  ary = (1..1_000_000).map(&:itself)
9
23
 
10
24
  Benchmark.bmbm do |x|
11
- x.report("ruby mean") do
12
- mean ary
13
- end
25
+ x.report("Ruby mean") { ary_mean ary }
26
+ x.report("StatC mean") { StatC::Array.mean ary }
27
+
28
+ x.report("Ruby var") { ary_var ary }
29
+ x.report("StatC var") { StatC::Array.var ary }
30
+
31
+ x.report("Ruby sd") { ary_sd ary }
32
+ x.report("StatC sd") { StatC::Array.sd ary }
14
33
 
15
- x.report("c mean") do
16
- StatC::Array.mean ary
17
- end
34
+ x.report("Ruby se") { ary_se ary }
35
+ x.report("StatC se") { StatC::Array.se ary }
18
36
  end
@@ -21,6 +21,10 @@ along with StatC. If not, see <http://www.gnu.org/licenses/>.
21
21
  *********************************************************************/
22
22
 
23
23
  #include <ruby.h>
24
+ #include <math.h>
25
+
26
+ /* based on NIL_P in ruby.h */
27
+ #define FALSE_P(v) !((VALUE)(v) != Qfalse)
24
28
 
25
29
  /* classes and modules */
26
30
  VALUE sc_mStatC;
@@ -28,16 +32,42 @@ VALUE sc_mArray;
28
32
  VALUE sc_mError;
29
33
  VALUE sc_eError;
30
34
 
31
- static VALUE sc_mean(VALUE self, VALUE ary)
35
+ /* @private */
36
+ static size_t assert_array_not_empty(VALUE ary)
32
37
  {
33
-
34
- unsigned long i = 0;
35
- long double sum = 0;
36
38
  size_t len = RARRAY_LEN(ary);
37
39
 
38
40
  if (len <= 0) {
39
41
  rb_raise(sc_eError, "Array cannot be empty");
42
+ } else {
43
+ return len;
40
44
  }
45
+ }
46
+
47
+ /* @private */
48
+ static long double sc_ary_entry(VALUE ary, long idx)
49
+ {
50
+ return NUM2DBL(rb_ary_entry(ary, idx));
51
+ }
52
+
53
+ /* Calculate the mean of values in the given array.
54
+
55
+ @param ary [Array<Numeric>] an array of Numerics
56
+
57
+ @example Get mean of array
58
+ StatC::Array.mean([-1.4, 0, 1, 2, 3.0]).round(2) #=> 0.92
59
+
60
+ @raise [StatC::Error::Error] if array length is zero
61
+
62
+ @return [Numeric] mean of values in the array
63
+
64
+ */
65
+ static VALUE sc_mean(VALUE obj, VALUE ary)
66
+ {
67
+
68
+ unsigned long i = 0;
69
+ long double sum = 0;
70
+ size_t len = assert_array_not_empty(ary);
41
71
 
42
72
  for (i = 0; i < len; ++i) {
43
73
  sum += NUM2DBL(rb_ary_entry(ary, i));
@@ -46,44 +76,162 @@ static VALUE sc_mean(VALUE self, VALUE ary)
46
76
  return DBL2NUM(sum / len);
47
77
  }
48
78
 
49
- /* static VALUE var(VALUE self, VALUE ary) */
50
- /* { */
79
+ /* Calculate the variance of values in given array.
51
80
 
52
- /* } */
81
+ If pop param is set to true, calculates the population variance of
82
+ values in the array. Otherwise, the sample variance is calculated
83
+ (default).
53
84
 
54
- /* static VALUE var(int argc, VALUE* argv, VALUE self) */
55
- /* { */
56
- /* VALUE ary, sample; */
85
+ @param ary [Array<Numeric>] an array of Numerics
86
+ @param pop [Boolean] pass true to calculate population variance,
87
+ default: false
57
88
 
58
- /* /\* one required and one optional argument *\/ */
59
- /* rb_scan_args(argc, argv, "11", &ary, &sample); */
89
+ @example Get sample variance of array
90
+ StatC::Array.var([-1.4, 0, 1, 2, 3.0]).round(2) #=> 2.93
91
+ @example Get population variance of array
92
+ StatC::Array.var([-1.4, 0, 1, 2, 3.0], pop=true).round(2) #=> 2.35
60
93
 
61
- /* /\* if sample wasn't specified, set it to true *\/ */
62
- /* if (NIL_P(sample)) { sample = 1; } */
63
- /* } */
94
+ @raise [StatC::Error::Error] if array length is zero
64
95
 
65
- /* static VALUE var(int argc, VALUE* argv, VALUE self) */
66
- /* { */
67
- /* VALUE ary, sample; */
96
+ @return [Numeric] variance of values in the array
68
97
 
69
- /* /\* one required and one optional argument *\/ */
70
- /* rb_scan_args(argc, argv, "11", &ary, &sample); */
98
+ */
99
+ static VALUE sc_var(int argc, VALUE* argv, VALUE obj)
100
+ {
101
+ VALUE ary, calc_pop_var;
71
102
 
72
- /* /\* if sample wasn't specified, set it to true *\/ */
73
- /* if (NIL_P(sample)) { sample = 1; } */
74
- /* } */
103
+ /* one required and one optional argument */
104
+ rb_scan_args(argc, argv, "11", &ary, &calc_pop_var);
75
105
 
76
- void Init_stat_c(void)
106
+ unsigned long i = 0;
107
+ long double sum = 0;
108
+ size_t len = assert_array_not_empty(ary);
109
+
110
+ long double mean = NUM2DBL(sc_mean(obj, ary));
111
+
112
+ for (i = 0; i < len; ++i) {
113
+ sum += pow(sc_ary_entry(ary, i) - mean, 2);
114
+ }
115
+
116
+ if (NIL_P(calc_pop_var) || FALSE_P(calc_pop_var)) { /* sample variance */
117
+ return DBL2NUM(sum / (len - 1));
118
+ } else { /* population variance */
119
+ return DBL2NUM(sum / len);
120
+ }
121
+ }
122
+
123
+ /* Calculate the standard deviation of values in given array.
124
+
125
+ If pop param is set to true, the standard deviation is based on
126
+ population variance. Otherwise, sample variance is used (default).
127
+
128
+ @param ary [Array<Numeric>] an array of Numerics
129
+ @param pop [Boolean] pass true to calculate population standard
130
+ deviation, default: false
131
+
132
+ @example Get sample standard deviation of array
133
+ StatC::Array.sd([-1.4, 0, 1, 2, 3.0]).round(2) #=> 1.71
134
+ @example Get population standard deviation of array
135
+ StatC::Array.sd([-1.4, 0, 1, 2, 3.0], pop=true).round(2) #=> 1.53
136
+
137
+ @raise [StatC::Error::Error] if array length is zero
138
+
139
+ @return [Numeric] standard deviation of values in the array
140
+
141
+ */
142
+ static VALUE sc_sd(int argc, VALUE* argv, VALUE obj)
77
143
  {
78
- sc_mStatC = rb_define_module("StatC");
144
+ VALUE ary, calc_pop_var;
79
145
 
146
+ /* one required and one optional argument */
147
+ rb_scan_args(argc, argv, "11", &ary, &calc_pop_var);
148
+
149
+ return DBL2NUM(sqrt(NUM2DBL(sc_var(argc, argv, obj))));
150
+ }
151
+
152
+ /* Calculate the standard error of the mean of values in given array.
153
+
154
+ If pop param is set to true, the standard error of the mean is based
155
+ on population variance. Otherwise, sample variance is used (default).
156
+
157
+ @param ary [Array<Numeric>] an array of Numerics
158
+ @param pop [Boolean] pass true to calculate population standard
159
+ error of the mean, default: false
160
+
161
+ @example Get sample standard error of array
162
+ StatC::Array.se([-1.4, 0, 1, 2, 3.0]).round(2) #=> 0.77
163
+ @example Get population standard error of array
164
+ StatC::Array.se([-1.4, 0, 1, 2, 3.0], pop=true).round(2) #=> 0.68
165
+
166
+ @raise [StatC::Error::Error] if array length is zero
167
+
168
+ @return [Numeric] standard error of the mean for values in the array
169
+
170
+ */
171
+ static VALUE sc_se(int argc, VALUE* argv, VALUE obj)
172
+ {
173
+ VALUE ary, calc_pop_var;
174
+
175
+ /* one required and one optional argument */
176
+ rb_scan_args(argc, argv, "11", &ary, &calc_pop_var);
177
+
178
+ long double sd = NUM2DBL(sc_sd(argc, argv, obj));
179
+
180
+ size_t len = assert_array_not_empty(ary);
181
+
182
+ return DBL2NUM(sd / sqrt(len));
183
+ }
184
+
185
+ /* Document-module: StatC::Array
186
+
187
+ Statistical methods operating on the values of an array
188
+
189
+ */
190
+ static void sc_init_mArray(void)
191
+ {
80
192
  sc_mArray = rb_define_module_under(sc_mStatC, "Array");
193
+
194
+ rb_define_singleton_method(sc_mArray, "mean", sc_mean, 1);
195
+ rb_define_singleton_method(sc_mArray, "var", sc_var, -1);
196
+ rb_define_singleton_method(sc_mArray, "sd", sc_sd, -1);
197
+ rb_define_singleton_method(sc_mArray, "se", sc_se, -1);
198
+ }
199
+
200
+ /* Document-module: StatC::Error
201
+
202
+ Module containing all error classes of the StatC module.
203
+
204
+ */
205
+ static void sc_init_mError(void)
206
+ {
81
207
  sc_mError = rb_define_module_under(sc_mStatC, "Error");
208
+ }
82
209
 
83
- sc_eError = rb_define_class_under(sc_mError, "Error", rb_eStandardError);
84
210
 
85
- rb_define_singleton_method(sc_mArray, "mean", sc_mean, 1);
86
- /* rb_define_singleton_method(mArray, "var", var, 1); */
87
- /* rb_define_singleton_method(mArray, "sd", sd, -1); */
88
- /* rb_define_singleton_method(mArray, "se", se, -1); */
211
+ /* Document-class: StatC::Error::Error
212
+
213
+ Error class from which all errors raised by StatC inherit. Thus, you
214
+ can rescue from StatC::Error::Error to catch all errors specific to
215
+ StatC.
216
+
217
+ @note Inherits from StandardError
218
+
219
+ */
220
+ static void sc_init_eError(void)
221
+ {
222
+ sc_eError = rb_define_class_under(sc_mError, "Error", rb_eStandardError);
223
+ }
224
+
225
+ /* Document-module: StatC
226
+
227
+ C stats module for Ruby.
228
+
229
+ */
230
+ void Init_stat_c(void)
231
+ {
232
+ sc_mStatC = rb_define_module("StatC");
233
+
234
+ sc_init_mArray();
235
+ sc_init_mError();
236
+ sc_init_eError();
89
237
  }
@@ -1,3 +1,4 @@
1
1
  module StatC
2
- VERSION = "0.1.0"
2
+ # module version number
3
+ VERSION = "0.2.0"
3
4
  end
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["moorer@udel.edu"]
11
11
  spec.license = "GPLv3"
12
12
 
13
- spec.summary = %q{Fast statistics library. C extension.}
14
- spec.description = %q{Fast statistics library. C extension.}
13
+ spec.summary = %q{Fast, well documented C stats extension for Ruby.}
14
+ spec.description = %q{Fast, well documented C stats extension for Ruby.}
15
15
  spec.homepage = "https://github.com/mooreryan/stat_c"
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stat_c
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
@@ -114,7 +114,7 @@ dependencies:
114
114
  - - "~>"
115
115
  - !ruby/object:Gem::Version
116
116
  version: 0.8.11
117
- description: Fast statistics library. C extension.
117
+ description: Fast, well documented C stats extension for Ruby.
118
118
  email:
119
119
  - moorer@udel.edu
120
120
  executables: []
@@ -164,6 +164,6 @@ rubyforge_project:
164
164
  rubygems_version: 2.5.1
165
165
  signing_key:
166
166
  specification_version: 4
167
- summary: Fast statistics library. C extension.
167
+ summary: Fast, well documented C stats extension for Ruby.
168
168
  test_files: []
169
169
  has_rdoc: