ruby_native_statistics 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vscode/settings.json +5 -1
- data/Gemfile.lock +1 -1
- data/README.md +14 -0
- data/Rakefile +2 -6
- data/ext/ruby_native_statistics/conversions.c +45 -0
- data/ext/ruby_native_statistics/conversions.h +5 -0
- data/ext/ruby_native_statistics/dispersion.c +102 -0
- data/ext/{dispersion → ruby_native_statistics}/dispersion.h +6 -1
- data/ext/ruby_native_statistics/extconf.rb +9 -0
- data/ext/{mathematics → ruby_native_statistics}/mathematics.c +3 -46
- data/ext/{mathematics → ruby_native_statistics}/mathematics.h +6 -2
- data/ext/ruby_native_statistics/ruby_native_statistics.c +16 -0
- data/ext/ruby_native_statistics/ruby_native_statistics.h +5 -0
- data/lib/ruby_native_statistics.rb +1 -2
- data/lib/ruby_native_statistics/version.rb +1 -1
- data/ruby_native_statistics.gemspec +1 -1
- metadata +12 -10
- data/ext/dispersion/dispersion.c +0 -68
- data/ext/dispersion/extconf.rb +0 -3
- data/ext/mathematics/extconf.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99451de1aa0d6eab93118c4ce68330f21210437115e05ef3bcdff4ae63e780d7
|
4
|
+
data.tar.gz: edf6bdec21f87d70e808eaca249ddf1447b0ca57979a039e2295d679976e73e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6e12d43e6f7ac1dd5b92350252e2feb83d69a07c311edd7f97040f2af471dad7763bb3515c769dab7e64b40e0d6ecabd92b3f546dea5178cc16638ce5564fbd2
|
7
|
+
data.tar.gz: cd0ef3c36d85d60505f5826d25a0189e8bcdfa0d7799b574f16d010ea9e4ba1367f66dba522e3287df5248e0dc11d5631252ad726b1050b8e730bc3fc7cbc045
|
data/.vscode/settings.json
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -8,6 +8,7 @@ This is a native extension to Ruby that adds native (C) statistical functions to
|
|
8
8
|
- [Population Variance](https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance) (varp)
|
9
9
|
- [Median](https://en.wikipedia.org/wiki/Median) (median)
|
10
10
|
- [Mean](https://en.wikipedia.org/wiki/Arithmetic_mean) (mean)
|
11
|
+
- [Percentile](https://en.wikipedia.org/wiki/Quantile) (percentile)
|
11
12
|
|
12
13
|
Check the Github Actions build to see the currently supported versions of Ruby. This list will match whatever stable versions are specified at https://www.ruby-lang.org/en/downloads/.
|
13
14
|
|
@@ -16,6 +17,10 @@ It is much more performant than calculating the standard deviation with pure Rub
|
|
16
17
|
bench_native_dispersion 0.000425 0.000341 0.000420 0.000324 0.000319
|
17
18
|
bench_ruby_dispersion 0.002168 0.002156 0.002148 0.002149 0.002151
|
18
19
|
|
20
|
+
## Found a bug? Need a function?
|
21
|
+
|
22
|
+
If you find a bug or need a particular function, please let me know! I work on this gem in my spare time, mainly for learning purposes. Feel free to open a PR or a GitHub issue and I'll take a look as soon as possible.
|
23
|
+
|
19
24
|
## Usage
|
20
25
|
|
21
26
|
require 'ruby_native_statistics'
|
@@ -33,6 +38,15 @@ It is much more performant than calculating the standard deviation with pure Rub
|
|
33
38
|
# calculate median
|
34
39
|
p r.median
|
35
40
|
|
41
|
+
# calculate percentile
|
42
|
+
p r.percentile(0.3333)
|
43
|
+
|
44
|
+
## Implementation notes
|
45
|
+
|
46
|
+
### Percentile
|
47
|
+
|
48
|
+
Percentile uses the same linear-interpolation method as Excel (PERCENTILE / PERCENTILE.INC), sometimes called R7.
|
49
|
+
|
36
50
|
## Links
|
37
51
|
|
38
52
|
This is the third version of this gem, and it is a total rewrite of a SWIG-based design. Lots of thanks to the following resources:
|
data/Rakefile
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
require "rake/extensiontask"
|
2
2
|
require "rake/testtask"
|
3
3
|
|
4
|
-
Rake::ExtensionTask.new "
|
5
|
-
ext.lib_dir = "lib"
|
6
|
-
end
|
7
|
-
|
8
|
-
Rake::ExtensionTask.new "mathematics" do |ext|
|
9
|
-
ext.lib_dir = "lib"
|
4
|
+
Rake::ExtensionTask.new "ruby_native_statistics" do |ext|
|
5
|
+
ext.lib_dir = "lib/ruby_native_statistics"
|
10
6
|
end
|
11
7
|
|
12
8
|
Rake::TestTask.new(:test) do |t|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#include "conversions.h"
|
2
|
+
|
3
|
+
// qsort() comparator for doubles.
//
// Returns a negative value when *a < *b, a positive value when *a > *b and
// zero otherwise.
//
// The result is built from two comparisons instead of returning the
// difference: converting a fractional difference to int truncates toward
// zero (0.5 - 0.2 would compare as "equal"), and a large difference does not
// fit in an int at all. Either case leaves the array incorrectly sorted.
int compare_doubles(const void *a, const void *b)
{
  const double lhs = *(const double *)a;
  const double rhs = *(const double *)b;

  return (lhs > rhs) - (lhs < rhs);
}
|
13
|
+
|
14
|
+
// Copies a Ruby array of numbers into a newly malloc'd buffer of doubles and
// sorts that buffer with qsort() using compare_doubles.
//
// array        - Ruby array to read; each element must be an Integer or Float.
// array_length - number of elements to read from `array`.
//
// Returns the sorted buffer. The caller owns it and must free() it.
// Raises StandardError when the buffer cannot be allocated, and TypeError
// (after releasing the buffer) when an element is neither Integer nor Float.
double *sorted_ruby_array(VALUE array, long array_length)
{
  double *values = malloc(array_length * sizeof(double));

  if (values == NULL)
  {
    rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
  }

  for (long idx = 0; idx < array_length; idx++)
  {
    VALUE element = rb_ary_entry(array, idx);
    int is_numeric = RB_INTEGER_TYPE_P(element) || RB_FLOAT_TYPE_P(element);

    if (!is_numeric)
    {
      // rb_raise does not return, so the buffer has to be released first.
      free(values);
      rb_raise(rb_eTypeError, "element is not a number");
    }

    values[idx] = NUM2DBL(element);
  }

  qsort(values, array_length, sizeof(double), compare_doubles);

  return values;
}
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#include "dispersion.h"
|
2
|
+
|
3
|
+
// Sample standard deviation of the receiver.
//
// self must be a Ruby Array (enforced by Check_Type). The value returned by
// calculate_total_distance_from_mean is divided by (n - 1) and the square
// root of that quotient is returned as a Ruby Float.
//
// Raises RangeError when the array has fewer than two elements.
VALUE rb_sample_standard_deviation(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int count = rb_long2int(RARRAY_LEN(self));

  if (count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double total_distance = calculate_total_distance_from_mean(self, count);
  double variance = total_distance / (count - 1);

  return DBL2NUM(sqrt(variance));
}
|
18
|
+
|
19
|
+
// Sample variance of the receiver.
//
// self must be a Ruby Array (enforced by Check_Type). The value returned by
// calculate_total_distance_from_mean is divided by (n - 1) and returned as a
// Ruby Float.
//
// Raises RangeError when the array has fewer than two elements.
VALUE rb_sample_variance(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int count = rb_long2int(RARRAY_LEN(self));

  if (count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double total_distance = calculate_total_distance_from_mean(self, count);

  return DBL2NUM(total_distance / (count - 1));
}
|
34
|
+
|
35
|
+
// Population standard deviation of the receiver.
//
// self must be a Ruby Array (enforced by Check_Type). The value returned by
// calculate_total_distance_from_mean is divided by n and the square root of
// that quotient is returned as a Ruby Float.
//
// Raises RangeError when the array has fewer than two elements.
VALUE rb_population_standard_deviation(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int count = rb_long2int(RARRAY_LEN(self));

  if (count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double total_distance = calculate_total_distance_from_mean(self, count);
  double variance = total_distance / count;

  return DBL2NUM(sqrt(variance));
}
|
50
|
+
|
51
|
+
// Population variance of the receiver.
//
// self must be a Ruby Array (enforced by Check_Type). The value returned by
// calculate_total_distance_from_mean is divided by n and returned as a Ruby
// Float.
//
// Raises RangeError when the array has fewer than two elements.
VALUE rb_population_variance(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int count = rb_long2int(RARRAY_LEN(self));

  if (count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double total_distance = calculate_total_distance_from_mean(self, count);

  return DBL2NUM(total_distance / count);
}
|
66
|
+
|
67
|
+
// Percentile of the receiver, interpolating linearly between the two nearest
// ranks of the sorted data.
//
// self         - Ruby Array of Integers/Floats (enforced by Check_Type and by
//                sorted_ruby_array).
// r_percentile - Ruby number in the closed interval [0, 1].
//
// Returns the percentile as a Ruby Float.
// Raises RangeError when the array is empty or when the percentile is
// outside [0, 1] or is NaN. Errors raised by sorted_ruby_array (allocation
// failure, non-numeric element) propagate unchanged.
VALUE rb_percentile(VALUE self, VALUE r_percentile)
{
  double result;
  Check_Type(self, T_ARRAY);

  long array_length = rb_array_len(self);
  double percentile = NUM2DBL(r_percentile);

  if (array_length == 0)
  {
    rb_raise(rb_eRangeError, "array must have at least one element");
  }

  // Written as a negated "in range" test so that NaN is rejected as well:
  // every ordered comparison with NaN is false, so `p < 0 || p > 1` would let
  // it through and the rank computed below would index outside the buffer.
  if (!(percentile >= 0 && percentile <= 1))
  {
    rb_raise(rb_eRangeError, "percentile must be between 0 and 1 inclusive");
  }

  // Owned by this function from here on; released before returning.
  double *sorted_array = sorted_ruby_array(self, array_length);

  // 1-based fractional rank in [1, array_length].
  double h = (array_length - 1) * percentile + 1;

  if (trunc(h) == h)
  {
    // The rank lands exactly on an element.
    result = sorted_array[(long)h - 1];
  }
  else
  {
    // The rank falls between two neighbours: interpolate between them.
    // h < array_length here, so h_floor is a valid index for the upper one.
    long h_floor = (long)trunc(h);
    result = (h - h_floor) * (sorted_array[h_floor] - sorted_array[h_floor - 1]) + sorted_array[h_floor - 1];
  }

  free(sorted_array);

  return DBL2NUM(result);
}
|
@@ -1,5 +1,10 @@
|
|
1
|
-
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
#include "conversions.h"
|
4
|
+
#include "mathematics.h"
|
5
|
+
|
2
6
|
VALUE rb_sample_standard_deviation(VALUE self);
|
3
7
|
VALUE rb_population_standard_deviation(VALUE self);
|
4
8
|
VALUE rb_sample_variance(VALUE self);
|
5
9
|
VALUE rb_population_variance(VALUE self);
|
10
|
+
VALUE rb_percentile(VALUE self, VALUE percentile);
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
|
3
|
+
abort "missing pow()" unless have_func "pow"
|
4
|
+
abort "missing sqrt()" unless have_func "sqrt"
|
5
|
+
abort "missing malloc()" unless have_func "malloc"
|
6
|
+
abort "missing free()" unless have_func "free"
|
7
|
+
abort "missing trunc()" unless have_func "trunc"
|
8
|
+
|
9
|
+
create_makefile "ruby_native_statistics/ruby_native_statistics"
|
@@ -1,13 +1,5 @@
|
|
1
|
-
#include "stdbool.h"
|
2
|
-
#include "ruby.h"
|
3
|
-
#include "mathematics.h"
|
4
1
|
|
5
|
-
|
6
|
-
{
|
7
|
-
MathematicsModule = rb_define_module("Mathematics");
|
8
|
-
rb_define_method(MathematicsModule, "mean", rb_mean, 0);
|
9
|
-
rb_define_method(MathematicsModule, "median", rb_median, 0);
|
10
|
-
}
|
2
|
+
#include "mathematics.h"
|
11
3
|
|
12
4
|
double calculate_mean(VALUE array, unsigned long array_length)
|
13
5
|
{
|
@@ -57,22 +49,10 @@ VALUE rb_mean(VALUE self)
|
|
57
49
|
return DBL2NUM(calculate_mean(self, array_length));
|
58
50
|
}
|
59
51
|
|
60
|
-
int compare_doubles(const void *a, const void *b)
|
61
|
-
{
|
62
|
-
double *dbl_a = (double *)a;
|
63
|
-
double *dbl_b = (double *)b;
|
64
|
-
|
65
|
-
double cmp_a = *dbl_a;
|
66
|
-
double cmp_b = *dbl_b;
|
67
|
-
|
68
|
-
return (cmp_a - cmp_b);
|
69
|
-
}
|
70
|
-
|
71
52
|
VALUE rb_median(VALUE self)
|
72
53
|
{
|
73
54
|
unsigned long array_length;
|
74
|
-
|
75
|
-
double *working_array;
|
55
|
+
|
76
56
|
VALUE result;
|
77
57
|
|
78
58
|
Check_Type(self, T_ARRAY);
|
@@ -87,30 +67,7 @@ VALUE rb_median(VALUE self)
|
|
87
67
|
bool array_even_size = (array_length % 2) == 0;
|
88
68
|
unsigned long middle = (long)floor(array_length / 2.0);
|
89
69
|
|
90
|
-
working_array =
|
91
|
-
|
92
|
-
if (working_array == NULL)
|
93
|
-
{
|
94
|
-
rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
|
95
|
-
}
|
96
|
-
|
97
|
-
for (i = 0; i < array_length; i++)
|
98
|
-
{
|
99
|
-
VALUE item = rb_ary_entry(self, i);
|
100
|
-
|
101
|
-
if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
|
102
|
-
{
|
103
|
-
free(working_array);
|
104
|
-
rb_raise(rb_eTypeError, "element is not a number");
|
105
|
-
}
|
106
|
-
|
107
|
-
working_array[i] = NUM2DBL(item);
|
108
|
-
}
|
109
|
-
|
110
|
-
// Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
|
111
|
-
// to a pointer to the first element in the array.
|
112
|
-
// https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
|
113
|
-
qsort(working_array, array_length, sizeof(double), compare_doubles);
|
70
|
+
double *working_array = sorted_ruby_array(self, array_length);
|
114
71
|
|
115
72
|
if (!array_even_size)
|
116
73
|
{
|
@@ -1,5 +1,9 @@
|
|
1
|
-
|
1
|
+
#include <stdbool.h>
|
2
|
+
#include <ruby.h>
|
3
|
+
#include "conversions.h"
|
4
|
+
|
2
5
|
VALUE rb_mean(VALUE self);
|
3
6
|
VALUE rb_median(VALUE self);
|
7
|
+
|
4
8
|
double calculate_mean(VALUE array, unsigned long array_length);
|
5
|
-
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
|
9
|
+
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#include "ruby_native_statistics.h"
|
2
|
+
|
3
|
+
void Init_ruby_native_statistics()
|
4
|
+
{
|
5
|
+
DispersionModule = rb_define_module("Dispersion");
|
6
|
+
rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
|
7
|
+
rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
|
8
|
+
rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
|
9
|
+
rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
|
10
|
+
rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
|
11
|
+
rb_define_method(DispersionModule, "percentile", rb_percentile, 1);
|
12
|
+
|
13
|
+
MathematicsModule = rb_define_module("Mathematics");
|
14
|
+
rb_define_method(MathematicsModule, "mean", rb_mean, 0);
|
15
|
+
rb_define_method(MathematicsModule, "median", rb_median, 0);
|
16
|
+
}
|
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.extensions = %w[ext/
|
24
|
+
spec.extensions = %w[ext/ruby_native_statistics/extconf.rb]
|
25
25
|
end
|
metadata
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_native_statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cory Buecker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
15
15
|
- cory.buecker@gmail.com
|
16
16
|
executables: []
|
17
17
|
extensions:
|
18
|
-
- ext/
|
19
|
-
- ext/dispersion/extconf.rb
|
18
|
+
- ext/ruby_native_statistics/extconf.rb
|
20
19
|
extra_rdoc_files: []
|
21
20
|
files:
|
22
21
|
- ".github/workflows/main.yml"
|
@@ -29,12 +28,15 @@ files:
|
|
29
28
|
- README.md
|
30
29
|
- Rakefile
|
31
30
|
- changelog.md
|
32
|
-
- ext/
|
33
|
-
- ext/
|
34
|
-
- ext/dispersion
|
35
|
-
- ext/
|
36
|
-
- ext/
|
37
|
-
- ext/
|
31
|
+
- ext/ruby_native_statistics/conversions.c
|
32
|
+
- ext/ruby_native_statistics/conversions.h
|
33
|
+
- ext/ruby_native_statistics/dispersion.c
|
34
|
+
- ext/ruby_native_statistics/dispersion.h
|
35
|
+
- ext/ruby_native_statistics/extconf.rb
|
36
|
+
- ext/ruby_native_statistics/mathematics.c
|
37
|
+
- ext/ruby_native_statistics/mathematics.h
|
38
|
+
- ext/ruby_native_statistics/ruby_native_statistics.c
|
39
|
+
- ext/ruby_native_statistics/ruby_native_statistics.h
|
38
40
|
- lib/ruby_native_statistics.rb
|
39
41
|
- lib/ruby_native_statistics/version.rb
|
40
42
|
- ruby_native_statistics.gemspec
|
data/ext/dispersion/dispersion.c
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
#include "ruby.h"
|
2
|
-
#include "dispersion.h"
|
3
|
-
#include "../mathematics/mathematics.h"
|
4
|
-
|
5
|
-
void Init_dispersion() {
|
6
|
-
DispersionModule = rb_define_module("Dispersion");
|
7
|
-
rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
|
8
|
-
rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
|
9
|
-
rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
|
10
|
-
rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
|
11
|
-
rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
|
12
|
-
}
|
13
|
-
|
14
|
-
VALUE rb_sample_standard_deviation(VALUE self) {
|
15
|
-
unsigned int array_length;
|
16
|
-
|
17
|
-
Check_Type(self, T_ARRAY);
|
18
|
-
|
19
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
20
|
-
|
21
|
-
if (array_length <= 1) {
|
22
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
23
|
-
}
|
24
|
-
|
25
|
-
return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length)/(array_length - 1))));
|
26
|
-
}
|
27
|
-
|
28
|
-
VALUE rb_sample_variance(VALUE self) {
|
29
|
-
unsigned int array_length;
|
30
|
-
|
31
|
-
Check_Type(self, T_ARRAY);
|
32
|
-
|
33
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
34
|
-
|
35
|
-
if (array_length <= 1) {
|
36
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
37
|
-
}
|
38
|
-
|
39
|
-
return DBL2NUM((calculate_total_distance_from_mean(self, array_length)/(array_length - 1)));
|
40
|
-
}
|
41
|
-
|
42
|
-
VALUE rb_population_standard_deviation(VALUE self) {
|
43
|
-
unsigned int array_length;
|
44
|
-
|
45
|
-
Check_Type(self, T_ARRAY);
|
46
|
-
|
47
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
48
|
-
|
49
|
-
if (array_length <= 1) {
|
50
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
51
|
-
}
|
52
|
-
|
53
|
-
return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
|
54
|
-
}
|
55
|
-
|
56
|
-
VALUE rb_population_variance(VALUE self) {
|
57
|
-
unsigned int array_length;
|
58
|
-
|
59
|
-
Check_Type(self, T_ARRAY);
|
60
|
-
|
61
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
62
|
-
|
63
|
-
if (array_length <= 1) {
|
64
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
65
|
-
}
|
66
|
-
|
67
|
-
return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
|
68
|
-
}
|
data/ext/dispersion/extconf.rb
DELETED