ruby_native_statistics 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +5 -1
- data/Gemfile.lock +1 -1
- data/README.md +14 -0
- data/Rakefile +2 -6
- data/ext/ruby_native_statistics/conversions.c +45 -0
- data/ext/ruby_native_statistics/conversions.h +5 -0
- data/ext/ruby_native_statistics/dispersion.c +102 -0
- data/ext/{dispersion → ruby_native_statistics}/dispersion.h +6 -1
- data/ext/ruby_native_statistics/extconf.rb +9 -0
- data/ext/{mathematics → ruby_native_statistics}/mathematics.c +3 -46
- data/ext/{mathematics → ruby_native_statistics}/mathematics.h +6 -2
- data/ext/ruby_native_statistics/ruby_native_statistics.c +16 -0
- data/ext/ruby_native_statistics/ruby_native_statistics.h +5 -0
- data/lib/ruby_native_statistics.rb +1 -2
- data/lib/ruby_native_statistics/version.rb +1 -1
- data/ruby_native_statistics.gemspec +1 -1
- metadata +12 -10
- data/ext/dispersion/dispersion.c +0 -68
- data/ext/dispersion/extconf.rb +0 -3
- data/ext/mathematics/extconf.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99451de1aa0d6eab93118c4ce68330f21210437115e05ef3bcdff4ae63e780d7
|
4
|
+
data.tar.gz: edf6bdec21f87d70e808eaca249ddf1447b0ca57979a039e2295d679976e73e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6e12d43e6f7ac1dd5b92350252e2feb83d69a07c311edd7f97040f2af471dad7763bb3515c769dab7e64b40e0d6ecabd92b3f546dea5178cc16638ce5564fbd2
|
7
|
+
data.tar.gz: cd0ef3c36d85d60505f5826d25a0189e8bcdfa0d7799b574f16d010ea9e4ba1367f66dba522e3287df5248e0dc11d5631252ad726b1050b8e730bc3fc7cbc045
|
data/.vscode/settings.json
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -8,6 +8,7 @@ This is a native extension to Ruby that adds native (C) statistical functions to
|
|
8
8
|
- [Population Variance](https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance) (varp)
|
9
9
|
- [Median](https://en.wikipedia.org/wiki/Median) (median)
|
10
10
|
- [Mean](https://en.wikipedia.org/wiki/Arithmetic_mean) (mean)
|
11
|
+
- [Percentile](https://en.wikipedia.org/wiki/Quantile) (percentile)
|
11
12
|
|
12
13
|
Check the Github Actions build to see the currently supported versions of Ruby. This list will match whatever stable versions are specified at https://www.ruby-lang.org/en/downloads/.
|
13
14
|
|
@@ -16,6 +17,10 @@ It is much more performant than calculating the standard deviation with pure Rub
|
|
16
17
|
bench_native_dispersion 0.000425 0.000341 0.000420 0.000324 0.000319
|
17
18
|
bench_ruby_dispersion 0.002168 0.002156 0.002148 0.002149 0.002151
|
18
19
|
|
20
|
+
## Found a bug? Need a function?
|
21
|
+
|
22
|
+
If you found a bug or need a particular function, please let me know! I work on this gem in my spare time, mainly for learning purposes. Feel free to open a PR or a GitHub issue and I'll take a look as soon as possible.
|
23
|
+
|
19
24
|
## Usage
|
20
25
|
|
21
26
|
require 'ruby_native_statistics'
|
@@ -33,6 +38,15 @@ It is much more performant than calculating the standard deviation with pure Rub
|
|
33
38
|
# calculate median
|
34
39
|
p r.median
|
35
40
|
|
41
|
+
# calculate percentile
|
42
|
+
p r.percentile(0.3333)
|
43
|
+
|
44
|
+
## Implementation notes
|
45
|
+
|
46
|
+
### Percentile
|
47
|
+
|
48
|
+
Percentile uses the same linear-interpolation method as Excel, sometimes called R7 (quantile type 7).
|
49
|
+
|
36
50
|
## Links
|
37
51
|
|
38
52
|
This is the third version of this gem, and it is a total rewrite of a SWIG-based design. Lots of thanks to the following resources:
|
data/Rakefile
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
require "rake/extensiontask"
|
2
2
|
require "rake/testtask"
|
3
3
|
|
4
|
-
Rake::ExtensionTask.new "
|
5
|
-
ext.lib_dir = "lib"
|
6
|
-
end
|
7
|
-
|
8
|
-
Rake::ExtensionTask.new "mathematics" do |ext|
|
9
|
-
ext.lib_dir = "lib"
|
4
|
+
Rake::ExtensionTask.new "ruby_native_statistics" do |ext|
|
5
|
+
ext.lib_dir = "lib/ruby_native_statistics"
|
10
6
|
end
|
11
7
|
|
12
8
|
Rake::TestTask.new(:test) do |t|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#include "conversions.h"
|
2
|
+
|
3
|
+
int compare_doubles(const void *a, const void *b)
|
4
|
+
{
|
5
|
+
double *dbl_a = (double *)a;
|
6
|
+
double *dbl_b = (double *)b;
|
7
|
+
|
8
|
+
double cmp_a = *dbl_a;
|
9
|
+
double cmp_b = *dbl_b;
|
10
|
+
|
11
|
+
return (cmp_a - cmp_b);
|
12
|
+
}
|
13
|
+
|
14
|
+
double *sorted_ruby_array(VALUE array, long array_length)
|
15
|
+
{
|
16
|
+
long i;
|
17
|
+
double *working_array;
|
18
|
+
|
19
|
+
working_array = malloc(array_length * sizeof(double));
|
20
|
+
|
21
|
+
if (working_array == NULL)
|
22
|
+
{
|
23
|
+
rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
|
24
|
+
}
|
25
|
+
|
26
|
+
for (i = 0; i < array_length; i++)
|
27
|
+
{
|
28
|
+
VALUE item = rb_ary_entry(array, i);
|
29
|
+
|
30
|
+
if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
|
31
|
+
{
|
32
|
+
free(working_array);
|
33
|
+
rb_raise(rb_eTypeError, "element is not a number");
|
34
|
+
}
|
35
|
+
|
36
|
+
working_array[i] = NUM2DBL(item);
|
37
|
+
}
|
38
|
+
|
39
|
+
// Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
|
40
|
+
// to a pointer to the first element in the array.
|
41
|
+
// https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
|
42
|
+
qsort(working_array, array_length, sizeof(double), compare_doubles);
|
43
|
+
|
44
|
+
return working_array;
|
45
|
+
}
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#include "dispersion.h"
|
2
|
+
|
3
|
+
VALUE rb_sample_standard_deviation(VALUE self)
|
4
|
+
{
|
5
|
+
unsigned int array_length;
|
6
|
+
|
7
|
+
Check_Type(self, T_ARRAY);
|
8
|
+
|
9
|
+
array_length = rb_long2int(RARRAY_LEN(self));
|
10
|
+
|
11
|
+
if (array_length <= 1)
|
12
|
+
{
|
13
|
+
rb_raise(rb_eRangeError, "array must have more than one element");
|
14
|
+
}
|
15
|
+
|
16
|
+
return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length) / (array_length - 1))));
|
17
|
+
}
|
18
|
+
|
19
|
+
VALUE rb_sample_variance(VALUE self)
|
20
|
+
{
|
21
|
+
unsigned int array_length;
|
22
|
+
|
23
|
+
Check_Type(self, T_ARRAY);
|
24
|
+
|
25
|
+
array_length = rb_long2int(RARRAY_LEN(self));
|
26
|
+
|
27
|
+
if (array_length <= 1)
|
28
|
+
{
|
29
|
+
rb_raise(rb_eRangeError, "array must have more than one element");
|
30
|
+
}
|
31
|
+
|
32
|
+
return DBL2NUM((calculate_total_distance_from_mean(self, array_length) / (array_length - 1)));
|
33
|
+
}
|
34
|
+
|
35
|
+
VALUE rb_population_standard_deviation(VALUE self)
|
36
|
+
{
|
37
|
+
unsigned int array_length;
|
38
|
+
|
39
|
+
Check_Type(self, T_ARRAY);
|
40
|
+
|
41
|
+
array_length = rb_long2int(RARRAY_LEN(self));
|
42
|
+
|
43
|
+
if (array_length <= 1)
|
44
|
+
{
|
45
|
+
rb_raise(rb_eRangeError, "array must have more than one element");
|
46
|
+
}
|
47
|
+
|
48
|
+
return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
|
49
|
+
}
|
50
|
+
|
51
|
+
VALUE rb_population_variance(VALUE self)
|
52
|
+
{
|
53
|
+
unsigned int array_length;
|
54
|
+
|
55
|
+
Check_Type(self, T_ARRAY);
|
56
|
+
|
57
|
+
array_length = rb_long2int(RARRAY_LEN(self));
|
58
|
+
|
59
|
+
if (array_length <= 1)
|
60
|
+
{
|
61
|
+
rb_raise(rb_eRangeError, "array must have more than one element");
|
62
|
+
}
|
63
|
+
|
64
|
+
return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE rb_percentile(VALUE self, VALUE r_percentile)
|
68
|
+
{
|
69
|
+
double result;
|
70
|
+
Check_Type(self, T_ARRAY);
|
71
|
+
|
72
|
+
long array_length = rb_array_len(self);
|
73
|
+
double percentile = NUM2DBL(r_percentile);
|
74
|
+
|
75
|
+
if (array_length == 0)
|
76
|
+
{
|
77
|
+
rb_raise(rb_eRangeError, "array must have at least one element");
|
78
|
+
}
|
79
|
+
|
80
|
+
if (percentile < 0 || percentile > 1)
|
81
|
+
{
|
82
|
+
rb_raise(rb_eRangeError, "percentile must be between 0 and 1 inclusive");
|
83
|
+
}
|
84
|
+
|
85
|
+
double *sorted_array = sorted_ruby_array(self, array_length);
|
86
|
+
|
87
|
+
double h = (array_length - 1) * percentile + 1;
|
88
|
+
|
89
|
+
if (trunc(h) == h)
|
90
|
+
{
|
91
|
+
result = sorted_array[(long)h - 1];
|
92
|
+
}
|
93
|
+
else
|
94
|
+
{
|
95
|
+
long h_floor = (long)trunc(h);
|
96
|
+
result = (h - h_floor) * (sorted_array[h_floor] - sorted_array[h_floor - 1]) + sorted_array[h_floor - 1];
|
97
|
+
}
|
98
|
+
|
99
|
+
free(sorted_array);
|
100
|
+
|
101
|
+
return DBL2NUM(result);
|
102
|
+
}
|
@@ -1,5 +1,10 @@
|
|
1
|
-
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
#include "conversions.h"
|
4
|
+
#include "mathematics.h"
|
5
|
+
|
2
6
|
VALUE rb_sample_standard_deviation(VALUE self);
|
3
7
|
VALUE rb_population_standard_deviation(VALUE self);
|
4
8
|
VALUE rb_sample_variance(VALUE self);
|
5
9
|
VALUE rb_population_variance(VALUE self);
|
10
|
+
VALUE rb_percentile(VALUE self, VALUE percentile);
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
|
3
|
+
abort "missing pow()" unless have_func "pow"
|
4
|
+
abort "missing sqrt()" unless have_func "sqrt"
|
5
|
+
abort "missing malloc()" unless have_func "malloc"
|
6
|
+
abort "missing free()" unless have_func "free"
|
7
|
+
abort "missing trunc()" unless have_func "trunc"
|
8
|
+
|
9
|
+
create_makefile "ruby_native_statistics/ruby_native_statistics"
|
@@ -1,13 +1,5 @@
|
|
1
|
-
#include "stdbool.h"
|
2
|
-
#include "ruby.h"
|
3
|
-
#include "mathematics.h"
|
4
1
|
|
5
|
-
|
6
|
-
{
|
7
|
-
MathematicsModule = rb_define_module("Mathematics");
|
8
|
-
rb_define_method(MathematicsModule, "mean", rb_mean, 0);
|
9
|
-
rb_define_method(MathematicsModule, "median", rb_median, 0);
|
10
|
-
}
|
2
|
+
#include "mathematics.h"
|
11
3
|
|
12
4
|
double calculate_mean(VALUE array, unsigned long array_length)
|
13
5
|
{
|
@@ -57,22 +49,10 @@ VALUE rb_mean(VALUE self)
|
|
57
49
|
return DBL2NUM(calculate_mean(self, array_length));
|
58
50
|
}
|
59
51
|
|
60
|
-
int compare_doubles(const void *a, const void *b)
|
61
|
-
{
|
62
|
-
double *dbl_a = (double *)a;
|
63
|
-
double *dbl_b = (double *)b;
|
64
|
-
|
65
|
-
double cmp_a = *dbl_a;
|
66
|
-
double cmp_b = *dbl_b;
|
67
|
-
|
68
|
-
return (cmp_a - cmp_b);
|
69
|
-
}
|
70
|
-
|
71
52
|
VALUE rb_median(VALUE self)
|
72
53
|
{
|
73
54
|
unsigned long array_length;
|
74
|
-
|
75
|
-
double *working_array;
|
55
|
+
|
76
56
|
VALUE result;
|
77
57
|
|
78
58
|
Check_Type(self, T_ARRAY);
|
@@ -87,30 +67,7 @@ VALUE rb_median(VALUE self)
|
|
87
67
|
bool array_even_size = (array_length % 2) == 0;
|
88
68
|
unsigned long middle = (long)floor(array_length / 2.0);
|
89
69
|
|
90
|
-
working_array =
|
91
|
-
|
92
|
-
if (working_array == NULL)
|
93
|
-
{
|
94
|
-
rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
|
95
|
-
}
|
96
|
-
|
97
|
-
for (i = 0; i < array_length; i++)
|
98
|
-
{
|
99
|
-
VALUE item = rb_ary_entry(self, i);
|
100
|
-
|
101
|
-
if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
|
102
|
-
{
|
103
|
-
free(working_array);
|
104
|
-
rb_raise(rb_eTypeError, "element is not a number");
|
105
|
-
}
|
106
|
-
|
107
|
-
working_array[i] = NUM2DBL(item);
|
108
|
-
}
|
109
|
-
|
110
|
-
// Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
|
111
|
-
// to a pointer to the first element in the array.
|
112
|
-
// https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
|
113
|
-
qsort(working_array, array_length, sizeof(double), compare_doubles);
|
70
|
+
double *working_array = sorted_ruby_array(self, array_length);
|
114
71
|
|
115
72
|
if (!array_even_size)
|
116
73
|
{
|
@@ -1,5 +1,9 @@
|
|
1
|
-
|
1
|
+
#include <stdbool.h>
|
2
|
+
#include <ruby.h>
|
3
|
+
#include "conversions.h"
|
4
|
+
|
2
5
|
VALUE rb_mean(VALUE self);
|
3
6
|
VALUE rb_median(VALUE self);
|
7
|
+
|
4
8
|
double calculate_mean(VALUE array, unsigned long array_length);
|
5
|
-
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
|
9
|
+
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#include "ruby_native_statistics.h"
|
2
|
+
|
3
|
+
void Init_ruby_native_statistics()
|
4
|
+
{
|
5
|
+
DispersionModule = rb_define_module("Dispersion");
|
6
|
+
rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
|
7
|
+
rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
|
8
|
+
rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
|
9
|
+
rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
|
10
|
+
rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
|
11
|
+
rb_define_method(DispersionModule, "percentile", rb_percentile, 1);
|
12
|
+
|
13
|
+
MathematicsModule = rb_define_module("Mathematics");
|
14
|
+
rb_define_method(MathematicsModule, "mean", rb_mean, 0);
|
15
|
+
rb_define_method(MathematicsModule, "median", rb_median, 0);
|
16
|
+
}
|
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.extensions = %w[ext/
|
24
|
+
spec.extensions = %w[ext/ruby_native_statistics/extconf.rb]
|
25
25
|
end
|
metadata
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_native_statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cory Buecker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
15
15
|
- cory.buecker@gmail.com
|
16
16
|
executables: []
|
17
17
|
extensions:
|
18
|
-
- ext/
|
19
|
-
- ext/dispersion/extconf.rb
|
18
|
+
- ext/ruby_native_statistics/extconf.rb
|
20
19
|
extra_rdoc_files: []
|
21
20
|
files:
|
22
21
|
- ".github/workflows/main.yml"
|
@@ -29,12 +28,15 @@ files:
|
|
29
28
|
- README.md
|
30
29
|
- Rakefile
|
31
30
|
- changelog.md
|
32
|
-
- ext/
|
33
|
-
- ext/
|
34
|
-
- ext/dispersion
|
35
|
-
- ext/
|
36
|
-
- ext/
|
37
|
-
- ext/
|
31
|
+
- ext/ruby_native_statistics/conversions.c
|
32
|
+
- ext/ruby_native_statistics/conversions.h
|
33
|
+
- ext/ruby_native_statistics/dispersion.c
|
34
|
+
- ext/ruby_native_statistics/dispersion.h
|
35
|
+
- ext/ruby_native_statistics/extconf.rb
|
36
|
+
- ext/ruby_native_statistics/mathematics.c
|
37
|
+
- ext/ruby_native_statistics/mathematics.h
|
38
|
+
- ext/ruby_native_statistics/ruby_native_statistics.c
|
39
|
+
- ext/ruby_native_statistics/ruby_native_statistics.h
|
38
40
|
- lib/ruby_native_statistics.rb
|
39
41
|
- lib/ruby_native_statistics/version.rb
|
40
42
|
- ruby_native_statistics.gemspec
|
data/ext/dispersion/dispersion.c
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
#include "ruby.h"
|
2
|
-
#include "dispersion.h"
|
3
|
-
#include "../mathematics/mathematics.h"
|
4
|
-
|
5
|
-
void Init_dispersion() {
|
6
|
-
DispersionModule = rb_define_module("Dispersion");
|
7
|
-
rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
|
8
|
-
rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
|
9
|
-
rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
|
10
|
-
rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
|
11
|
-
rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
|
12
|
-
}
|
13
|
-
|
14
|
-
VALUE rb_sample_standard_deviation(VALUE self) {
|
15
|
-
unsigned int array_length;
|
16
|
-
|
17
|
-
Check_Type(self, T_ARRAY);
|
18
|
-
|
19
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
20
|
-
|
21
|
-
if (array_length <= 1) {
|
22
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
23
|
-
}
|
24
|
-
|
25
|
-
return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length)/(array_length - 1))));
|
26
|
-
}
|
27
|
-
|
28
|
-
VALUE rb_sample_variance(VALUE self) {
|
29
|
-
unsigned int array_length;
|
30
|
-
|
31
|
-
Check_Type(self, T_ARRAY);
|
32
|
-
|
33
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
34
|
-
|
35
|
-
if (array_length <= 1) {
|
36
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
37
|
-
}
|
38
|
-
|
39
|
-
return DBL2NUM((calculate_total_distance_from_mean(self, array_length)/(array_length - 1)));
|
40
|
-
}
|
41
|
-
|
42
|
-
VALUE rb_population_standard_deviation(VALUE self) {
|
43
|
-
unsigned int array_length;
|
44
|
-
|
45
|
-
Check_Type(self, T_ARRAY);
|
46
|
-
|
47
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
48
|
-
|
49
|
-
if (array_length <= 1) {
|
50
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
51
|
-
}
|
52
|
-
|
53
|
-
return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
|
54
|
-
}
|
55
|
-
|
56
|
-
VALUE rb_population_variance(VALUE self) {
|
57
|
-
unsigned int array_length;
|
58
|
-
|
59
|
-
Check_Type(self, T_ARRAY);
|
60
|
-
|
61
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
62
|
-
|
63
|
-
if (array_length <= 1) {
|
64
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
65
|
-
}
|
66
|
-
|
67
|
-
return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
|
68
|
-
}
|
data/ext/dispersion/extconf.rb
DELETED