ruby_native_statistics 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b99d6ace1e36cdd53315356b8c4de080289a46bc307b41bc1979de4cbaff2e42
4
- data.tar.gz: 5b8c3498b2a3014313b0b249eb87350e7f07e5a22237de4ea9e95f67096572c7
3
+ metadata.gz: 99451de1aa0d6eab93118c4ce68330f21210437115e05ef3bcdff4ae63e780d7
4
+ data.tar.gz: edf6bdec21f87d70e808eaca249ddf1447b0ca57979a039e2295d679976e73e0
5
5
  SHA512:
6
- metadata.gz: c837f3f6f7e2a8d47291d1e289d09e48de81c5216bca319e1be24aca7a821f0ba8e1acd3548ccfbe83a50b92752af5dc281a9a69dc052e58cee1f5e49b05e06d
7
- data.tar.gz: 3c8e3629ffe57bccdbdeaa8b792d593aa89bdce401e034a2a0d08fdf62fb1ad0841229cc60c222aaa37fe0895c2fdee83109176ec553e0c1529c33814868fa4e
6
+ metadata.gz: 6e12d43e6f7ac1dd5b92350252e2feb83d69a07c311edd7f97040f2af471dad7763bb3515c769dab7e64b40e0d6ecabd92b3f546dea5178cc16638ce5564fbd2
7
+ data.tar.gz: cd0ef3c36d85d60505f5826d25a0189e8bcdfa0d7799b574f16d010ea9e4ba1367f66dba522e3287df5248e0dc11d5631252ad726b1050b8e730bc3fc7cbc045
@@ -3,5 +3,9 @@
3
3
  "editor.tabSize": 2,
4
4
  "search.useGlobalIgnoreFiles": true,
5
5
  "editor.wordWrap": "on",
6
- "editor.renderWhitespace": "all"
6
+ "editor.renderWhitespace": "all",
7
+ "files.associations": {
8
+ "conversions.h": "c",
9
+ "algorithm": "c"
10
+ }
7
11
  }
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby_native_statistics (0.8)
4
+ ruby_native_statistics (0.10.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -8,6 +8,7 @@ This is a native extension to Ruby that adds native (C) statistical functions to
8
8
  - [Population Variance](https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance) (varp)
9
9
  - [Median](https://en.wikipedia.org/wiki/Median) (median)
10
10
  - [Mean](https://en.wikipedia.org/wiki/Arithmetic_mean) (mean)
11
+ - [Percentile](https://en.wikipedia.org/wiki/Quantile) (percentile)
11
12
 
12
13
  Check the Github Actions build to see the currently supported versions of Ruby. This list will match whatever stable versions are specified at https://www.ruby-lang.org/en/downloads/.
13
14
 
@@ -16,6 +17,10 @@ It is much more performant than calculating the standard deviation with pure Rub
16
17
  bench_native_dispersion 0.000425 0.000341 0.000420 0.000324 0.000319
17
18
  bench_ruby_dispersion 0.002168 0.002156 0.002148 0.002149 0.002151
18
19
 
20
+ ## Found a bug? Need a function?
21
+
22
+ If you found a bug or need a particular function, please let me know! I work on this gem in my spare time, mainly for learning purposes. Feel free to open a PR or a Github issue and I'll take a look as soon as possible.
23
+
19
24
  ## Usage
20
25
 
21
26
  require 'ruby_native_statistics'
@@ -33,6 +38,15 @@ It is much more performant than calculating the standard deviation with pure Rub
33
38
  # calculate median
34
39
  p r.median
35
40
 
41
+ # calculate percentile
42
+ p r.percentile(0.3333)
43
+
44
+ ## Implementation notes
45
+
46
+ ### Percentile
47
+
48
+ Percentile uses the same rounding method as Excel, sometimes called R7.
49
+
36
50
  ## Links
37
51
 
38
52
  This is the third version of this gem, and it is a total rewrite of a SWIG-based design. Lots of thanks to the following resources:
data/Rakefile CHANGED
@@ -1,12 +1,8 @@
1
1
  require "rake/extensiontask"
2
2
  require "rake/testtask"
3
3
 
4
- Rake::ExtensionTask.new "dispersion" do |ext|
5
- ext.lib_dir = "lib"
6
- end
7
-
8
- Rake::ExtensionTask.new "mathematics" do |ext|
9
- ext.lib_dir = "lib"
4
+ Rake::ExtensionTask.new "ruby_native_statistics" do |ext|
5
+ ext.lib_dir = "lib/ruby_native_statistics"
10
6
  end
11
7
 
12
8
  Rake::TestTask.new(:test) do |t|
@@ -0,0 +1,45 @@
1
+ #include "conversions.h"
2
+
3
+ int compare_doubles(const void *a, const void *b)
4
+ {
5
+ double *dbl_a = (double *)a;
6
+ double *dbl_b = (double *)b;
7
+
8
+ double cmp_a = *dbl_a;
9
+ double cmp_b = *dbl_b;
10
+
11
+ return (cmp_a - cmp_b);
12
+ }
13
+
14
+ double *sorted_ruby_array(VALUE array, long array_length)
15
+ {
16
+ long i;
17
+ double *working_array;
18
+
19
+ working_array = malloc(array_length * sizeof(double));
20
+
21
+ if (working_array == NULL)
22
+ {
23
+ rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
24
+ }
25
+
26
+ for (i = 0; i < array_length; i++)
27
+ {
28
+ VALUE item = rb_ary_entry(array, i);
29
+
30
+ if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
31
+ {
32
+ free(working_array);
33
+ rb_raise(rb_eTypeError, "element is not a number");
34
+ }
35
+
36
+ working_array[i] = NUM2DBL(item);
37
+ }
38
+
39
+ // Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
40
+ // to a pointer to the first element in the array.
41
+ // https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
42
+ qsort(working_array, array_length, sizeof(double), compare_doubles);
43
+
44
+ return working_array;
45
+ }
@@ -0,0 +1,5 @@
1
+ #include <stdbool.h>
2
+ #include <ruby.h>
3
+
4
+ int compare_doubles(const void *a, const void *b);
5
+ double *sorted_ruby_array(VALUE array, long array_length);
@@ -0,0 +1,102 @@
1
+ #include "dispersion.h"
2
+
3
+ VALUE rb_sample_standard_deviation(VALUE self)
4
+ {
5
+ unsigned int array_length;
6
+
7
+ Check_Type(self, T_ARRAY);
8
+
9
+ array_length = rb_long2int(RARRAY_LEN(self));
10
+
11
+ if (array_length <= 1)
12
+ {
13
+ rb_raise(rb_eRangeError, "array must have more than one element");
14
+ }
15
+
16
+ return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length) / (array_length - 1))));
17
+ }
18
+
19
+ VALUE rb_sample_variance(VALUE self)
20
+ {
21
+ unsigned int array_length;
22
+
23
+ Check_Type(self, T_ARRAY);
24
+
25
+ array_length = rb_long2int(RARRAY_LEN(self));
26
+
27
+ if (array_length <= 1)
28
+ {
29
+ rb_raise(rb_eRangeError, "array must have more than one element");
30
+ }
31
+
32
+ return DBL2NUM((calculate_total_distance_from_mean(self, array_length) / (array_length - 1)));
33
+ }
34
+
35
+ VALUE rb_population_standard_deviation(VALUE self)
36
+ {
37
+ unsigned int array_length;
38
+
39
+ Check_Type(self, T_ARRAY);
40
+
41
+ array_length = rb_long2int(RARRAY_LEN(self));
42
+
43
+ if (array_length <= 1)
44
+ {
45
+ rb_raise(rb_eRangeError, "array must have more than one element");
46
+ }
47
+
48
+ return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
49
+ }
50
+
51
+ VALUE rb_population_variance(VALUE self)
52
+ {
53
+ unsigned int array_length;
54
+
55
+ Check_Type(self, T_ARRAY);
56
+
57
+ array_length = rb_long2int(RARRAY_LEN(self));
58
+
59
+ if (array_length <= 1)
60
+ {
61
+ rb_raise(rb_eRangeError, "array must have more than one element");
62
+ }
63
+
64
+ return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
65
+ }
66
+
67
+ VALUE rb_percentile(VALUE self, VALUE r_percentile)
68
+ {
69
+ double result;
70
+ Check_Type(self, T_ARRAY);
71
+
72
+ long array_length = rb_array_len(self);
73
+ double percentile = NUM2DBL(r_percentile);
74
+
75
+ if (array_length == 0)
76
+ {
77
+ rb_raise(rb_eRangeError, "array must have at least one element");
78
+ }
79
+
80
+ if (percentile < 0 || percentile > 1)
81
+ {
82
+ rb_raise(rb_eRangeError, "percentile must be between 0 and 1 inclusive");
83
+ }
84
+
85
+ double *sorted_array = sorted_ruby_array(self, array_length);
86
+
87
+ double h = (array_length - 1) * percentile + 1;
88
+
89
+ if (trunc(h) == h)
90
+ {
91
+ result = sorted_array[(long)h - 1];
92
+ }
93
+ else
94
+ {
95
+ long h_floor = (long)trunc(h);
96
+ result = (h - h_floor) * (sorted_array[h_floor] - sorted_array[h_floor - 1]) + sorted_array[h_floor - 1];
97
+ }
98
+
99
+ free(sorted_array);
100
+
101
+ return DBL2NUM(result);
102
+ }
@@ -1,5 +1,10 @@
1
- VALUE DispersionModule = Qnil;
1
+ #include <ruby.h>
2
+ #include <math.h>
3
+ #include "conversions.h"
4
+ #include "mathematics.h"
5
+
2
6
  VALUE rb_sample_standard_deviation(VALUE self);
3
7
  VALUE rb_population_standard_deviation(VALUE self);
4
8
  VALUE rb_sample_variance(VALUE self);
5
9
  VALUE rb_population_variance(VALUE self);
10
+ VALUE rb_percentile(VALUE self, VALUE percentile);
@@ -0,0 +1,9 @@
1
+ require "mkmf"
2
+
3
+ abort "missing pow()" unless have_func "pow"
4
+ abort "missing sqrt()" unless have_func "sqrt"
5
+ abort "missing malloc()" unless have_func "malloc"
6
+ abort "missing free()" unless have_func "free"
7
+ abort "missing trunc()" unless have_func "trunc"
8
+
9
+ create_makefile "ruby_native_statistics/ruby_native_statistics"
@@ -1,13 +1,5 @@
1
- #include "stdbool.h"
2
- #include "ruby.h"
3
- #include "mathematics.h"
4
1
 
5
- void Init_mathematics()
6
- {
7
- MathematicsModule = rb_define_module("Mathematics");
8
- rb_define_method(MathematicsModule, "mean", rb_mean, 0);
9
- rb_define_method(MathematicsModule, "median", rb_median, 0);
10
- }
2
+ #include "mathematics.h"
11
3
 
12
4
  double calculate_mean(VALUE array, unsigned long array_length)
13
5
  {
@@ -57,22 +49,10 @@ VALUE rb_mean(VALUE self)
57
49
  return DBL2NUM(calculate_mean(self, array_length));
58
50
  }
59
51
 
60
- int compare_doubles(const void *a, const void *b)
61
- {
62
- double *dbl_a = (double *)a;
63
- double *dbl_b = (double *)b;
64
-
65
- double cmp_a = *dbl_a;
66
- double cmp_b = *dbl_b;
67
-
68
- return (cmp_a - cmp_b);
69
- }
70
-
71
52
  VALUE rb_median(VALUE self)
72
53
  {
73
54
  unsigned long array_length;
74
- unsigned long i;
75
- double *working_array;
55
+
76
56
  VALUE result;
77
57
 
78
58
  Check_Type(self, T_ARRAY);
@@ -87,30 +67,7 @@ VALUE rb_median(VALUE self)
87
67
  bool array_even_size = (array_length % 2) == 0;
88
68
  unsigned long middle = (long)floor(array_length / 2.0);
89
69
 
90
- working_array = malloc(array_length * sizeof(double));
91
-
92
- if (working_array == NULL)
93
- {
94
- rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
95
- }
96
-
97
- for (i = 0; i < array_length; i++)
98
- {
99
- VALUE item = rb_ary_entry(self, i);
100
-
101
- if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
102
- {
103
- free(working_array);
104
- rb_raise(rb_eTypeError, "element is not a number");
105
- }
106
-
107
- working_array[i] = NUM2DBL(item);
108
- }
109
-
110
- // Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
111
- // to a pointer to the first element in the array.
112
- // https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
113
- qsort(working_array, array_length, sizeof(double), compare_doubles);
70
+ double *working_array = sorted_ruby_array(self, array_length);
114
71
 
115
72
  if (!array_even_size)
116
73
  {
@@ -1,5 +1,9 @@
1
- VALUE MathematicsModule = Qnil;
1
+ #include <stdbool.h>
2
+ #include <ruby.h>
3
+ #include "conversions.h"
4
+
2
5
  VALUE rb_mean(VALUE self);
3
6
  VALUE rb_median(VALUE self);
7
+
4
8
  double calculate_mean(VALUE array, unsigned long array_length);
5
- double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
9
+ double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
@@ -0,0 +1,16 @@
1
+ #include "ruby_native_statistics.h"
2
+
3
+ void Init_ruby_native_statistics()
4
+ {
5
+ DispersionModule = rb_define_module("Dispersion");
6
+ rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
7
+ rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
8
+ rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
9
+ rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
10
+ rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
11
+ rb_define_method(DispersionModule, "percentile", rb_percentile, 1);
12
+
13
+ MathematicsModule = rb_define_module("Mathematics");
14
+ rb_define_method(MathematicsModule, "mean", rb_mean, 0);
15
+ rb_define_method(MathematicsModule, "median", rb_median, 0);
16
+ }
@@ -0,0 +1,5 @@
1
+ #include "dispersion.h"
2
+ #include "mathematics.h"
3
+
4
+ VALUE MathematicsModule = Qnil;
5
+ VALUE DispersionModule = Qnil;
@@ -1,6 +1,5 @@
1
1
  require "ruby_native_statistics/version"
2
- require "mathematics"
3
- require "dispersion"
2
+ require "ruby_native_statistics/ruby_native_statistics"
4
3
 
5
4
  class Array
6
5
  include Mathematics
@@ -1,3 +1,3 @@
1
1
  module RubyNativeStatistics
2
- VERSION = "0.9.0"
2
+ VERSION = "0.10.0"
3
3
  end
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
21
21
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.extensions = %w[ext/mathematics/extconf.rb ext/dispersion/extconf.rb]
24
+ spec.extensions = %w[ext/ruby_native_statistics/extconf.rb]
25
25
  end
metadata CHANGED
@@ -1,22 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_native_statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cory Buecker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-12 00:00:00.000000000 Z
11
+ date: 2020-01-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
15
15
  - cory.buecker@gmail.com
16
16
  executables: []
17
17
  extensions:
18
- - ext/mathematics/extconf.rb
19
- - ext/dispersion/extconf.rb
18
+ - ext/ruby_native_statistics/extconf.rb
20
19
  extra_rdoc_files: []
21
20
  files:
22
21
  - ".github/workflows/main.yml"
@@ -29,12 +28,15 @@ files:
29
28
  - README.md
30
29
  - Rakefile
31
30
  - changelog.md
32
- - ext/dispersion/dispersion.c
33
- - ext/dispersion/dispersion.h
34
- - ext/dispersion/extconf.rb
35
- - ext/mathematics/extconf.rb
36
- - ext/mathematics/mathematics.c
37
- - ext/mathematics/mathematics.h
31
+ - ext/ruby_native_statistics/conversions.c
32
+ - ext/ruby_native_statistics/conversions.h
33
+ - ext/ruby_native_statistics/dispersion.c
34
+ - ext/ruby_native_statistics/dispersion.h
35
+ - ext/ruby_native_statistics/extconf.rb
36
+ - ext/ruby_native_statistics/mathematics.c
37
+ - ext/ruby_native_statistics/mathematics.h
38
+ - ext/ruby_native_statistics/ruby_native_statistics.c
39
+ - ext/ruby_native_statistics/ruby_native_statistics.h
38
40
  - lib/ruby_native_statistics.rb
39
41
  - lib/ruby_native_statistics/version.rb
40
42
  - ruby_native_statistics.gemspec
@@ -1,68 +0,0 @@
1
- #include "ruby.h"
2
- #include "dispersion.h"
3
- #include "../mathematics/mathematics.h"
4
-
5
- void Init_dispersion() {
6
- DispersionModule = rb_define_module("Dispersion");
7
- rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
8
- rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
9
- rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
10
- rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
11
- rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
12
- }
13
-
14
- VALUE rb_sample_standard_deviation(VALUE self) {
15
- unsigned int array_length;
16
-
17
- Check_Type(self, T_ARRAY);
18
-
19
- array_length = rb_long2int(RARRAY_LEN(self));
20
-
21
- if (array_length <= 1) {
22
- rb_raise(rb_eRangeError, "array must have more than one element");
23
- }
24
-
25
- return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length)/(array_length - 1))));
26
- }
27
-
28
- VALUE rb_sample_variance(VALUE self) {
29
- unsigned int array_length;
30
-
31
- Check_Type(self, T_ARRAY);
32
-
33
- array_length = rb_long2int(RARRAY_LEN(self));
34
-
35
- if (array_length <= 1) {
36
- rb_raise(rb_eRangeError, "array must have more than one element");
37
- }
38
-
39
- return DBL2NUM((calculate_total_distance_from_mean(self, array_length)/(array_length - 1)));
40
- }
41
-
42
- VALUE rb_population_standard_deviation(VALUE self) {
43
- unsigned int array_length;
44
-
45
- Check_Type(self, T_ARRAY);
46
-
47
- array_length = rb_long2int(RARRAY_LEN(self));
48
-
49
- if (array_length <= 1) {
50
- rb_raise(rb_eRangeError, "array must have more than one element");
51
- }
52
-
53
- return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
54
- }
55
-
56
- VALUE rb_population_variance(VALUE self) {
57
- unsigned int array_length;
58
-
59
- Check_Type(self, T_ARRAY);
60
-
61
- array_length = rb_long2int(RARRAY_LEN(self));
62
-
63
- if (array_length <= 1) {
64
- rb_raise(rb_eRangeError, "array must have more than one element");
65
- }
66
-
67
- return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
68
- }
@@ -1,3 +0,0 @@
1
- require "mkmf"
2
-
3
- create_makefile "dispersion"
@@ -1,5 +0,0 @@
1
- require "mkmf"
2
-
3
- abort "missing pow()" unless have_func "pow"
4
-
5
- create_makefile "mathematics"