ruby_native_statistics 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b99d6ace1e36cdd53315356b8c4de080289a46bc307b41bc1979de4cbaff2e42
4
- data.tar.gz: 5b8c3498b2a3014313b0b249eb87350e7f07e5a22237de4ea9e95f67096572c7
3
+ metadata.gz: 99451de1aa0d6eab93118c4ce68330f21210437115e05ef3bcdff4ae63e780d7
4
+ data.tar.gz: edf6bdec21f87d70e808eaca249ddf1447b0ca57979a039e2295d679976e73e0
5
5
  SHA512:
6
- metadata.gz: c837f3f6f7e2a8d47291d1e289d09e48de81c5216bca319e1be24aca7a821f0ba8e1acd3548ccfbe83a50b92752af5dc281a9a69dc052e58cee1f5e49b05e06d
7
- data.tar.gz: 3c8e3629ffe57bccdbdeaa8b792d593aa89bdce401e034a2a0d08fdf62fb1ad0841229cc60c222aaa37fe0895c2fdee83109176ec553e0c1529c33814868fa4e
6
+ metadata.gz: 6e12d43e6f7ac1dd5b92350252e2feb83d69a07c311edd7f97040f2af471dad7763bb3515c769dab7e64b40e0d6ecabd92b3f546dea5178cc16638ce5564fbd2
7
+ data.tar.gz: cd0ef3c36d85d60505f5826d25a0189e8bcdfa0d7799b574f16d010ea9e4ba1367f66dba522e3287df5248e0dc11d5631252ad726b1050b8e730bc3fc7cbc045
@@ -3,5 +3,9 @@
3
3
  "editor.tabSize": 2,
4
4
  "search.useGlobalIgnoreFiles": true,
5
5
  "editor.wordWrap": "on",
6
- "editor.renderWhitespace": "all"
6
+ "editor.renderWhitespace": "all",
7
+ "files.associations": {
8
+ "conversions.h": "c",
9
+ "algorithm": "c"
10
+ }
7
11
  }
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby_native_statistics (0.8)
4
+ ruby_native_statistics (0.10.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -8,6 +8,7 @@ This is a native extension to Ruby that adds native (C) statistical functions to
8
8
  - [Population Variance](https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance) (varp)
9
9
  - [Median](https://en.wikipedia.org/wiki/Median) (median)
10
10
  - [Mean](https://en.wikipedia.org/wiki/Arithmetic_mean) (mean)
11
+ - [Percentile](https://en.wikipedia.org/wiki/Quantile) (percentile)
11
12
 
12
13
  Check the Github Actions build to see the currently supported versions of Ruby. This list will match whatever stable versions are specified at https://www.ruby-lang.org/en/downloads/.
13
14
 
@@ -16,6 +17,10 @@ It is much more performant than calculating the standard deviation with pure Rub
16
17
  bench_native_dispersion 0.000425 0.000341 0.000420 0.000324 0.000319
17
18
  bench_ruby_dispersion 0.002168 0.002156 0.002148 0.002149 0.002151
18
19
 
20
+ ## Found a bug? Need a function?
21
+
22
+ If you found a bug or need a particular function, please let me know! I work on this gem in my spare time, mainly for learning purposes. Feel free to open a PR or a Github issue and I'll take a look as soon as possible.
23
+
19
24
  ## Usage
20
25
 
21
26
  require 'ruby_native_statistics'
@@ -33,6 +38,15 @@ It is much more performant than calculating the standard deviation with pure Rub
33
38
  # calculate median
34
39
  p r.median
35
40
 
41
+ # calculate percentile
42
+ p r.percentile(0.3333)
43
+
44
+ ## Implementation notes
45
+
46
+ ### Percentile
47
+
48
+ Percentile uses the same linear interpolation method as Excel, sometimes called R7.
49
+
36
50
  ## Links
37
51
 
38
52
  This is the third version of this gem, and it is a total rewrite of a SWIG-based design. Lots of thanks to the following resources:
data/Rakefile CHANGED
@@ -1,12 +1,8 @@
1
1
  require "rake/extensiontask"
2
2
  require "rake/testtask"
3
3
 
4
- Rake::ExtensionTask.new "dispersion" do |ext|
5
- ext.lib_dir = "lib"
6
- end
7
-
8
- Rake::ExtensionTask.new "mathematics" do |ext|
9
- ext.lib_dir = "lib"
4
+ Rake::ExtensionTask.new "ruby_native_statistics" do |ext|
5
+ ext.lib_dir = "lib/ruby_native_statistics"
10
6
  end
11
7
 
12
8
  Rake::TestTask.new(:test) do |t|
@@ -0,0 +1,45 @@
1
+ #include "conversions.h"
2
+
3
+ int compare_doubles(const void *a, const void *b)
4
+ {
5
+ double *dbl_a = (double *)a;
6
+ double *dbl_b = (double *)b;
7
+
8
+ double cmp_a = *dbl_a;
9
+ double cmp_b = *dbl_b;
10
+
11
+ return (cmp_a - cmp_b);
12
+ }
13
+
14
+ double *sorted_ruby_array(VALUE array, long array_length)
15
+ {
16
+ long i;
17
+ double *working_array;
18
+
19
+ working_array = malloc(array_length * sizeof(double));
20
+
21
+ if (working_array == NULL)
22
+ {
23
+ rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
24
+ }
25
+
26
+ for (i = 0; i < array_length; i++)
27
+ {
28
+ VALUE item = rb_ary_entry(array, i);
29
+
30
+ if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
31
+ {
32
+ free(working_array);
33
+ rb_raise(rb_eTypeError, "element is not a number");
34
+ }
35
+
36
+ working_array[i] = NUM2DBL(item);
37
+ }
38
+
39
+ // Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
40
+ // to a pointer to the first element in the array.
41
+ // https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
42
+ qsort(working_array, array_length, sizeof(double), compare_doubles);
43
+
44
+ return working_array;
45
+ }
@@ -0,0 +1,5 @@
1
+ #include <stdbool.h>
2
+ #include <ruby.h>
3
+
4
+ int compare_doubles(const void *a, const void *b);
5
+ double *sorted_ruby_array(VALUE array, long array_length);
@@ -0,0 +1,102 @@
1
+ #include "dispersion.h"
2
+
3
+ VALUE rb_sample_standard_deviation(VALUE self)
4
+ {
5
+ unsigned int array_length;
6
+
7
+ Check_Type(self, T_ARRAY);
8
+
9
+ array_length = rb_long2int(RARRAY_LEN(self));
10
+
11
+ if (array_length <= 1)
12
+ {
13
+ rb_raise(rb_eRangeError, "array must have more than one element");
14
+ }
15
+
16
+ return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length) / (array_length - 1))));
17
+ }
18
+
19
+ VALUE rb_sample_variance(VALUE self)
20
+ {
21
+ unsigned int array_length;
22
+
23
+ Check_Type(self, T_ARRAY);
24
+
25
+ array_length = rb_long2int(RARRAY_LEN(self));
26
+
27
+ if (array_length <= 1)
28
+ {
29
+ rb_raise(rb_eRangeError, "array must have more than one element");
30
+ }
31
+
32
+ return DBL2NUM((calculate_total_distance_from_mean(self, array_length) / (array_length - 1)));
33
+ }
34
+
35
+ VALUE rb_population_standard_deviation(VALUE self)
36
+ {
37
+ unsigned int array_length;
38
+
39
+ Check_Type(self, T_ARRAY);
40
+
41
+ array_length = rb_long2int(RARRAY_LEN(self));
42
+
43
+ if (array_length <= 1)
44
+ {
45
+ rb_raise(rb_eRangeError, "array must have more than one element");
46
+ }
47
+
48
+ return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
49
+ }
50
+
51
+ VALUE rb_population_variance(VALUE self)
52
+ {
53
+ unsigned int array_length;
54
+
55
+ Check_Type(self, T_ARRAY);
56
+
57
+ array_length = rb_long2int(RARRAY_LEN(self));
58
+
59
+ if (array_length <= 1)
60
+ {
61
+ rb_raise(rb_eRangeError, "array must have more than one element");
62
+ }
63
+
64
+ return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
65
+ }
66
+
67
+ VALUE rb_percentile(VALUE self, VALUE r_percentile)
68
+ {
69
+ double result;
70
+ Check_Type(self, T_ARRAY);
71
+
72
+ long array_length = rb_array_len(self);
73
+ double percentile = NUM2DBL(r_percentile);
74
+
75
+ if (array_length == 0)
76
+ {
77
+ rb_raise(rb_eRangeError, "array must have at least one element");
78
+ }
79
+
80
+ if (percentile < 0 || percentile > 1)
81
+ {
82
+ rb_raise(rb_eRangeError, "percentile must be between 0 and 1 inclusive");
83
+ }
84
+
85
+ double *sorted_array = sorted_ruby_array(self, array_length);
86
+
87
+ double h = (array_length - 1) * percentile + 1;
88
+
89
+ if (trunc(h) == h)
90
+ {
91
+ result = sorted_array[(long)h - 1];
92
+ }
93
+ else
94
+ {
95
+ long h_floor = (long)trunc(h);
96
+ result = (h - h_floor) * (sorted_array[h_floor] - sorted_array[h_floor - 1]) + sorted_array[h_floor - 1];
97
+ }
98
+
99
+ free(sorted_array);
100
+
101
+ return DBL2NUM(result);
102
+ }
@@ -1,5 +1,10 @@
1
- VALUE DispersionModule = Qnil;
1
+ #include <ruby.h>
2
+ #include <math.h>
3
+ #include "conversions.h"
4
+ #include "mathematics.h"
5
+
2
6
  VALUE rb_sample_standard_deviation(VALUE self);
3
7
  VALUE rb_population_standard_deviation(VALUE self);
4
8
  VALUE rb_sample_variance(VALUE self);
5
9
  VALUE rb_population_variance(VALUE self);
10
+ VALUE rb_percentile(VALUE self, VALUE percentile);
@@ -0,0 +1,9 @@
1
+ require "mkmf"
2
+
3
+ abort "missing pow()" unless have_func "pow"
4
+ abort "missing sqrt()" unless have_func "sqrt"
5
+ abort "missing malloc()" unless have_func "malloc"
6
+ abort "missing free()" unless have_func "free"
7
+ abort "missing trunc()" unless have_func "trunc"
8
+
9
+ create_makefile "ruby_native_statistics/ruby_native_statistics"
@@ -1,13 +1,5 @@
1
- #include "stdbool.h"
2
- #include "ruby.h"
3
- #include "mathematics.h"
4
1
 
5
- void Init_mathematics()
6
- {
7
- MathematicsModule = rb_define_module("Mathematics");
8
- rb_define_method(MathematicsModule, "mean", rb_mean, 0);
9
- rb_define_method(MathematicsModule, "median", rb_median, 0);
10
- }
2
+ #include "mathematics.h"
11
3
 
12
4
  double calculate_mean(VALUE array, unsigned long array_length)
13
5
  {
@@ -57,22 +49,10 @@ VALUE rb_mean(VALUE self)
57
49
  return DBL2NUM(calculate_mean(self, array_length));
58
50
  }
59
51
 
60
- int compare_doubles(const void *a, const void *b)
61
- {
62
- double *dbl_a = (double *)a;
63
- double *dbl_b = (double *)b;
64
-
65
- double cmp_a = *dbl_a;
66
- double cmp_b = *dbl_b;
67
-
68
- return (cmp_a - cmp_b);
69
- }
70
-
71
52
  VALUE rb_median(VALUE self)
72
53
  {
73
54
  unsigned long array_length;
74
- unsigned long i;
75
- double *working_array;
55
+
76
56
  VALUE result;
77
57
 
78
58
  Check_Type(self, T_ARRAY);
@@ -87,30 +67,7 @@ VALUE rb_median(VALUE self)
87
67
  bool array_even_size = (array_length % 2) == 0;
88
68
  unsigned long middle = (long)floor(array_length / 2.0);
89
69
 
90
- working_array = malloc(array_length * sizeof(double));
91
-
92
- if (working_array == NULL)
93
- {
94
- rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
95
- }
96
-
97
- for (i = 0; i < array_length; i++)
98
- {
99
- VALUE item = rb_ary_entry(self, i);
100
-
101
- if (!RB_INTEGER_TYPE_P(item) && !RB_FLOAT_TYPE_P(item))
102
- {
103
- free(working_array);
104
- rb_raise(rb_eTypeError, "element is not a number");
105
- }
106
-
107
- working_array[i] = NUM2DBL(item);
108
- }
109
-
110
- // Reminder to myself as I'm learning C. Using an array as a function parameter decays that reference
111
- // to a pointer to the first element in the array.
112
- // https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
113
- qsort(working_array, array_length, sizeof(double), compare_doubles);
70
+ double *working_array = sorted_ruby_array(self, array_length);
114
71
 
115
72
  if (!array_even_size)
116
73
  {
@@ -1,5 +1,9 @@
1
- VALUE MathematicsModule = Qnil;
1
+ #include <stdbool.h>
2
+ #include <ruby.h>
3
+ #include "conversions.h"
4
+
2
5
  VALUE rb_mean(VALUE self);
3
6
  VALUE rb_median(VALUE self);
7
+
4
8
  double calculate_mean(VALUE array, unsigned long array_length);
5
- double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
9
+ double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
@@ -0,0 +1,16 @@
1
+ #include "ruby_native_statistics.h"
2
+
3
+ void Init_ruby_native_statistics()
4
+ {
5
+ DispersionModule = rb_define_module("Dispersion");
6
+ rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
7
+ rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
8
+ rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
9
+ rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
10
+ rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
11
+ rb_define_method(DispersionModule, "percentile", rb_percentile, 1);
12
+
13
+ MathematicsModule = rb_define_module("Mathematics");
14
+ rb_define_method(MathematicsModule, "mean", rb_mean, 0);
15
+ rb_define_method(MathematicsModule, "median", rb_median, 0);
16
+ }
@@ -0,0 +1,5 @@
1
+ #include "dispersion.h"
2
+ #include "mathematics.h"
3
+
4
+ VALUE MathematicsModule = Qnil;
5
+ VALUE DispersionModule = Qnil;
@@ -1,6 +1,5 @@
1
1
  require "ruby_native_statistics/version"
2
- require "mathematics"
3
- require "dispersion"
2
+ require "ruby_native_statistics/ruby_native_statistics"
4
3
 
5
4
  class Array
6
5
  include Mathematics
@@ -1,3 +1,3 @@
1
1
  module RubyNativeStatistics
2
- VERSION = "0.9.0"
2
+ VERSION = "0.10.0"
3
3
  end
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
21
21
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.extensions = %w[ext/mathematics/extconf.rb ext/dispersion/extconf.rb]
24
+ spec.extensions = %w[ext/ruby_native_statistics/extconf.rb]
25
25
  end
metadata CHANGED
@@ -1,22 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_native_statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cory Buecker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-12 00:00:00.000000000 Z
11
+ date: 2020-01-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
15
15
  - cory.buecker@gmail.com
16
16
  executables: []
17
17
  extensions:
18
- - ext/mathematics/extconf.rb
19
- - ext/dispersion/extconf.rb
18
+ - ext/ruby_native_statistics/extconf.rb
20
19
  extra_rdoc_files: []
21
20
  files:
22
21
  - ".github/workflows/main.yml"
@@ -29,12 +28,15 @@ files:
29
28
  - README.md
30
29
  - Rakefile
31
30
  - changelog.md
32
- - ext/dispersion/dispersion.c
33
- - ext/dispersion/dispersion.h
34
- - ext/dispersion/extconf.rb
35
- - ext/mathematics/extconf.rb
36
- - ext/mathematics/mathematics.c
37
- - ext/mathematics/mathematics.h
31
+ - ext/ruby_native_statistics/conversions.c
32
+ - ext/ruby_native_statistics/conversions.h
33
+ - ext/ruby_native_statistics/dispersion.c
34
+ - ext/ruby_native_statistics/dispersion.h
35
+ - ext/ruby_native_statistics/extconf.rb
36
+ - ext/ruby_native_statistics/mathematics.c
37
+ - ext/ruby_native_statistics/mathematics.h
38
+ - ext/ruby_native_statistics/ruby_native_statistics.c
39
+ - ext/ruby_native_statistics/ruby_native_statistics.h
38
40
  - lib/ruby_native_statistics.rb
39
41
  - lib/ruby_native_statistics/version.rb
40
42
  - ruby_native_statistics.gemspec
@@ -1,68 +0,0 @@
1
- #include "ruby.h"
2
- #include "dispersion.h"
3
- #include "../mathematics/mathematics.h"
4
-
5
- void Init_dispersion() {
6
- DispersionModule = rb_define_module("Dispersion");
7
- rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
8
- rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
9
- rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
10
- rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
11
- rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
12
- }
13
-
14
- VALUE rb_sample_standard_deviation(VALUE self) {
15
- unsigned int array_length;
16
-
17
- Check_Type(self, T_ARRAY);
18
-
19
- array_length = rb_long2int(RARRAY_LEN(self));
20
-
21
- if (array_length <= 1) {
22
- rb_raise(rb_eRangeError, "array must have more than one element");
23
- }
24
-
25
- return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length)/(array_length - 1))));
26
- }
27
-
28
- VALUE rb_sample_variance(VALUE self) {
29
- unsigned int array_length;
30
-
31
- Check_Type(self, T_ARRAY);
32
-
33
- array_length = rb_long2int(RARRAY_LEN(self));
34
-
35
- if (array_length <= 1) {
36
- rb_raise(rb_eRangeError, "array must have more than one element");
37
- }
38
-
39
- return DBL2NUM((calculate_total_distance_from_mean(self, array_length)/(array_length - 1)));
40
- }
41
-
42
- VALUE rb_population_standard_deviation(VALUE self) {
43
- unsigned int array_length;
44
-
45
- Check_Type(self, T_ARRAY);
46
-
47
- array_length = rb_long2int(RARRAY_LEN(self));
48
-
49
- if (array_length <= 1) {
50
- rb_raise(rb_eRangeError, "array must have more than one element");
51
- }
52
-
53
- return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
54
- }
55
-
56
- VALUE rb_population_variance(VALUE self) {
57
- unsigned int array_length;
58
-
59
- Check_Type(self, T_ARRAY);
60
-
61
- array_length = rb_long2int(RARRAY_LEN(self));
62
-
63
- if (array_length <= 1) {
64
- rb_raise(rb_eRangeError, "array must have more than one element");
65
- }
66
-
67
- return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
68
- }
@@ -1,3 +0,0 @@
1
- require "mkmf"
2
-
3
- create_makefile "dispersion"
@@ -1,5 +0,0 @@
1
- require "mkmf"
2
-
3
- abort "missing pow()" unless have_func "pow"
4
-
5
- create_makefile "mathematics"