ruby_native_statistics 0.8.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +31 -0
- data/.github/workflows/main.yml +10 -22
- data/.vscode/c_cpp_properties.json +21 -0
- data/.vscode/settings.json +5 -1
- data/Gemfile +5 -4
- data/Gemfile.lock +25 -16
- data/README.md +37 -5
- data/Rakefile +4 -8
- data/{LICENSE → UNLICENSE} +0 -0
- data/changelog.md +9 -0
- data/ext/ruby_native_statistics/conversions.c +45 -0
- data/ext/ruby_native_statistics/conversions.h +5 -0
- data/ext/ruby_native_statistics/dispersion.c +102 -0
- data/ext/{dispersion → ruby_native_statistics}/dispersion.h +6 -1
- data/ext/ruby_native_statistics/extconf.rb +9 -0
- data/ext/ruby_native_statistics/mathematics.c +84 -0
- data/ext/{mathematics → ruby_native_statistics}/mathematics.h +7 -2
- data/ext/ruby_native_statistics/ruby_native_statistics.c +16 -0
- data/ext/ruby_native_statistics/ruby_native_statistics.h +5 -0
- data/lib/ruby_native_statistics/version.rb +1 -1
- data/lib/ruby_native_statistics.rb +1 -2
- data/ruby_native_statistics.gemspec +3 -3
- metadata +18 -15
- data/ext/dispersion/dispersion.c +0 -68
- data/ext/dispersion/extconf.rb +0 -3
- data/ext/mathematics/extconf.rb +0 -5
- data/ext/mathematics/mathematics.c +0 -50
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 73a2fd8450ce06d2b1d1292bfb91e41e6d764651b850895a9b85fd1d9c2a9596
|
|
4
|
+
data.tar.gz: 0ca7e20b6b91e1eacef10a844a07819fb5562fdc7e44c0ecbf268ff5134b69b5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a04f40565730406b34a3591d734e9d33286d41abc19ff49736d715f91b916995e9cdc9b1d6014c8b7745fe44aad1701bdd5dba49aee0e0ab3f39f6fab85a430b
|
|
7
|
+
data.tar.gz: b0cc9f0fff6e26fbbfab5b27d69cfe7b7c19465944a1e0be14455380a312d536ff388246fe1766734bd1b210420acc04f58a71e07448275c6cbf5f19a1e9c969
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Create a report to help us improve
|
|
4
|
+
title: ''
|
|
5
|
+
labels: ''
|
|
6
|
+
assignees: ''
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
**Describe the bug**
|
|
11
|
+
A clear and concise description of what the bug is.
|
|
12
|
+
|
|
13
|
+
**To Reproduce**
|
|
14
|
+
Steps to reproduce the behavior:
|
|
15
|
+
1. Go to '...'
|
|
16
|
+
2. Click on '....'
|
|
17
|
+
3. Scroll down to '....'
|
|
18
|
+
4. See error
|
|
19
|
+
|
|
20
|
+
**Expected behavior**
|
|
21
|
+
A clear and concise description of what you expected to happen.
|
|
22
|
+
|
|
23
|
+
**Screenshots**
|
|
24
|
+
If applicable, add screenshots to help explain your problem.
|
|
25
|
+
|
|
26
|
+
**Desktop (please complete the following information):**
|
|
27
|
+
- OS: [e.g. iOS]
|
|
28
|
+
- Version [e.g. 22]
|
|
29
|
+
|
|
30
|
+
**Additional context**
|
|
31
|
+
Add any other context about the problem here.
|
data/.github/workflows/main.yml
CHANGED
|
@@ -13,29 +13,17 @@ jobs:
|
|
|
13
13
|
strategy:
|
|
14
14
|
matrix:
|
|
15
15
|
ruby:
|
|
16
|
-
- 2.
|
|
17
|
-
- 2.5
|
|
18
|
-
-
|
|
19
|
-
-
|
|
16
|
+
- 2.6.9
|
|
17
|
+
- 2.7.5
|
|
18
|
+
- 3.0.3
|
|
19
|
+
- 3.1.0
|
|
20
20
|
steps:
|
|
21
|
-
-
|
|
22
|
-
|
|
21
|
+
- uses: actions/checkout@v2
|
|
22
|
+
|
|
23
|
+
- uses: ruby/setup-ruby@v1
|
|
23
24
|
with:
|
|
24
25
|
ruby-version: ${{ matrix.ruby }}
|
|
25
26
|
|
|
26
|
-
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
- name: Install Bundler
|
|
30
|
-
run: gem install bundler -v 2.1.2
|
|
31
|
-
|
|
32
|
-
- name: Install gems
|
|
33
|
-
run: bundle
|
|
34
|
-
|
|
35
|
-
- name: Run tests
|
|
36
|
-
run: rake
|
|
37
|
-
|
|
38
|
-
- name: Coveralls
|
|
39
|
-
uses: coverallsapp/github-action@master
|
|
40
|
-
with:
|
|
41
|
-
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
27
|
+
- run: gem install bundler --version 2.3.4 --no-document
|
|
28
|
+
- run: bundle
|
|
29
|
+
- run: rake
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"configurations": [
|
|
3
|
+
{
|
|
4
|
+
"name": "Mac",
|
|
5
|
+
"includePath": [
|
|
6
|
+
"${workspaceFolder}/**",
|
|
7
|
+
"/Users/corybuecker/.asdf/installs/ruby/2.7.0/include/ruby-2.7.0",
|
|
8
|
+
"/Users/corybuecker/.asdf/installs/ruby/2.7.0/include/ruby-2.7.0/x86_64-darwin19"
|
|
9
|
+
],
|
|
10
|
+
"defines": [],
|
|
11
|
+
"macFrameworkPath": [
|
|
12
|
+
"/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks"
|
|
13
|
+
],
|
|
14
|
+
"compilerPath": "/usr/bin/clang",
|
|
15
|
+
"cStandard": "c11",
|
|
16
|
+
"cppStandard": "c++17",
|
|
17
|
+
"intelliSenseMode": "clang-x64"
|
|
18
|
+
}
|
|
19
|
+
],
|
|
20
|
+
"version": 4
|
|
21
|
+
}
|
data/.vscode/settings.json
CHANGED
data/Gemfile
CHANGED
|
@@ -2,8 +2,9 @@ source "https://rubygems.org"
|
|
|
2
2
|
|
|
3
3
|
gemspec
|
|
4
4
|
|
|
5
|
-
gem "rake", "~>
|
|
6
|
-
gem "minitest", "~> 5.
|
|
5
|
+
gem "rake", "~> 13.0"
|
|
6
|
+
gem "minitest", "~> 5.15"
|
|
7
|
+
gem "minitest-reporters", "~> 1.4"
|
|
7
8
|
gem "rake-compiler", "~> 1.1"
|
|
8
|
-
gem "simplecov", "~> 0.
|
|
9
|
-
gem "simplecov-lcov", "~> 0.
|
|
9
|
+
gem "simplecov", "~> 0.21", require: false
|
|
10
|
+
gem "simplecov-lcov", "~> 0.8", require: false
|
data/Gemfile.lock
CHANGED
|
@@ -1,34 +1,43 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
ruby_native_statistics (0.
|
|
4
|
+
ruby_native_statistics (1.0.0)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
8
8
|
specs:
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
ansi (1.5.0)
|
|
10
|
+
builder (3.2.4)
|
|
11
|
+
docile (1.4.0)
|
|
12
|
+
minitest (5.15.0)
|
|
13
|
+
minitest-reporters (1.4.3)
|
|
14
|
+
ansi
|
|
15
|
+
builder
|
|
16
|
+
minitest (>= 5.0)
|
|
17
|
+
ruby-progressbar
|
|
18
|
+
rake (13.0.6)
|
|
19
|
+
rake-compiler (1.1.7)
|
|
14
20
|
rake
|
|
15
|
-
|
|
21
|
+
ruby-progressbar (1.11.0)
|
|
22
|
+
simplecov (0.21.2)
|
|
16
23
|
docile (~> 1.1)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
simplecov-html (0.
|
|
20
|
-
simplecov-lcov (0.
|
|
24
|
+
simplecov-html (~> 0.11)
|
|
25
|
+
simplecov_json_formatter (~> 0.1)
|
|
26
|
+
simplecov-html (0.12.3)
|
|
27
|
+
simplecov-lcov (0.8.0)
|
|
28
|
+
simplecov_json_formatter (0.1.3)
|
|
21
29
|
|
|
22
30
|
PLATFORMS
|
|
23
31
|
ruby
|
|
24
32
|
|
|
25
33
|
DEPENDENCIES
|
|
26
|
-
minitest (~> 5.
|
|
27
|
-
|
|
34
|
+
minitest (~> 5.15)
|
|
35
|
+
minitest-reporters (~> 1.4)
|
|
36
|
+
rake (~> 13.0)
|
|
28
37
|
rake-compiler (~> 1.1)
|
|
29
38
|
ruby_native_statistics!
|
|
30
|
-
simplecov (~> 0.
|
|
31
|
-
simplecov-lcov (~> 0.
|
|
39
|
+
simplecov (~> 0.21)
|
|
40
|
+
simplecov-lcov (~> 0.8)
|
|
32
41
|
|
|
33
42
|
BUNDLED WITH
|
|
34
|
-
2.
|
|
43
|
+
2.3.3
|
data/README.md
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
# Ruby Native Statistics
|
|
1
|
+
# Ruby Native Statistics
|
|
2
|
+
|
|
3
|
+
[Build status](https://github.com/corybuecker/ruby-native-statistics/actions)
|
|
2
4
|
|
|
3
5
|
This is a native extension to Ruby that adds native (C) statistical functions to the Array class. At present the following functions are provided:
|
|
4
6
|
|
|
@@ -6,14 +8,32 @@ This is a native extension to Ruby that adds native (C) statistical functions to
|
|
|
6
8
|
- [Population Standard Deviation](https://en.wikipedia.org/wiki/Standard_deviation#Uncorrected_sample_standard_deviation) (stdevp)
|
|
7
9
|
- [Sample Variance](https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance) (var)
|
|
8
10
|
- [Population Variance](https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance) (varp)
|
|
11
|
+
- [Median](https://en.wikipedia.org/wiki/Median) (median)
|
|
9
12
|
- [Mean](https://en.wikipedia.org/wiki/Arithmetic_mean) (mean)
|
|
13
|
+
- [Percentile](https://en.wikipedia.org/wiki/Quantile) (percentile)
|
|
14
|
+
|
|
15
|
+
Check the GitHub Actions build to see the currently supported versions of Ruby. This list will match whatever stable versions are specified at https://www.ruby-lang.org/en/downloads/.
|
|
16
|
+
|
|
17
|
+
It is generally more performant than calculating these values with pure Ruby. For a comparison, run the benchmarks with `rake benchmark`.
|
|
18
|
+
|
|
19
|
+
| Test (Ruby 3.1.0) | Run 1 | Run 2 | Run 3 | Run 4 | Run 5 |
|
|
20
|
+
| ------------------ | -------- | -------- | -------- | -------- | -------- |
|
|
21
|
+
| bench_native_stdev | 0.000074 | 0.000070 | 0.000071 | 0.000070 | 0.000068 |
|
|
22
|
+
| bench_ruby_stdev | 0.000945 | 0.000942 | 0.000944 | 0.000941 | 0.000969 |
|
|
23
|
+
|
|
24
|
+
| Test (Ruby 3.1.0) | Run 1 | Run 2 | Run 3 | Run 4 | Run 5 |
|
|
25
|
+
| ------------------- | -------- | -------- | -------- | -------- | -------- |
|
|
26
|
+
| bench_native_median | 0.000813 | 0.000773 | 0.000774 | 0.000776 | 0.000773 |
|
|
27
|
+
| bench_ruby_median | 0.000816 | 0.000797 | 0.000832 | 0.000797 | 0.000799 |
|
|
10
28
|
|
|
11
|
-
|
|
29
|
+
| Test (Ruby 3.1.0) | Run 1 | Run 2 | Run 3 | Run 4 | Run 5 |
|
|
30
|
+
| ----------------- | -------- | -------- | -------- | -------- | -------- |
|
|
31
|
+
| bench_native_mean | 0.000040 | 0.000038 | 0.000038 | 0.000037 | 0.000037 |
|
|
32
|
+
| bench_ruby_mean | 0.000347 | 0.000350 | 0.000358 | 0.000349 | 0.000347 |
|
|
12
33
|
|
|
13
|
-
|
|
34
|
+
## Found a bug? Need a function?
|
|
14
35
|
|
|
15
|
-
|
|
16
|
-
bench_ruby_dispersion 0.002168 0.002156 0.002148 0.002149 0.002151
|
|
36
|
+
If you found a bug or need a particular function, please let me know! I work on this gem in my spare time, mainly for learning purposes. Feel free to open a PR or a GitHub issue and I'll take a look as soon as possible.
|
|
17
37
|
|
|
18
38
|
## Usage
|
|
19
39
|
|
|
@@ -29,6 +49,18 @@ It is much more performant than calculating the standard deviation with pure Rub
|
|
|
29
49
|
# calculate mean
|
|
30
50
|
p r.mean
|
|
31
51
|
|
|
52
|
+
# calculate median
|
|
53
|
+
p r.median
|
|
54
|
+
|
|
55
|
+
# calculate percentile
|
|
56
|
+
p r.percentile(0.3333)
|
|
57
|
+
|
|
58
|
+
## Implementation notes
|
|
59
|
+
|
|
60
|
+
### Percentile
|
|
61
|
+
|
|
62
|
+
Percentile uses linear interpolation between the two closest ranks, the same method as Excel's PERCENTILE.INC, sometimes called R7.
|
|
63
|
+
|
|
32
64
|
## Links
|
|
33
65
|
|
|
34
66
|
This is the third version of this gem, and it is a total rewrite of a SWIG-based design. Lots of thanks to the following resources:
|
data/Rakefile
CHANGED
|
@@ -1,12 +1,8 @@
|
|
|
1
1
|
require "rake/extensiontask"
|
|
2
2
|
require "rake/testtask"
|
|
3
3
|
|
|
4
|
-
Rake::ExtensionTask.new "
|
|
5
|
-
ext.lib_dir = "lib"
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
Rake::ExtensionTask.new "mathematics" do |ext|
|
|
9
|
-
ext.lib_dir = "lib"
|
|
4
|
+
Rake::ExtensionTask.new "ruby_native_statistics" do |ext|
|
|
5
|
+
ext.lib_dir = "lib/ruby_native_statistics"
|
|
10
6
|
end
|
|
11
7
|
|
|
12
8
|
Rake::TestTask.new(:test) do |t|
|
|
@@ -15,10 +11,10 @@ Rake::TestTask.new(:test) do |t|
|
|
|
15
11
|
t.test_files = FileList["test/**/*_test.rb"]
|
|
16
12
|
end
|
|
17
13
|
|
|
18
|
-
Rake::TestTask.new(:
|
|
14
|
+
Rake::TestTask.new(benchmark: [:clean, :compile]) do |t|
|
|
19
15
|
t.libs << "test"
|
|
20
16
|
t.libs << "lib"
|
|
21
17
|
t.test_files = FileList["test/**/*_benchmark.rb"]
|
|
22
18
|
end
|
|
23
19
|
|
|
24
|
-
task :default => [:compile, :test]
|
|
20
|
+
task :default => [:clean, :compile, :test]
|
data/{LICENSE → UNLICENSE}
RENAMED
|
File without changes
|
data/changelog.md
CHANGED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#include "conversions.h"
|
|
2
|
+
|
|
3
|
+
// qsort() comparator for two doubles.
//
// a, b - pointers to the doubles being compared.
//
// Returns a negative value when *a < *b, a positive value when *a > *b and
// zero otherwise (equal, or unordered because one operand is NaN).
//
// The result is derived from comparisons instead of the subtraction
// (*a - *b): converting that difference to int truncates toward zero, so two
// values less than 1.0 apart (e.g. 0.2 and 0.5) would compare as equal, and a
// difference outside the range of int is undefined behaviour.
int compare_doubles(const void *a, const void *b)
{
  const double lhs = *(const double *)a;
  const double rhs = *(const double *)b;

  return (lhs > rhs) - (lhs < rhs);
}
|
|
13
|
+
|
|
14
|
+
// Copies a Ruby array of Integers/Floats into a newly malloc'd C array of
// doubles and sorts it with qsort() using compare_doubles.
//
// array        - Ruby array to copy; every element must be an Integer or Float.
// array_length - number of elements to read from the array.
//
// Returns the sorted buffer. The callers in this extension release it with
// free() once they are done with it.
// Raises StandardError when the buffer cannot be allocated, and TypeError
// when an element is neither an Integer nor a Float.
double *sorted_ruby_array(VALUE array, long array_length)
{
  double *sorted = malloc(array_length * sizeof(double));

  if (sorted == NULL)
  {
    rb_raise(rb_eStandardError, "unknown problem calculating median (possibly array is too large)");
  }

  for (long index = 0; index < array_length; index++)
  {
    VALUE element = rb_ary_entry(array, index);
    int is_numeric = RB_INTEGER_TYPE_P(element) || RB_FLOAT_TYPE_P(element);

    if (!is_numeric)
    {
      // Release the buffer before raising so it is not leaked.
      free(sorted);
      rb_raise(rb_eTypeError, "element is not a number");
    }

    sorted[index] = NUM2DBL(element);
  }

  // The buffer is passed to qsort() as a pointer to its first element.
  // https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Function-Parameters
  qsort(sorted, array_length, sizeof *sorted, compare_doubles);

  return sorted;
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#include "dispersion.h"
|
|
2
|
+
|
|
3
|
+
// Sample standard deviation of the receiver: the square root of the summed
// squared distances from the mean divided by (n - 1).
//
// self - the receiver; must be a Ruby Array.
//
// Returns a Ruby Float.
// Raises RangeError when the array has fewer than two elements.
VALUE rb_sample_standard_deviation(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int element_count = rb_long2int(RARRAY_LEN(self));

  if (element_count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double sample_variance = calculate_total_distance_from_mean(self, element_count) / (element_count - 1);

  return DBL2NUM(sqrt(sample_variance));
}
|
|
18
|
+
|
|
19
|
+
// Sample variance of the receiver: the summed squared distances from the mean
// divided by (n - 1).
//
// self - the receiver; must be a Ruby Array.
//
// Returns a Ruby Float.
// Raises RangeError when the array has fewer than two elements.
VALUE rb_sample_variance(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int element_count = rb_long2int(RARRAY_LEN(self));

  if (element_count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double squared_distance_sum = calculate_total_distance_from_mean(self, element_count);

  return DBL2NUM(squared_distance_sum / (element_count - 1));
}
|
|
34
|
+
|
|
35
|
+
// Population standard deviation of the receiver: the square root of the
// summed squared distances from the mean divided by n.
//
// self - the receiver; must be a Ruby Array.
//
// Returns a Ruby Float.
// Raises RangeError when the array has fewer than two elements.
VALUE rb_population_standard_deviation(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int element_count = rb_long2int(RARRAY_LEN(self));

  if (element_count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double population_variance = calculate_total_distance_from_mean(self, element_count) / element_count;

  return DBL2NUM(sqrt(population_variance));
}
|
|
50
|
+
|
|
51
|
+
// Population variance of the receiver: the summed squared distances from the
// mean divided by n.
//
// self - the receiver; must be a Ruby Array.
//
// Returns a Ruby Float.
// Raises RangeError when the array has fewer than two elements.
VALUE rb_population_variance(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int element_count = rb_long2int(RARRAY_LEN(self));

  if (element_count < 2)
  {
    rb_raise(rb_eRangeError, "array must have more than one element");
  }

  double squared_distance_sum = calculate_total_distance_from_mean(self, element_count);

  return DBL2NUM(squared_distance_sum / element_count);
}
|
|
66
|
+
|
|
67
|
+
// Percentile of the receiver, linearly interpolated between the two closest
// ranks of the sorted values.
//
// self         - the receiver; must be a Ruby Array of Integers/Floats.
// r_percentile - Ruby numeric in the range 0..1 inclusive.
//
// Returns a Ruby Float.
// Raises RangeError when the array is empty or the percentile is outside
// 0..1 (NaN included); sorted_ruby_array raises TypeError for a non-numeric
// element.
VALUE rb_percentile(VALUE self, VALUE r_percentile)
{
  double result;
  Check_Type(self, T_ARRAY);

  long array_length = rb_array_len(self);
  double percentile = NUM2DBL(r_percentile);

  if (array_length == 0)
  {
    rb_raise(rb_eRangeError, "array must have at least one element");
  }

  // Written as a negated "in range" test so that NaN is rejected as well:
  // NaN fails both `< 0` and `> 1`, and would otherwise reach the
  // double-to-long conversion below, which is undefined for NaN and would
  // produce a garbage array index.
  if (!(percentile >= 0 && percentile <= 1))
  {
    rb_raise(rb_eRangeError, "percentile must be between 0 and 1 inclusive");
  }

  double *sorted_array = sorted_ruby_array(self, array_length);

  // One-based fractional rank of the requested percentile; lies in
  // [1, array_length] because percentile lies in [0, 1].
  double h = (array_length - 1) * percentile + 1;

  if (trunc(h) == h)
  {
    // The rank lands exactly on an element.
    result = sorted_array[(long)h - 1];
  }
  else
  {
    // Interpolate between the elements on either side of the rank.
    long h_floor = (long)trunc(h);
    result = (h - h_floor) * (sorted_array[h_floor] - sorted_array[h_floor - 1]) + sorted_array[h_floor - 1];
  }

  free(sorted_array);

  return DBL2NUM(result);
}
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <math.h>
|
|
3
|
+
#include "conversions.h"
|
|
4
|
+
#include "mathematics.h"
|
|
5
|
+
|
|
2
6
|
VALUE rb_sample_standard_deviation(VALUE self);
|
|
3
7
|
VALUE rb_population_standard_deviation(VALUE self);
|
|
4
8
|
VALUE rb_sample_variance(VALUE self);
|
|
5
9
|
VALUE rb_population_variance(VALUE self);
|
|
10
|
+
VALUE rb_percentile(VALUE self, VALUE percentile);
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
require "mkmf"

# Stop the build early when a C library function used by the extension is unavailable.
%w[pow sqrt malloc free trunc].each do |function_name|
  abort "missing #{function_name}()" unless have_func function_name
end

create_makefile "ruby_native_statistics/ruby_native_statistics"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
|
|
2
|
+
#include "mathematics.h"
|
|
3
|
+
|
|
4
|
+
// Arithmetic mean of the first array_length elements of a Ruby array.
//
// array        - Ruby array whose elements are converted with rb_num2dbl.
// array_length - number of elements to read; also used as the divisor, with
//                no zero check in this function.
//
// Returns the sum of the converted elements divided by array_length.
double calculate_mean(VALUE array, unsigned long array_length)
{
  double sum = 0;
  unsigned long index = 0;

  while (index < array_length)
  {
    sum += rb_num2dbl(rb_ary_entry(array, index));
    index++;
  }

  return sum / array_length;
}
|
|
19
|
+
|
|
20
|
+
// Sum of the squared distances between each element and the mean of the array.
//
// array        - Ruby array whose elements are converted with rb_num2dbl.
// array_length - number of elements to read.
//
// Returns the accumulated squared distance; the mean comes from calculate_mean.
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length)
{
  const double mean = calculate_mean(array, array_length);
  double squared_distance_sum = 0;

  for (unsigned long index = 0; index < array_length; index++)
  {
    double distance = rb_num2dbl(rb_ary_entry(array, index)) - mean;

    squared_distance_sum += pow(distance, 2);
  }

  return squared_distance_sum;
}
|
|
35
|
+
|
|
36
|
+
// Arithmetic mean of the receiver.
//
// self - the receiver; must be a Ruby Array.
//
// Returns a Ruby Float.
// Raises RangeError when the array is empty.
VALUE rb_mean(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned int element_count = rb_long2int(RARRAY_LEN(self));

  if (element_count == 0)
  {
    rb_raise(rb_eRangeError, "array must have at least one element");
  }

  double mean = calculate_mean(self, element_count);

  return DBL2NUM(mean);
}
|
|
51
|
+
|
|
52
|
+
// Median of the receiver: the middle value of the sorted elements, or the
// average of the two middle values when the element count is even.
//
// self - the receiver; must be a Ruby Array of Integers/Floats.
//
// Returns a Ruby Float.
// Raises RangeError when the array is empty; sorted_ruby_array raises
// TypeError for a non-numeric element.
VALUE rb_median(VALUE self)
{
  Check_Type(self, T_ARRAY);

  unsigned long element_count = RARRAY_LEN(self);

  if (element_count == 0)
  {
    rb_raise(rb_eRangeError, "array must have at least one element");
  }

  // Index of the middle element (odd count) or of the upper of the two
  // middle elements (even count).
  unsigned long upper_middle = (long)floor(element_count / 2.0);
  bool odd_count = (element_count % 2) != 0;

  double *sorted = sorted_ruby_array(self, element_count);
  double median;

  if (odd_count)
  {
    median = sorted[upper_middle];
  }
  else
  {
    median = (sorted[upper_middle - 1] + sorted[upper_middle]) / 2;
  }

  free(sorted);

  return DBL2NUM(median);
}
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
#include <stdbool.h>
|
|
2
|
+
#include <ruby.h>
|
|
3
|
+
#include "conversions.h"
|
|
4
|
+
|
|
2
5
|
VALUE rb_mean(VALUE self);
|
|
6
|
+
VALUE rb_median(VALUE self);
|
|
7
|
+
|
|
3
8
|
double calculate_mean(VALUE array, unsigned long array_length);
|
|
4
|
-
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
|
|
9
|
+
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length);
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#include "ruby_native_statistics.h"
|
|
2
|
+
|
|
3
|
+
void Init_ruby_native_statistics()
|
|
4
|
+
{
|
|
5
|
+
DispersionModule = rb_define_module("Dispersion");
|
|
6
|
+
rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
|
|
7
|
+
rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
|
|
8
|
+
rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
|
|
9
|
+
rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
|
|
10
|
+
rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
|
|
11
|
+
rb_define_method(DispersionModule, "percentile", rb_percentile, 1);
|
|
12
|
+
|
|
13
|
+
MathematicsModule = rb_define_module("Mathematics");
|
|
14
|
+
rb_define_method(MathematicsModule, "mean", rb_mean, 0);
|
|
15
|
+
rb_define_method(MathematicsModule, "median", rb_median, 0);
|
|
16
|
+
}
|
|
@@ -7,10 +7,10 @@ Gem::Specification.new do |spec|
|
|
|
7
7
|
spec.email = ["cory.buecker@gmail.com"]
|
|
8
8
|
|
|
9
9
|
spec.license = "Unlicense"
|
|
10
|
-
spec.summary = "
|
|
10
|
+
spec.summary = "High performance, native (C) implementations of various statistical functions."
|
|
11
11
|
spec.homepage = "https://github.com/corybuecker/ruby-native-statistics"
|
|
12
12
|
|
|
13
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
|
13
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.6")
|
|
14
14
|
|
|
15
15
|
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
|
16
16
|
|
|
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
22
22
|
spec.require_paths = ["lib"]
|
|
23
23
|
|
|
24
|
-
spec.extensions = %w[ext/
|
|
24
|
+
spec.extensions = %w[ext/ruby_native_statistics/extconf.rb]
|
|
25
25
|
end
|
metadata
CHANGED
|
@@ -1,39 +1,43 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_native_statistics
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Cory Buecker
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description:
|
|
14
14
|
email:
|
|
15
15
|
- cory.buecker@gmail.com
|
|
16
16
|
executables: []
|
|
17
17
|
extensions:
|
|
18
|
-
- ext/
|
|
19
|
-
- ext/dispersion/extconf.rb
|
|
18
|
+
- ext/ruby_native_statistics/extconf.rb
|
|
20
19
|
extra_rdoc_files: []
|
|
21
20
|
files:
|
|
21
|
+
- ".github/ISSUE_TEMPLATE/bug_report.md"
|
|
22
22
|
- ".github/workflows/main.yml"
|
|
23
23
|
- ".gitignore"
|
|
24
|
+
- ".vscode/c_cpp_properties.json"
|
|
24
25
|
- ".vscode/settings.json"
|
|
25
26
|
- Gemfile
|
|
26
27
|
- Gemfile.lock
|
|
27
|
-
- LICENSE
|
|
28
28
|
- README.md
|
|
29
29
|
- Rakefile
|
|
30
|
+
- UNLICENSE
|
|
30
31
|
- changelog.md
|
|
31
|
-
- ext/
|
|
32
|
-
- ext/
|
|
33
|
-
- ext/dispersion
|
|
34
|
-
- ext/
|
|
35
|
-
- ext/
|
|
36
|
-
- ext/
|
|
32
|
+
- ext/ruby_native_statistics/conversions.c
|
|
33
|
+
- ext/ruby_native_statistics/conversions.h
|
|
34
|
+
- ext/ruby_native_statistics/dispersion.c
|
|
35
|
+
- ext/ruby_native_statistics/dispersion.h
|
|
36
|
+
- ext/ruby_native_statistics/extconf.rb
|
|
37
|
+
- ext/ruby_native_statistics/mathematics.c
|
|
38
|
+
- ext/ruby_native_statistics/mathematics.h
|
|
39
|
+
- ext/ruby_native_statistics/ruby_native_statistics.c
|
|
40
|
+
- ext/ruby_native_statistics/ruby_native_statistics.h
|
|
37
41
|
- lib/ruby_native_statistics.rb
|
|
38
42
|
- lib/ruby_native_statistics/version.rb
|
|
39
43
|
- ruby_native_statistics.gemspec
|
|
@@ -53,16 +57,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
53
57
|
requirements:
|
|
54
58
|
- - ">="
|
|
55
59
|
- !ruby/object:Gem::Version
|
|
56
|
-
version: 2.
|
|
60
|
+
version: '2.6'
|
|
57
61
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
62
|
requirements:
|
|
59
63
|
- - ">="
|
|
60
64
|
- !ruby/object:Gem::Version
|
|
61
65
|
version: '0'
|
|
62
66
|
requirements: []
|
|
63
|
-
rubygems_version: 3.1
|
|
67
|
+
rubygems_version: 3.0.3.1
|
|
64
68
|
signing_key:
|
|
65
69
|
specification_version: 4
|
|
66
|
-
summary:
|
|
67
|
-
to the Array class.
|
|
70
|
+
summary: High performance, native (C) implementations of various statistical functions.
|
|
68
71
|
test_files: []
|
data/ext/dispersion/dispersion.c
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
#include "ruby.h"
|
|
2
|
-
#include "dispersion.h"
|
|
3
|
-
#include "../mathematics/mathematics.h"
|
|
4
|
-
|
|
5
|
-
void Init_dispersion() {
|
|
6
|
-
DispersionModule = rb_define_module("Dispersion");
|
|
7
|
-
rb_define_method(DispersionModule, "stdev", rb_sample_standard_deviation, 0);
|
|
8
|
-
rb_define_method(DispersionModule, "stdevs", rb_sample_standard_deviation, 0);
|
|
9
|
-
rb_define_method(DispersionModule, "stdevp", rb_population_standard_deviation, 0);
|
|
10
|
-
rb_define_method(DispersionModule, "var", rb_sample_variance, 0);
|
|
11
|
-
rb_define_method(DispersionModule, "varp", rb_population_variance, 0);
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
VALUE rb_sample_standard_deviation(VALUE self) {
|
|
15
|
-
unsigned int array_length;
|
|
16
|
-
|
|
17
|
-
Check_Type(self, T_ARRAY);
|
|
18
|
-
|
|
19
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
|
20
|
-
|
|
21
|
-
if (array_length <= 1) {
|
|
22
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
return DBL2NUM(sqrt((calculate_total_distance_from_mean(self, array_length)/(array_length - 1))));
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
VALUE rb_sample_variance(VALUE self) {
|
|
29
|
-
unsigned int array_length;
|
|
30
|
-
|
|
31
|
-
Check_Type(self, T_ARRAY);
|
|
32
|
-
|
|
33
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
|
34
|
-
|
|
35
|
-
if (array_length <= 1) {
|
|
36
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
return DBL2NUM((calculate_total_distance_from_mean(self, array_length)/(array_length - 1)));
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
VALUE rb_population_standard_deviation(VALUE self) {
|
|
43
|
-
unsigned int array_length;
|
|
44
|
-
|
|
45
|
-
Check_Type(self, T_ARRAY);
|
|
46
|
-
|
|
47
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
|
48
|
-
|
|
49
|
-
if (array_length <= 1) {
|
|
50
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
return DBL2NUM(sqrt(calculate_total_distance_from_mean(self, array_length) / array_length));
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
VALUE rb_population_variance(VALUE self) {
|
|
57
|
-
unsigned int array_length;
|
|
58
|
-
|
|
59
|
-
Check_Type(self, T_ARRAY);
|
|
60
|
-
|
|
61
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
|
62
|
-
|
|
63
|
-
if (array_length <= 1) {
|
|
64
|
-
rb_raise(rb_eRangeError, "array must have more than one element");
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
return DBL2NUM(calculate_total_distance_from_mean(self, array_length) / array_length);
|
|
68
|
-
}
|
data/ext/dispersion/extconf.rb
DELETED
data/ext/mathematics/extconf.rb
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
#include "ruby.h"
|
|
2
|
-
#include "mathematics.h"
|
|
3
|
-
|
|
4
|
-
void Init_mathematics() {
|
|
5
|
-
MathematicsModule = rb_define_module("Mathematics");
|
|
6
|
-
rb_define_method(MathematicsModule, "mean", rb_mean, 0);
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
double calculate_mean(VALUE array, unsigned long array_length){
|
|
10
|
-
unsigned long i;
|
|
11
|
-
double total = 0;
|
|
12
|
-
double mean = 0;
|
|
13
|
-
|
|
14
|
-
for(i = 0; i < array_length; i++){
|
|
15
|
-
total += rb_num2dbl(rb_ary_entry(array, i));
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
mean = total / array_length;
|
|
19
|
-
|
|
20
|
-
return mean;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
double calculate_total_distance_from_mean(VALUE array, unsigned long array_length){
|
|
24
|
-
unsigned long i;
|
|
25
|
-
double mean = 0;
|
|
26
|
-
double total_distance_from_mean = 0;
|
|
27
|
-
|
|
28
|
-
mean = calculate_mean(array, array_length);
|
|
29
|
-
|
|
30
|
-
for(i = 0; i < array_length; i++){
|
|
31
|
-
total_distance_from_mean += pow((rb_num2dbl(rb_ary_entry(array, i)) - mean), 2);
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
return total_distance_from_mean;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
VALUE rb_mean(VALUE self) {
|
|
38
|
-
unsigned int array_length;
|
|
39
|
-
|
|
40
|
-
Check_Type(self, T_ARRAY);
|
|
41
|
-
|
|
42
|
-
array_length = rb_long2int(RARRAY_LEN(self));
|
|
43
|
-
|
|
44
|
-
if (array_length <= 0) {
|
|
45
|
-
rb_raise(rb_eRangeError, "array must have at least one element");
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
return DBL2NUM(calculate_mean(self, array_length));
|
|
49
|
-
}
|
|
50
|
-
|