ruby-statistics 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +15 -6
- data/lib/statistics/statistical_test/f_test.rb +83 -0
- data/lib/statistics/statistical_test/t_test.rb +46 -0
- data/lib/statistics/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a98a2e5755e14ddcfab200641afa2e3bc931a188
|
4
|
+
data.tar.gz: 8532c77ff003ee31a0ea3989e0e22f382c3272e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3582142dd14dd4076c9972b35d0d00599d55376659990111a7012ed574c098f3cf635274259bc013d3d1d6600d9308920274cc77e9ed8ee587d0f73b9f9834e0
|
7
|
+
data.tar.gz: cae3993eb2452cbcce670c6b1262b74e9b6c516a0e80820c1f3b20ec1ec0c0a65de6f138383814c8aae5aa82aa6e8b1a799a9848ed79564fe609bce5afe9e014
|
data/README.md
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
# Statistics
|
1
|
+
# Ruby Statistics
|
2
2
|
|
3
|
-
|
3
|
+
A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar.
|
4
4
|
|
5
|
-
|
5
|
+
We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html).
|
6
|
+
|
7
|
+
Some logic and algorithms are extractions or adaptations from other authors, which are referenced in the comments.
|
8
|
+
This software is released under the MIT License.
|
6
9
|
|
7
10
|
## Installation
|
8
11
|
|
@@ -24,7 +27,7 @@ Or install it yourself as:
|
|
24
27
|
|
25
28
|
Just require the `statistics` gem in order to load it. If you have not defined the `Distribution` namespace, the gem will assign an alias, reducing the number of namespaces needed to use a class.
|
26
29
|
|
27
|
-
Right
|
30
|
+
Right now you can load:
|
28
31
|
|
29
32
|
* The whole statistics gem. `require 'statistics'`
|
30
33
|
* A namespace. `require 'statistics/distribution'`
|
@@ -48,7 +51,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
48
51
|
|
49
52
|
## Contributing
|
50
53
|
|
51
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
54
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
52
55
|
|
53
56
|
## License
|
54
57
|
|
@@ -56,4 +59,10 @@ The gem is available as open source under the terms of the [MIT License](http://
|
|
56
59
|
|
57
60
|
## Code of Conduct
|
58
61
|
|
59
|
-
Everyone interacting in the Statistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
62
|
+
Everyone interacting in the Statistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/estebanz01/ruby-statistics/blob/master/CODE_OF_CONDUCT.md).
|
63
|
+
|
64
|
+
## Contact
|
65
|
+
|
66
|
+
You can contact me via:
|
67
|
+
* [Github](https://github.com/estebanz01)
|
68
|
+
* [Twitter](https://twitter.com/estebanz01)
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Statistics
  module StatisticalTest
    class FTest
      # Calculates the one-way ANOVA F-test statistic.
      #
      # Every argument is assumed to be an array of numeric observations (one
      # array per group) that responds to `mean` and `variance`.
      #
      # Returns an array with three elements:
      #   [F-statistic (F-score), numerator degrees of freedom, denominator degrees of freedom]
      # All three elements are nil when fewer than two groups are given.
      #
      # Formulas extracted from:
      # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/
      # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html
      def self.anova_f_score(*args)
        total_groups = args.size
        return [nil, nil, nil] if total_groups < 2

        observations = args.flatten

        # With exactly two groups we follow the classic F-Test for equality of
        # variances, which is the ratio between the larger and the smaller variance.
        if total_groups == 2
          variances = [args[0].variance, args[1].variance]

          # NOTE(review): these are the ANOVA degrees of freedom (k - 1, N - k) with
          # k = 2. The variance-ratio test is usually paired with (n1 - 1, n2 - 1);
          # kept as-is to preserve existing results -- confirm which one is intended.
          return [variances.max / variances.min.to_f, 1, observations.size - 2]
        end

        total_elements = observations.size
        overall_mean = observations.mean
        sample_means = args.map(&:mean)

        df1 = total_groups - 1              # k - 1
        df2 = total_elements - total_groups # N - k

        groups_with_means = args.zip(sample_means)

        # Variance between groups: size-weighted squared distance of every group
        # mean to the overall mean, divided by k - 1.
        variance_between_groups = groups_with_means.reduce(0) do |summation, (group, group_mean)|
          summation + ((group.size * ((group_mean - overall_mean)**2)) / df1.to_f)
        end

        # Variance within groups: squared distance of every observation to the
        # mean of its own group, divided by N - k.
        variance_within_groups = groups_with_means.reduce(0) do |outer_summation, (group, group_mean)|
          outer_summation + group.reduce(0) do |inner_summation, observation|
            inner_summation + (((observation - group_mean)**2) / df2.to_f)
          end
        end

        [variance_between_groups / variance_within_groups.to_f, df1, df2]
      end

      # Runs the one-way ANOVA test for the given alpha value and groups.
      #
      # Returns nil when the F-score cannot be calculated (fewer than two groups).
      # Otherwise returns a hash with:
      #   probability:      value of the F cumulative function at the F-score
      #   p_value:          1 - probability
      #   alpha:            the alpha value received
      #   null:             true when the null hypothesis is NOT rejected (alpha < p_value)
      #   alternative:      true when the null hypothesis is rejected (p_value <= alpha)
      #   confidence_level: 1 - alpha
      #
      # Keep in mind that the values for the :alternative key (true/false) do not imply
      # that the alternative hypothesis is TRUE or FALSE. It is only a notation to decide
      # whether to reject the null hypothesis or not.
      def self.one_way_anova(alpha, *args)
        f_score, df1, df2 = anova_f_score(*args)

        return if f_score.nil? || df1.nil? || df2.nil?

        probability = Distribution::F.new(df1, df2).cumulative_function(f_score)
        p_value = 1 - probability

        # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha
        # we can assume that if p_value <= alpha, we can safely reject the null hypothesis,
        # i.e. accept the alternative hypothesis.
        { probability: probability,
          p_value: p_value,
          alpha: alpha,
          null: alpha < p_value,
          alternative: p_value <= alpha,
          confidence_level: 1 - alpha }
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Statistics
  module StatisticalTest
    class TTest
      # Performs a T-Test for one or two samples.
      #
      # alpha - the significance level used to decide on the null hypothesis.
      # tails - a symbol: :one_tail or :two_tail. Any value other than :two_tail
      #         is treated as a one-tailed (upper tail) test.
      # args  - either (comparison_mean, sample) for a one-sample test, where
      #         comparison_mean is a Numeric, or (left_sample, right_sample) for a
      #         two-sample test. Samples are assumed to be arrays responding to
      #         `mean`, `variance` and `standard_deviation`.
      #
      # Returns nil when fewer than two arguments are given after alpha and tails.
      # Otherwise returns a hash with:
      #   probability:      value of the T-Student cumulative function at the t-score
      #   p_value:          upper tail probability (one tail) or twice the smaller
      #                     tail probability (two tails)
      #   alpha:            the alpha value received
      #   null:             true when the null hypothesis is NOT rejected (alpha < p_value)
      #   alternative:      true when the null hypothesis is rejected (p_value <= alpha)
      #   confidence_level: 1 - alpha
      def self.perform(alpha, tails, *args)
        return if args.size < 2

        first, second = args

        if first.is_a?(Numeric)
          # One sample compared against a reference mean.
          sample_size = second.size
          standard_error = second.standard_deviation / Math.sqrt(sample_size)

          # A one-sample t statistic has n - 1 degrees of freedom.
          degrees_of_freedom = sample_size - 1
          t_score = ((second.mean - first) / standard_error).to_f
        else
          # Two samples: the standard error combines each variance over its sample size.
          left_root = first.variance / first.size.to_f
          right_root = second.variance / second.size.to_f
          standard_error = Math.sqrt(left_root + right_root)

          degrees_of_freedom = args.flatten.size - 2
          t_score = (first.mean - second.mean) / standard_error.to_f
        end

        probability = Distribution::TStudent.new(degrees_of_freedom).cumulative_function(t_score)
        upper_tail = 1 - probability

        p_value =
          if tails == :two_tail
            # Double the smaller tail so a negative t-score cannot yield a p-value
            # above 1 (doubling the upper tail did exactly that).
            2 * (probability < upper_tail ? probability : upper_tail)
          else
            upper_tail
          end

        { probability: probability,
          p_value: p_value,
          alpha: alpha,
          null: alpha < p_value,
          alternative: p_value <= alpha,
          confidence_level: 1 - alpha }
      end
    end
  end
end
|
data/lib/statistics/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- esteban zapata
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -129,6 +129,8 @@ files:
|
|
129
129
|
- lib/statistics/distribution/t_student.rb
|
130
130
|
- lib/statistics/distribution/uniform.rb
|
131
131
|
- lib/statistics/distribution/weibull.rb
|
132
|
+
- lib/statistics/statistical_test/f_test.rb
|
133
|
+
- lib/statistics/statistical_test/t_test.rb
|
132
134
|
- lib/statistics/version.rb
|
133
135
|
- ruby-statistics.gemspec
|
134
136
|
homepage: https://github.com/estebanz01/ruby-statistics
|
@@ -151,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
153
|
version: '0'
|
152
154
|
requirements: []
|
153
155
|
rubyforge_project:
|
154
|
-
rubygems_version: 2.6.
|
156
|
+
rubygems_version: 2.6.14
|
155
157
|
signing_key:
|
156
158
|
specification_version: 4
|
157
159
|
summary: A ruby gem for some specific statistics. Inspired by the jStat js library.
|