ruby-statistics 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -6
- data/lib/statistics/statistical_test/f_test.rb +83 -0
- data/lib/statistics/statistical_test/t_test.rb +46 -0
- data/lib/statistics/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a98a2e5755e14ddcfab200641afa2e3bc931a188
|
4
|
+
data.tar.gz: 8532c77ff003ee31a0ea3989e0e22f382c3272e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3582142dd14dd4076c9972b35d0d00599d55376659990111a7012ed574c098f3cf635274259bc013d3d1d6600d9308920274cc77e9ed8ee587d0f73b9f9834e0
|
7
|
+
data.tar.gz: cae3993eb2452cbcce670c6b1262b74e9b6c516a0e80820c1f3b20ec1ec0c0a65de6f138383814c8aae5aa82aa6e8b1a799a9848ed79564fe609bce5afe9e014
|
data/README.md
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
# Statistics
|
1
|
+
# Ruby Statistics
|
2
2
|
|
3
|
-
|
3
|
+
A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar.
|
4
4
|
|
5
|
-
|
5
|
+
We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html).
|
6
|
+
|
7
|
+
Some logic and algorithms are extractions or adaptations from other authors, which are referenced in the comments.
|
8
|
+
This software is released under the MIT License.
|
6
9
|
|
7
10
|
## Installation
|
8
11
|
|
@@ -24,7 +27,7 @@ Or install it yourself as:
|
|
24
27
|
|
25
28
|
Just require the `statistics` gem in order to load it. If you have not already defined a `Distribution` namespace, the gem will assign an alias to it, reducing the number of namespaces needed to use a class.
|
26
29
|
|
27
|
-
Right
|
30
|
+
Right now you can load:
|
28
31
|
|
29
32
|
* The whole statistics gem. `require 'statistics'`
|
30
33
|
* A namespace. `require 'statistics/distribution'`
|
@@ -48,7 +51,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
48
51
|
|
49
52
|
## Contributing
|
50
53
|
|
51
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
54
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
52
55
|
|
53
56
|
## License
|
54
57
|
|
@@ -56,4 +59,10 @@ The gem is available as open source under the terms of the [MIT License](http://
|
|
56
59
|
|
57
60
|
## Code of Conduct
|
58
61
|
|
59
|
-
Everyone interacting in the Statistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
62
|
+
Everyone interacting in the Statistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/estebanz01/ruby-statistics/blob/master/CODE_OF_CONDUCT.md).
|
63
|
+
|
64
|
+
## Contact
|
65
|
+
|
66
|
+
You can contact me via:
|
67
|
+
* [Github](https://github.com/estebanz01)
|
68
|
+
* [Twitter](https://twitter.com/estebanz01)
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Statistics
  module StatisticalTest
    class FTest
      # Calculates the one-way ANOVA F-test statistic.
      #
      # Every argument is assumed to be an array of numbers (one array per group)
      # that responds to +mean+ and +variance+; those helpers are not defined in
      # this file and are expected to be provided by the rest of the gem.
      #
      # Returns a three-element array:
      #   [F-statistic (F-score), degrees of freedom numerator, degrees of freedom denominator]
      # All three elements are nil when fewer than two groups are given.
      #
      # Formulas extracted from:
      # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/
      # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html
      def self.anova_f_score(*args)
        total_groups = args.size
        return [nil, nil, nil] if total_groups < 2

        all_observations = args.flatten
        total_elements = all_observations.size

        # With exactly two groups we follow the classic F-test for equality of
        # variances: the ratio between the larger and the smaller variance.
        if total_groups == 2
          variances = args.map(&:variance)
          f_score = variances.max / variances.min.to_f

          # NOTE(review): these degrees of freedom follow the ANOVA convention
          # (k - 1, N - k with k = 2) rather than the (n1 - 1, n2 - 1) pair usually
          # quoted for the variance-ratio test -- confirm this is intended.
          return [f_score, 1, total_elements - 2]
        end

        overall_mean = all_observations.mean
        sample_means = args.map(&:mean)
        df1 = total_groups - 1              # k - 1
        df2 = total_elements - total_groups # N - k
        groups_with_means = args.zip(sample_means)

        # Variance between groups: size-weighted squared distance of every group
        # mean to the overall mean, each term divided by (k - 1).
        variance_between_groups = groups_with_means.reduce(0) do |summation, (group, group_mean)|
          summation + (group.size * ((group_mean - overall_mean)**2)) / df1.to_f
        end

        # Variance within groups: squared distance of every observation to the
        # mean of its own group, each term divided by (N - k).
        variance_within_groups = groups_with_means.reduce(0) do |outer_summation, (group, group_mean)|
          outer_summation + group.reduce(0) do |inner_summation, observation|
            inner_summation + ((observation - group_mean)**2) / df2.to_f
          end
        end

        [variance_between_groups / variance_within_groups.to_f, df1, df2]
      end

      # Runs a one-way ANOVA test for the given alpha value and groups.
      #
      # Returns nil when the F-score cannot be computed (fewer than two groups).
      # Otherwise returns a hash with the cumulative probability, the p-value, the
      # alpha, the confidence level and the test result (whether to reject the
      # null hypothesis or not).
      #
      # Keep in mind that the values for the :null and :alternative keys (true/false)
      # do not imply that a hypothesis is TRUE or FALSE. They are a notation shortcut
      # to decide whether the null hypothesis is rejected or not.
      def self.one_way_anova(alpha, *args)
        f_score, df1, df2 = anova_f_score(*args)

        return if [f_score, df1, df2].any?(&:nil?)

        probability = Distribution::F.new(df1, df2).cumulative_function(f_score)
        p_value = 1 - probability

        # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha
        # we can assume that if p_value <= alpha, we can safely reject the null
        # hypothesis, i.e. accept the alternative hypothesis.
        { probability: probability,
          p_value: p_value,
          alpha: alpha,
          null: alpha < p_value,
          alternative: p_value <= alpha,
          confidence_level: 1 - alpha }
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Statistics
  module StatisticalTest
    class TTest
      # Performs a T-Test for one or two samples.
      #
      # alpha - the significance level used to decide the test result.
      # tails - a symbol, :one_tail or :two_tail. Any value other than :two_tail
      #         is handled as a one-tailed test.
      # args  - either (comparison_mean, sample) for a one-sample test, selected
      #         when the first element is a Numeric, or (sample, sample) for a
      #         two-sample test. Samples are assumed to be arrays responding to
      #         +mean+, +variance+ and +standard_deviation+; those helpers are not
      #         defined in this file.
      #
      # Returns nil when fewer than two positional arguments are given. Otherwise
      # returns a hash with the cumulative probability at the t-score, the p-value,
      # the alpha, the confidence level and the :null / :alternative flags, which
      # tell whether the null hypothesis is kept or rejected.
      def self.perform(alpha, tails, *args)
        return if args.size < 2

        degrees_of_freedom = 0

        t_score = if args[0].is_a?(Numeric)
                    # One-sample test against a known comparison mean.
                    comparison_mean = args[0]
                    sample = args[1]
                    sample_size = sample.size

                    # A one-sample t statistic has n - 1 degrees of freedom, not n.
                    degrees_of_freedom = sample_size - 1

                    standard_error = sample.standard_deviation / Math.sqrt(sample_size).to_f
                    (sample.mean - comparison_mean) / standard_error.to_f
                  else
                    # Two-sample test.
                    left, right = args[0], args[1]
                    degrees_of_freedom = args.flatten.size - 2

                    left_root = left.variance / left.size.to_f
                    right_root = right.variance / right.size.to_f
                    standard_error = Math.sqrt(left_root + right_root)

                    (left.mean - right.mean) / standard_error.to_f
                  end

        distribution = Distribution::TStudent.new(degrees_of_freedom)
        probability = distribution.cumulative_function(t_score)

        # For the two-tailed test the p-value is twice the upper tail beyond |t|.
        # Using the signed t-score here would yield values above 1 whenever the
        # t-score is negative.
        p_value = if tails == :two_tail
                    2 * (1 - distribution.cumulative_function(t_score.abs))
                  else
                    1 - probability
                  end

        { probability: probability,
          p_value: p_value,
          alpha: alpha,
          null: alpha < p_value,
          alternative: p_value <= alpha,
          confidence_level: 1 - alpha }
      end
    end
  end
end
|
data/lib/statistics/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- esteban zapata
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -129,6 +129,8 @@ files:
|
|
129
129
|
- lib/statistics/distribution/t_student.rb
|
130
130
|
- lib/statistics/distribution/uniform.rb
|
131
131
|
- lib/statistics/distribution/weibull.rb
|
132
|
+
- lib/statistics/statistical_test/f_test.rb
|
133
|
+
- lib/statistics/statistical_test/t_test.rb
|
132
134
|
- lib/statistics/version.rb
|
133
135
|
- ruby-statistics.gemspec
|
134
136
|
homepage: https://github.com/estebanz01/ruby-statistics
|
@@ -151,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
153
|
version: '0'
|
152
154
|
requirements: []
|
153
155
|
rubyforge_project:
|
154
|
-
rubygems_version: 2.6.
|
156
|
+
rubygems_version: 2.6.14
|
155
157
|
signing_key:
|
156
158
|
specification_version: 4
|
157
159
|
summary: A ruby gem for some specific statistics. Inspired by the jStat js library.
|