lite-statistics 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +4 -2
- data/README.md +18 -43
- data/benchmarks/base.rb +26 -0
- data/benchmarks/descriptive-statistics.rb +69 -0
- data/benchmarks/descriptive_statistics.rb +11 -32
- data/benchmarks/statistica.rb +70 -0
- data/lib/lite/statistics.rb +3 -1
- data/lib/lite/statistics/descriptive.rb +120 -129
- data/lib/lite/statistics/version.rb +1 -1
- data/lite-statistics.gemspec +2 -0
- metadata +19 -3
- data/lib/lite/statistics/base.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06e80ca142cea5c2b47ca54d08b93f50807b5dba0f159783850382351d686b68
|
4
|
+
data.tar.gz: 9b39a00bc5191edfb9f3546ed6c9586ce8ef0e8232e59aee68218bfdb1bdb37a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a794cb73651bf86dc994ab47c405cc78ed3f29dfabf164638f9d0797f068978e11a1781d56351c1db8b9023be3d28292812edd9034ac9cf9b74ce68e7c3d004
|
7
|
+
data.tar.gz: ba5b2b354eedd4d98e95cf3691480e70110591e6f67b55d640af9d7e47accb41d17e1707c6f261626d2cc7570deef912a4aab513829eac66ca86ebd59cbe4ce0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [1.0.1] - 2019-07-18
|
10
|
+
### Changed
|
11
|
+
- Memoization to use Lite::Memoize::Alias for more speed
|
12
|
+
- Rename zscore to zscores
|
13
|
+
- Rework benchmarks
|
14
|
+
|
9
15
|
## [1.0.0] - 2019-07-13
|
10
16
|
### Added
|
11
17
|
- Initial project version
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
lite-statistics (1.0.0)
|
4
|
+
lite-statistics (1.0.1)
|
5
|
+
lite-memoize
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
@@ -40,6 +41,7 @@ GEM
|
|
40
41
|
i18n (1.6.0)
|
41
42
|
concurrent-ruby (~> 1.0)
|
42
43
|
jaro_winkler (1.5.3)
|
44
|
+
lite-memoize (1.0.1)
|
43
45
|
loofah (2.2.3)
|
44
46
|
crass (~> 1.0.2)
|
45
47
|
nokogiri (>= 1.5.9)
|
@@ -80,7 +82,7 @@ GEM
|
|
80
82
|
diff-lcs (>= 1.2.0, < 2.0)
|
81
83
|
rspec-support (~> 3.8.0)
|
82
84
|
rspec-support (3.8.2)
|
83
|
-
rubocop (0.
|
85
|
+
rubocop (0.73.0)
|
84
86
|
jaro_winkler (~> 1.5.1)
|
85
87
|
parallel (~> 1.10)
|
86
88
|
parser (>= 2.6)
|
data/README.md
CHANGED
@@ -97,54 +97,29 @@ Including monkey patches will give you `Enumerable` access to statistics.
|
|
97
97
|
|
98
98
|
## Benchmarks
|
99
99
|
|
100
|
-
All benchmarks are
|
100
|
+
All benchmarks are executed using the baseline summary (exact same calculations for each)
|
101
|
+
and the full summary (all available calculations for each). Each is generated
|
102
|
+
using an array of 1 million random integers on Ruby `2.6.3`.
|
101
103
|
|
102
|
-
|
104
|
+
See how it compares to other libraries by running the [benchmarks](https://github.com/drexed/lite-statistics/tree/master/benchmarks).
|
103
105
|
|
104
|
-
|
105
|
-
# From the project folder:
|
106
|
-
ruby benchmarks/descriptive_statistics.rb
|
107
|
-
```
|
106
|
+
#### Baseline summaries
|
108
107
|
|
109
|
-
|
108
|
+
Library | # of Calculations | IPS | Speed
|
109
|
+
--- | --- | --- | ---
|
110
|
+
[lite-statistics](https://github.com/drexed/lite-statistics) | 13 | 2.5 i/s | ---
|
111
|
+
[descriptive_statistics](https://github.com/thirtysixthspan/descriptive_statistics) | 13 | 0.6 i/s | 4.16x slower
|
112
|
+
[descriptive-statistics](https://github.com/jtescher/descriptive-statistics) | 13 | 1.8 i/s | 1.40x slower
|
113
|
+
[statistica](https://github.com/tab300/Statistica) | 13 | --- | Too slow to run
|
110
114
|
|
111
|
-
|
115
|
+
#### Full summaries
|
112
116
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
descriptive_statistics
|
120
|
-
1.000 i/100ms
|
121
|
-
Calculating -------------------------------------
|
122
|
-
lite-statistics 1.191 (± 0.0%) i/s - 6.000 in 5.049012s
|
123
|
-
descriptive_statistics
|
124
|
-
0.560 (± 0.0%) i/s - 3.000 in 5.357009s
|
125
|
-
|
126
|
-
Comparison:
|
127
|
-
lite-statistics: 1.2 i/s
|
128
|
-
descriptive_statistics: 0.6 i/s - 2.13x slower
|
129
|
-
```
|
130
|
-
|
131
|
-
```ruby
|
132
|
-
# lite-statistics => 13 calculations
|
133
|
-
# descriptive_statistics => 13 calculations
|
134
|
-
|
135
|
-
Warming up --------------------------------------
|
136
|
-
lite-statistics 1.000 i/100ms
|
137
|
-
descriptive_statistics
|
138
|
-
1.000 i/100ms
|
139
|
-
Calculating -------------------------------------
|
140
|
-
lite-statistics 2.820 (± 0.0%) i/s - 15.000 in 5.328247s
|
141
|
-
descriptive_statistics
|
142
|
-
0.537 (± 0.0%) i/s - 3.000 in 5.589307s
|
143
|
-
|
144
|
-
Comparison:
|
145
|
-
lite-statistics: 2.8 i/s
|
146
|
-
descriptive_statistics: 0.5 i/s - 5.25x slower
|
147
|
-
```
|
117
|
+
Library | # of Calculations | IPS | Speed
|
118
|
+
--- | --- | --- | ---
|
119
|
+
[lite-statistics](https://github.com/drexed/lite-statistics) | 22 | 1.0 i/s | ---
|
120
|
+
[descriptive_statistics](https://github.com/thirtysixthspan/descriptive_statistics) | 13 | 0.6 i/s | 1.72x slower
|
121
|
+
[descriptive-statistics](https://github.com/jtescher/descriptive-statistics) | 16 | 0.9 i/s | 1.10x slower
|
122
|
+
[statistica](https://github.com/tab300/Statistica) | 19 | --- | Too slow to run
|
148
123
|
|
149
124
|
## Development
|
150
125
|
|
data/benchmarks/base.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
%w[lib benchmarks].each { |name| $LOAD_PATH.unshift(name) }
|
4
|
+
|
5
|
+
require 'benchmark/ips'
|
6
|
+
require 'lite/statistics'
|
7
|
+
|
8
|
+
# rubocop:disable Metrics/MethodLength
|
9
|
+
def ls_baseline_summary(data)
|
10
|
+
{
|
11
|
+
max: data.max,
|
12
|
+
mean: data.mean,
|
13
|
+
median: data.median,
|
14
|
+
min: data.min,
|
15
|
+
mode: data.mode,
|
16
|
+
quartile_1: data.value_from_percentile(25),
|
17
|
+
quartile_2: data.value_from_percentile(50),
|
18
|
+
quartile_3: data.value_from_percentile(75),
|
19
|
+
range: data.range,
|
20
|
+
size: data.size,
|
21
|
+
sum: data.sum,
|
22
|
+
standard_deviation: data.sample_standard_deviation,
|
23
|
+
variance: data.sample_variance
|
24
|
+
}
|
25
|
+
end
|
26
|
+
# rubocop:enable Metrics/MethodLength
|
data/benchmarks/descriptive-statistics.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'descriptive-statistics'
|
4
|
+
require_relative 'base'
|
5
|
+
|
6
|
+
collection = []
|
7
|
+
1_000_000.times { collection << rand(1..99) }
|
8
|
+
|
9
|
+
puts '~~~ Baseline Summary Calculations ~~~'
|
10
|
+
puts
|
11
|
+
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
13
|
+
def baseline_summary(data)
|
14
|
+
{
|
15
|
+
max: data.max,
|
16
|
+
mean: data.mean,
|
17
|
+
median: data.median,
|
18
|
+
min: data.min,
|
19
|
+
mode: data.mode,
|
20
|
+
quartile_1: data.value_from_percentile(25),
|
21
|
+
quartile_2: data.value_from_percentile(50),
|
22
|
+
quartile_3: data.value_from_percentile(75),
|
23
|
+
range: data.range,
|
24
|
+
size: data.size,
|
25
|
+
sum: data.sum,
|
26
|
+
standard_deviation: data.standard_deviation,
|
27
|
+
variance: data.variance
|
28
|
+
}
|
29
|
+
end
|
30
|
+
# rubocop:enable Metrics/MethodLength
|
31
|
+
|
32
|
+
Benchmark.ips do |x|
|
33
|
+
x.report('LS.13') do
|
34
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
35
|
+
ls_baseline_summary(data)
|
36
|
+
end
|
37
|
+
|
38
|
+
x.report('DS.13') do
|
39
|
+
data = DescriptiveStatistics::Stats.new(collection)
|
40
|
+
baseline_summary(data)
|
41
|
+
end
|
42
|
+
|
43
|
+
x.compare!
|
44
|
+
end
|
45
|
+
|
46
|
+
puts '~~~ Full Summary Calculations ~~~'
|
47
|
+
puts
|
48
|
+
|
49
|
+
def full_summary(data)
|
50
|
+
baseline_summary(data).merge(
|
51
|
+
relative_standard_deviation: data.relative_standard_deviation,
|
52
|
+
skewness: data.skewness,
|
53
|
+
kurtosis: data.kurtosis
|
54
|
+
)
|
55
|
+
end
|
56
|
+
|
57
|
+
Benchmark.ips do |x|
|
58
|
+
x.report('LS.22') do
|
59
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
60
|
+
data.sample_summary
|
61
|
+
end
|
62
|
+
|
63
|
+
x.report('DS.16') do
|
64
|
+
data = DescriptiveStatistics::Stats.new(collection)
|
65
|
+
full_summary(data)
|
66
|
+
end
|
67
|
+
|
68
|
+
x.compare!
|
69
|
+
end
|
data/benchmarks/descriptive_statistics.rb
CHANGED
@@ -1,22 +1,20 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
%w[lib benchmarks].each { |name| $LOAD_PATH.unshift(name) }
|
4
|
-
|
5
|
-
require 'benchmark/ips'
|
6
|
-
require 'lite/statistics'
|
7
3
|
require 'descriptive_statistics/safe'
|
4
|
+
require_relative 'base'
|
8
5
|
|
9
6
|
collection = []
|
10
7
|
1_000_000.times { collection << rand(1..99) }
|
11
8
|
|
12
|
-
puts '~~~
|
9
|
+
puts '~~~ Baseline Summary Calculations ~~~'
|
10
|
+
puts
|
13
11
|
Benchmark.ips do |x|
|
14
|
-
x.report('
|
12
|
+
x.report('LS.13') do
|
15
13
|
data = Lite::Statistics::Descriptive.new(collection)
|
16
|
-
data
|
14
|
+
ls_baseline_summary(data)
|
17
15
|
end
|
18
16
|
|
19
|
-
x.report('
|
17
|
+
x.report('DS.13') do
|
20
18
|
data = DescriptiveStatistics::Stats.new(collection)
|
21
19
|
data.descriptive_statistics
|
22
20
|
end
|
@@ -24,34 +22,15 @@ Benchmark.ips do |x|
|
|
24
22
|
x.compare!
|
25
23
|
end
|
26
24
|
|
27
|
-
|
28
|
-
|
29
|
-
{
|
30
|
-
max: data.max,
|
31
|
-
mean: data.mean,
|
32
|
-
median: data.median,
|
33
|
-
min: data.min,
|
34
|
-
mode: data.mode,
|
35
|
-
quartile_1: data.value_from_percentile(25),
|
36
|
-
quartile_2: data.value_from_percentile(50),
|
37
|
-
quartile_3: data.value_from_percentile(75),
|
38
|
-
range: data.range,
|
39
|
-
size: data.size,
|
40
|
-
sum: data.sum,
|
41
|
-
sample_standard_deviation: data.sample_standard_deviation,
|
42
|
-
sample_variance: data.sample_variance
|
43
|
-
}
|
44
|
-
end
|
45
|
-
# rubocop:enable Metrics/MethodLength
|
46
|
-
|
47
|
-
puts '~~~ Equal Summary Size Calculations ~~~'
|
25
|
+
puts '~~~ Full Summary Calculations ~~~'
|
26
|
+
puts
|
48
27
|
Benchmark.ips do |x|
|
49
|
-
x.report('
|
28
|
+
x.report('LS.22') do
|
50
29
|
data = Lite::Statistics::Descriptive.new(collection)
|
51
|
-
|
30
|
+
data.sample_summary
|
52
31
|
end
|
53
32
|
|
54
|
-
x.report('
|
33
|
+
x.report('DS.13') do
|
55
34
|
data = DescriptiveStatistics::Stats.new(collection)
|
56
35
|
data.descriptive_statistics
|
57
36
|
end
|
data/benchmarks/statistica.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'statistica'
|
4
|
+
require_relative 'base'
|
5
|
+
|
6
|
+
collection = []
|
7
|
+
1_000_000.times { collection << rand(1..99) }
|
8
|
+
|
9
|
+
puts '~~~ Baseline Summary Calculations ~~~'
|
10
|
+
puts
|
11
|
+
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
13
|
+
def baseline_summary(data)
|
14
|
+
{
|
15
|
+
max: data.max,
|
16
|
+
mean: data.get_arith_mean,
|
17
|
+
median: data.get_median,
|
18
|
+
min: data.min,
|
19
|
+
mode: data.get_mode,
|
20
|
+
quartile_1: data.get_percentile(25),
|
21
|
+
quartile_2: data.get_percentile(50),
|
22
|
+
quartile_3: data.get_percentile(75),
|
23
|
+
range: data.get_range,
|
24
|
+
size: data.size,
|
25
|
+
sum: data.get_sum,
|
26
|
+
standard_deviation: data.get_stdev,
|
27
|
+
variance: data.get_variance
|
28
|
+
}
|
29
|
+
end
|
30
|
+
# rubocop:enable Metrics/MethodLength
|
31
|
+
|
32
|
+
Benchmark.ips do |x|
|
33
|
+
x.report('LS.13') do
|
34
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
35
|
+
ls_baseline_summary(data)
|
36
|
+
end
|
37
|
+
|
38
|
+
x.report('ST.13') do
|
39
|
+
baseline_summary(collection)
|
40
|
+
end
|
41
|
+
|
42
|
+
x.compare!
|
43
|
+
end
|
44
|
+
|
45
|
+
puts '~~~ Full Summary Calculations ~~~'
|
46
|
+
puts
|
47
|
+
|
48
|
+
def full_summary(data)
|
49
|
+
baseline_summary(data).merge(
|
50
|
+
geometric_mean: data.get_geo_mean,
|
51
|
+
harmonic_mean: data.get_harm_mean,
|
52
|
+
product: data.get_product,
|
53
|
+
interquartile_range: data.get_interquartile_range,
|
54
|
+
relative_variance: data.get_relative_variance,
|
55
|
+
coeff_variation: data.get_coeff_variation
|
56
|
+
)
|
57
|
+
end
|
58
|
+
|
59
|
+
Benchmark.ips do |x|
|
60
|
+
x.report('LS.22') do
|
61
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
62
|
+
data.sample_summary
|
63
|
+
end
|
64
|
+
|
65
|
+
x.report('ST.19') do
|
66
|
+
full_summary(collection)
|
67
|
+
end
|
68
|
+
|
69
|
+
x.compare!
|
70
|
+
end
|
data/lib/lite/statistics.rb
CHANGED
data/lib/lite/statistics/descriptive.rb
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
module Lite
|
4
4
|
module Statistics
|
5
|
-
class Descriptive
|
5
|
+
class Descriptive
|
6
|
+
|
7
|
+
extend Lite::Memoize::Alias
|
6
8
|
|
7
9
|
CALCULATIONS ||= %i[
|
8
10
|
frequencies max mean median midrange min mode proportions percentile_from_value
|
@@ -29,125 +31,123 @@ module Lite
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def frequencies
|
32
|
-
|
33
|
-
return if @collection.empty?
|
34
|
+
return if @collection.empty?
|
34
35
|
|
35
|
-
|
36
|
-
end
|
36
|
+
@collection.each_with_object(Hash.new(0)) { |val, hash| hash[val] += 1 }
|
37
37
|
end
|
38
38
|
|
39
|
+
memoize :frequencies
|
40
|
+
|
39
41
|
# rubocop:disable Style/UnneededSort
|
40
42
|
def max
|
41
|
-
|
42
|
-
return if @collection.empty?
|
43
|
+
return if @collection.empty?
|
43
44
|
|
44
|
-
|
45
|
-
end
|
45
|
+
sort.last
|
46
46
|
end
|
47
|
+
|
48
|
+
memoize :max
|
47
49
|
# rubocop:enable Style/UnneededSort
|
48
50
|
|
49
51
|
def mean
|
50
|
-
|
51
|
-
return if @collection.empty?
|
52
|
+
return if @collection.empty?
|
52
53
|
|
53
|
-
|
54
|
-
end
|
54
|
+
sum / size.to_f
|
55
55
|
end
|
56
56
|
|
57
|
+
memoize :mean
|
57
58
|
alias average mean
|
58
59
|
|
59
60
|
# rubocop:disable Metrics/AbcSize
|
60
61
|
def median
|
61
|
-
|
62
|
-
|
63
|
-
return sort[size / 2] unless size.even?
|
62
|
+
return if @collection.empty?
|
63
|
+
return sort[size / 2] unless size.even?
|
64
64
|
|
65
|
-
|
66
|
-
end
|
65
|
+
(sort[(size / 2) - 1] + sort[size / 2]) / 2.0
|
67
66
|
end
|
67
|
+
|
68
|
+
memoize :median
|
68
69
|
# rubocop:enable Metrics/AbcSize
|
69
70
|
|
70
71
|
# rubocop:disable Style/UnneededSort
|
71
72
|
def min
|
72
|
-
|
73
|
-
return if @collection.empty?
|
73
|
+
return if @collection.empty?
|
74
74
|
|
75
|
-
|
76
|
-
end
|
75
|
+
sort.first
|
77
76
|
end
|
77
|
+
|
78
|
+
memoize :min
|
78
79
|
# rubocop:enable Style/UnneededSort
|
79
80
|
|
80
81
|
def mode
|
81
|
-
|
82
|
-
return if @collection.empty?
|
82
|
+
return if @collection.empty?
|
83
83
|
|
84
|
-
|
85
|
-
|
84
|
+
top_two = frequencies.sort_by { |_, val| -val }.take(2)
|
85
|
+
return if top_two.first.last == top_two.last.last
|
86
86
|
|
87
|
-
|
88
|
-
end
|
87
|
+
top_two.first.first
|
89
88
|
end
|
90
89
|
|
90
|
+
memoize :mode
|
91
|
+
|
91
92
|
def percentile_from_value(value)
|
92
|
-
|
93
|
-
return if @collection.empty?
|
93
|
+
return if @collection.empty?
|
94
94
|
|
95
|
-
|
96
|
-
end
|
95
|
+
(sort.index(value) / size.to_f * 100).ceil
|
97
96
|
end
|
98
97
|
|
98
|
+
memoize :percentile_from_value
|
99
99
|
alias percentile percentile_from_value
|
100
100
|
|
101
101
|
def population_coefficient_of_variation
|
102
|
-
|
103
|
-
return if @collection.empty?
|
102
|
+
return if @collection.empty?
|
104
103
|
|
105
|
-
|
106
|
-
end
|
104
|
+
population_standard_deviation / mean
|
107
105
|
end
|
108
106
|
|
107
|
+
memoize :population_coefficient_of_variation
|
108
|
+
|
109
109
|
def population_size
|
110
|
-
|
111
|
-
@collection.size - 1
|
112
|
-
end
|
110
|
+
@collection.size - 1
|
113
111
|
end
|
114
112
|
|
113
|
+
memoize :population_size
|
114
|
+
|
115
115
|
def population_kurtosis
|
116
|
-
|
117
|
-
|
118
|
-
return 0 if size == 1
|
116
|
+
return if @collection.empty?
|
117
|
+
return 0 if size == 1
|
119
118
|
|
120
|
-
|
121
|
-
|
122
|
-
end
|
119
|
+
quarted_standard_deviation = population_standard_deviation**4
|
120
|
+
sum_of_power(4) / (population_size * quarted_standard_deviation.to_f)
|
123
121
|
end
|
124
122
|
|
123
|
+
memoize :population_kurtosis
|
124
|
+
|
125
125
|
def population_skewness
|
126
|
-
|
127
|
-
|
128
|
-
return 0 if size == 1
|
126
|
+
return if @collection.empty?
|
127
|
+
return 0 if size == 1
|
129
128
|
|
130
|
-
|
131
|
-
|
132
|
-
end
|
129
|
+
cubed_standard_deviation = population_standard_deviation**3
|
130
|
+
sum_of_power(3) / (population_size * cubed_standard_deviation.to_f)
|
133
131
|
end
|
134
132
|
|
133
|
+
memoize :population_skewness
|
134
|
+
|
135
135
|
def population_standard_deviation
|
136
|
-
|
137
|
-
return if @collection.empty?
|
136
|
+
return if @collection.empty?
|
138
137
|
|
139
|
-
|
140
|
-
end
|
138
|
+
Math.sqrt(population_variance)
|
141
139
|
end
|
142
140
|
|
141
|
+
memoize :population_standard_deviation
|
142
|
+
|
143
143
|
def population_standard_error
|
144
|
-
|
145
|
-
return if @collection.empty?
|
144
|
+
return if @collection.empty?
|
146
145
|
|
147
|
-
|
148
|
-
end
|
146
|
+
population_standard_deviation / Math.sqrt(population_size)
|
149
147
|
end
|
150
148
|
|
149
|
+
memoize :population_standard_error
|
150
|
+
|
151
151
|
def population_summary
|
152
152
|
base_summary.merge(
|
153
153
|
population_coefficient_of_variation: population_coefficient_of_variation,
|
@@ -162,110 +162,103 @@ module Lite
|
|
162
162
|
end
|
163
163
|
|
164
164
|
def population_variance
|
165
|
-
|
166
|
-
return if @collection.empty?
|
165
|
+
return if @collection.empty?
|
167
166
|
|
168
|
-
|
169
|
-
end
|
167
|
+
sum_of_power(2) / population_size.to_f
|
170
168
|
end
|
171
169
|
|
170
|
+
memoize :population_variance
|
171
|
+
|
172
172
|
def population_zscore
|
173
|
-
|
174
|
-
|
175
|
-
return Hash.new(0) if population_standard_deviation.zero?
|
173
|
+
return if size < 2
|
174
|
+
return Hash.new(0) if population_standard_deviation.zero?
|
176
175
|
|
177
|
-
|
178
|
-
|
179
|
-
end
|
176
|
+
@collection.each_with_object({}) do |val, hash|
|
177
|
+
hash[val] ||= (val - mean) / population_standard_deviation
|
180
178
|
end
|
181
179
|
end
|
182
180
|
|
181
|
+
memoize :population_zscore
|
182
|
+
|
183
183
|
def midrange
|
184
|
-
|
185
|
-
return if @collection.empty?
|
184
|
+
return if @collection.empty?
|
186
185
|
|
187
|
-
|
188
|
-
end
|
186
|
+
[min, max].sum / 2.0
|
189
187
|
end
|
190
188
|
|
189
|
+
memoize :midrange
|
191
190
|
alias midextreme midrange
|
192
191
|
|
193
192
|
def proportions
|
194
|
-
|
195
|
-
return if @collection.empty?
|
193
|
+
return if @collection.empty?
|
196
194
|
|
197
|
-
|
198
|
-
end
|
195
|
+
frequencies.each_with_object({}) { |(key, val), hash| hash[key] = val / size.to_f }
|
199
196
|
end
|
200
197
|
|
198
|
+
memoize :proportions
|
199
|
+
|
201
200
|
def range
|
202
|
-
|
203
|
-
return if @collection.empty?
|
201
|
+
return if @collection.empty?
|
204
202
|
|
205
|
-
|
206
|
-
end
|
203
|
+
max - min
|
207
204
|
end
|
208
205
|
|
206
|
+
memoize :range
|
207
|
+
|
209
208
|
def sample_coefficient_of_variation
|
210
|
-
|
211
|
-
return if @collection.empty?
|
209
|
+
return if @collection.empty?
|
212
210
|
|
213
|
-
|
214
|
-
end
|
211
|
+
sample_standard_deviation / mean
|
215
212
|
end
|
216
213
|
|
214
|
+
memoize :sample_coefficient_of_variation
|
217
215
|
alias coefficient_of_variation sample_coefficient_of_variation
|
218
216
|
|
219
217
|
def sample_size
|
220
|
-
|
221
|
-
@collection.size
|
222
|
-
end
|
218
|
+
@collection.size
|
223
219
|
end
|
224
220
|
|
221
|
+
memoize :sample_size
|
225
222
|
alias size sample_size
|
226
223
|
|
227
224
|
def sample_kurtosis
|
228
|
-
|
229
|
-
|
230
|
-
return 0 if size == 1
|
225
|
+
return if @collection.empty?
|
226
|
+
return 0 if size == 1
|
231
227
|
|
232
|
-
|
233
|
-
|
234
|
-
end
|
228
|
+
quarted_standard_deviation = sample_standard_deviation**4
|
229
|
+
sum_of_power(4) / (sample_size * quarted_standard_deviation.to_f)
|
235
230
|
end
|
236
231
|
|
232
|
+
memoize :sample_kurtosis
|
237
233
|
alias kurtosis sample_kurtosis
|
238
234
|
|
239
235
|
def sample_skewness
|
240
|
-
|
241
|
-
|
242
|
-
return 0 if size == 1
|
236
|
+
return if @collection.empty?
|
237
|
+
return 0 if size == 1
|
243
238
|
|
244
|
-
|
245
|
-
|
246
|
-
end
|
239
|
+
cubed_standard_deviation = sample_standard_deviation**3
|
240
|
+
sum_of_power(3) / (sample_size * cubed_standard_deviation.to_f)
|
247
241
|
end
|
248
242
|
|
243
|
+
memoize :sample_skewness
|
249
244
|
alias skewness sample_skewness
|
250
245
|
|
251
246
|
def sample_standard_deviation
|
252
|
-
|
253
|
-
return if @collection.empty?
|
247
|
+
return if @collection.empty?
|
254
248
|
|
255
|
-
|
256
|
-
end
|
249
|
+
Math.sqrt(sample_variance)
|
257
250
|
end
|
258
251
|
|
252
|
+
memoize :sample_standard_deviation
|
259
253
|
alias standard_deviation sample_standard_deviation
|
260
254
|
|
261
255
|
def sample_standard_error
|
262
|
-
|
263
|
-
return if @collection.empty?
|
256
|
+
return if @collection.empty?
|
264
257
|
|
265
|
-
|
266
|
-
end
|
258
|
+
sample_standard_deviation / Math.sqrt(sample_size)
|
267
259
|
end
|
268
260
|
|
261
|
+
memoize :sample_standard_error
|
269
262
|
alias standard_error sample_standard_error
|
270
263
|
|
271
264
|
def sample_summary
|
@@ -284,43 +277,41 @@ module Lite
|
|
284
277
|
alias summary sample_summary
|
285
278
|
|
286
279
|
def sample_variance
|
287
|
-
|
288
|
-
return if @collection.empty?
|
280
|
+
return if @collection.empty?
|
289
281
|
|
290
|
-
|
291
|
-
end
|
282
|
+
sum_of_power(2) / sample_size.to_f
|
292
283
|
end
|
293
284
|
|
285
|
+
memoize :sample_variance
|
294
286
|
alias variance sample_variance
|
295
287
|
|
288
|
+
# TODO: rename this to zscores
|
296
289
|
def sample_zscore
|
297
|
-
|
298
|
-
|
299
|
-
return Hash.new(0) if sample_standard_deviation.zero?
|
290
|
+
return if size < 2
|
291
|
+
return Hash.new(0) if sample_standard_deviation.zero?
|
300
292
|
|
301
|
-
|
302
|
-
|
303
|
-
end
|
293
|
+
@collection.each_with_object({}) do |val, hash|
|
294
|
+
hash[val] ||= (val - mean) / sample_standard_deviation
|
304
295
|
end
|
305
296
|
end
|
306
297
|
|
298
|
+
memoize :sample_zscore
|
307
299
|
alias zscore sample_zscore
|
308
300
|
|
309
301
|
def sum
|
310
|
-
|
311
|
-
@collection.sum
|
312
|
-
end
|
302
|
+
@collection.sum
|
313
303
|
end
|
314
304
|
|
305
|
+
memoize :sum
|
306
|
+
|
315
307
|
def value_from_percentile(percentile)
|
316
|
-
|
317
|
-
return if @collection.empty?
|
308
|
+
return if @collection.empty?
|
318
309
|
|
319
|
-
|
320
|
-
|
321
|
-
end
|
310
|
+
index = (percentile.to_f / 100 * size).ceil
|
311
|
+
sort[index]
|
322
312
|
end
|
323
313
|
|
314
|
+
memoize :value_from_percentile
|
324
315
|
alias percentile_rank value_from_percentile
|
325
316
|
|
326
317
|
private
|
@@ -347,11 +338,11 @@ module Lite
|
|
347
338
|
# rubocop:enable Metrics/MethodLength
|
348
339
|
|
349
340
|
def sort
|
350
|
-
|
351
|
-
@collection.sort
|
352
|
-
end
|
341
|
+
@collection.sort
|
353
342
|
end
|
354
343
|
|
344
|
+
memoize :sort
|
345
|
+
|
355
346
|
def sum_of_power(power)
|
356
347
|
@collection.inject(0) { |acc, val| acc + (val - mean)**power }
|
357
348
|
end
|
data/lite-statistics.gemspec
CHANGED
@@ -37,6 +37,8 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
38
38
|
spec.require_paths = %w[lib]
|
39
39
|
|
40
|
+
spec.add_runtime_dependency 'lite-memoize'
|
41
|
+
|
40
42
|
spec.add_development_dependency 'bundler'
|
41
43
|
spec.add_development_dependency 'fasterer'
|
42
44
|
spec.add_development_dependency 'generator_spec'
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lite-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Juan Gomez
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: lite-memoize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,7 +156,10 @@ files:
|
|
142
156
|
- README.md
|
143
157
|
- Rakefile
|
144
158
|
- _config.yml
|
159
|
+
- benchmarks/base.rb
|
160
|
+
- benchmarks/descriptive-statistics.rb
|
145
161
|
- benchmarks/descriptive_statistics.rb
|
162
|
+
- benchmarks/statistica.rb
|
146
163
|
- bin/console
|
147
164
|
- bin/setup
|
148
165
|
- docs/.DS_Store
|
@@ -170,7 +187,6 @@ files:
|
|
170
187
|
- lib/generators/lite/statistics/install_generator.rb
|
171
188
|
- lib/generators/lite/statistics/templates/install.rb
|
172
189
|
- lib/lite/statistics.rb
|
173
|
-
- lib/lite/statistics/base.rb
|
174
190
|
- lib/lite/statistics/configuration.rb
|
175
191
|
- lib/lite/statistics/descriptive.rb
|
176
192
|
- lib/lite/statistics/enumerable.rb
|
data/lib/lite/statistics/base.rb
DELETED