lite-statistics 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +4 -2
- data/README.md +18 -43
- data/benchmarks/base.rb +26 -0
- data/benchmarks/descriptive-statistics.rb +69 -0
- data/benchmarks/descriptive_statistics.rb +11 -32
- data/benchmarks/statistica.rb +70 -0
- data/lib/lite/statistics.rb +3 -1
- data/lib/lite/statistics/descriptive.rb +120 -129
- data/lib/lite/statistics/version.rb +1 -1
- data/lite-statistics.gemspec +2 -0
- metadata +19 -3
- data/lib/lite/statistics/base.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06e80ca142cea5c2b47ca54d08b93f50807b5dba0f159783850382351d686b68
|
4
|
+
data.tar.gz: 9b39a00bc5191edfb9f3546ed6c9586ce8ef0e8232e59aee68218bfdb1bdb37a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a794cb73651bf86dc994ab47c405cc78ed3f29dfabf164638f9d0797f068978e11a1781d56351c1db8b9023be3d28292812edd9034ac9cf9b74ce68e7c3d004
|
7
|
+
data.tar.gz: ba5b2b354eedd4d98e95cf3691480e70110591e6f67b55d640af9d7e47accb41d17e1707c6f261626d2cc7570deef912a4aab513829eac66ca86ebd59cbe4ce0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [1.0.1] - 2019-07-18
|
10
|
+
### Changed
|
11
|
+
- Memoization to use Lite::Memoize::Alias for more speed
|
12
|
+
- Rename zscore to zscores
|
13
|
+
- Rework benchmarks
|
14
|
+
|
9
15
|
## [1.0.0] - 2019-07-13
|
10
16
|
### Added
|
11
17
|
- Initial project version
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
lite-statistics (1.0.0)
|
4
|
+
lite-statistics (1.0.1)
|
5
|
+
lite-memoize
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
@@ -40,6 +41,7 @@ GEM
|
|
40
41
|
i18n (1.6.0)
|
41
42
|
concurrent-ruby (~> 1.0)
|
42
43
|
jaro_winkler (1.5.3)
|
44
|
+
lite-memoize (1.0.1)
|
43
45
|
loofah (2.2.3)
|
44
46
|
crass (~> 1.0.2)
|
45
47
|
nokogiri (>= 1.5.9)
|
@@ -80,7 +82,7 @@ GEM
|
|
80
82
|
diff-lcs (>= 1.2.0, < 2.0)
|
81
83
|
rspec-support (~> 3.8.0)
|
82
84
|
rspec-support (3.8.2)
|
83
|
-
rubocop (0.
|
85
|
+
rubocop (0.73.0)
|
84
86
|
jaro_winkler (~> 1.5.1)
|
85
87
|
parallel (~> 1.10)
|
86
88
|
parser (>= 2.6)
|
data/README.md
CHANGED
@@ -97,54 +97,29 @@ Including monkey patches will give you `Enumerable` access to statistics.
|
|
97
97
|
|
98
98
|
## Benchmarks
|
99
99
|
|
100
|
-
All benchmarks are
|
100
|
+
All benchmarks are executed using the baseline summary (exact same calculations for each)
|
101
|
+
and the full summary (all available calculations for each). Each is generated
|
102
|
+
using an array containing 1 million random integers on the `2.6.3` Ruby version.
|
101
103
|
|
102
|
-
|
104
|
+
View how it compares to other libs by running the [benchmarks](https://github.com/drexed/lite-statistics/tree/master/benchmarks).
|
103
105
|
|
104
|
-
|
105
|
-
# From the project folder:
|
106
|
-
ruby benchmarks/descriptive_statistics.rb
|
107
|
-
```
|
106
|
+
#### Baseline summaries
|
108
107
|
|
109
|
-
|
108
|
+
Library | # of Calculations | IPS | Speed
|
109
|
+
--- | --- | --- | ---
|
110
|
+
[lite-statistics](https://github.com/drexed/lite-statistics) | 13 | 2.5 i/s | ---
|
111
|
+
[descriptive_statistics](https://github.com/thirtysixthspan/descriptive_statistics) | 13 | 0.6 i/s | 4.16x slower
|
112
|
+
[descriptive-statistics](https://github.com/jtescher/descriptive-statistics) | 13 | 1.8 i/s | 1.40x slower
|
113
|
+
[statistica](https://github.com/tab300/Statistica) | 13 | --- | Too slow to run
|
110
114
|
|
111
|
-
|
115
|
+
#### Full summaries
|
112
116
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
descriptive_statistics
|
120
|
-
1.000 i/100ms
|
121
|
-
Calculating -------------------------------------
|
122
|
-
lite-statistics 1.191 (± 0.0%) i/s - 6.000 in 5.049012s
|
123
|
-
descriptive_statistics
|
124
|
-
0.560 (± 0.0%) i/s - 3.000 in 5.357009s
|
125
|
-
|
126
|
-
Comparison:
|
127
|
-
lite-statistics: 1.2 i/s
|
128
|
-
descriptive_statistics: 0.6 i/s - 2.13x slower
|
129
|
-
```
|
130
|
-
|
131
|
-
```ruby
|
132
|
-
# lite-statistics => 13 calculations
|
133
|
-
# descriptive_statistics => 13 calculations
|
134
|
-
|
135
|
-
Warming up --------------------------------------
|
136
|
-
lite-statistics 1.000 i/100ms
|
137
|
-
descriptive_statistics
|
138
|
-
1.000 i/100ms
|
139
|
-
Calculating -------------------------------------
|
140
|
-
lite-statistics 2.820 (± 0.0%) i/s - 15.000 in 5.328247s
|
141
|
-
descriptive_statistics
|
142
|
-
0.537 (± 0.0%) i/s - 3.000 in 5.589307s
|
143
|
-
|
144
|
-
Comparison:
|
145
|
-
lite-statistics: 2.8 i/s
|
146
|
-
descriptive_statistics: 0.5 i/s - 5.25x slower
|
147
|
-
```
|
117
|
+
Library | # of Calculations | IPS | Speed
|
118
|
+
--- | --- | --- | ---
|
119
|
+
[lite-statistics](https://github.com/drexed/lite-statistics) | 22 | 1.0 i/s | ---
|
120
|
+
[descriptive_statistics](https://github.com/thirtysixthspan/descriptive_statistics) | 13 | 0.6 i/s | 1.72x slower
|
121
|
+
[descriptive-statistics](https://github.com/jtescher/descriptive-statistics) | 16 | 0.9 i/s | 1.10x slower
|
122
|
+
[statistica](https://github.com/tab300/Statistica) | 19 | --- | Too slow to run
|
148
123
|
|
149
124
|
## Development
|
150
125
|
|
data/benchmarks/base.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
%w[lib benchmarks].each { |name| $LOAD_PATH.unshift(name) }
|
4
|
+
|
5
|
+
require 'benchmark/ips'
|
6
|
+
require 'lite/statistics'
|
7
|
+
|
8
|
+
# rubocop:disable Metrics/MethodLength
|
9
|
+
def ls_baseline_summary(data)
|
10
|
+
{
|
11
|
+
max: data.max,
|
12
|
+
mean: data.mean,
|
13
|
+
median: data.median,
|
14
|
+
min: data.min,
|
15
|
+
mode: data.mode,
|
16
|
+
quartile_1: data.value_from_percentile(25),
|
17
|
+
quartile_2: data.value_from_percentile(50),
|
18
|
+
quartile_3: data.value_from_percentile(75),
|
19
|
+
range: data.range,
|
20
|
+
size: data.size,
|
21
|
+
sum: data.sum,
|
22
|
+
standard_deviation: data.sample_standard_deviation,
|
23
|
+
variance: data.sample_variance
|
24
|
+
}
|
25
|
+
end
|
26
|
+
# rubocop:enable Metrics/MethodLength
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'descriptive-statistics'
|
4
|
+
require_relative 'base'
|
5
|
+
|
6
|
+
collection = []
|
7
|
+
1_000_000.times { collection << rand(1..99) }
|
8
|
+
|
9
|
+
puts '~~~ Baseline Summary Calculations ~~~'
|
10
|
+
puts
|
11
|
+
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
13
|
+
def baseline_summary(data)
|
14
|
+
{
|
15
|
+
max: data.max,
|
16
|
+
mean: data.mean,
|
17
|
+
median: data.median,
|
18
|
+
min: data.min,
|
19
|
+
mode: data.mode,
|
20
|
+
quartile_1: data.value_from_percentile(25),
|
21
|
+
quartile_2: data.value_from_percentile(50),
|
22
|
+
quartile_3: data.value_from_percentile(75),
|
23
|
+
range: data.range,
|
24
|
+
size: data.size,
|
25
|
+
sum: data.sum,
|
26
|
+
standard_deviation: data.standard_deviation,
|
27
|
+
variance: data.variance
|
28
|
+
}
|
29
|
+
end
|
30
|
+
# rubocop:enable Metrics/MethodLength
|
31
|
+
|
32
|
+
Benchmark.ips do |x|
|
33
|
+
x.report('LS.13') do
|
34
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
35
|
+
ls_baseline_summary(data)
|
36
|
+
end
|
37
|
+
|
38
|
+
x.report('DS.13') do
|
39
|
+
data = DescriptiveStatistics::Stats.new(collection)
|
40
|
+
baseline_summary(data)
|
41
|
+
end
|
42
|
+
|
43
|
+
x.compare!
|
44
|
+
end
|
45
|
+
|
46
|
+
puts '~~~ Full Summary Calculations ~~~'
|
47
|
+
puts
|
48
|
+
|
49
|
+
def full_summary(data)
|
50
|
+
baseline_summary(data).merge(
|
51
|
+
relative_standard_deviation: data.relative_standard_deviation,
|
52
|
+
skewness: data.skewness,
|
53
|
+
kurtosis: data.kurtosis
|
54
|
+
)
|
55
|
+
end
|
56
|
+
|
57
|
+
Benchmark.ips do |x|
|
58
|
+
x.report('LS.22') do
|
59
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
60
|
+
data.sample_summary
|
61
|
+
end
|
62
|
+
|
63
|
+
x.report('DS.16') do
|
64
|
+
data = DescriptiveStatistics::Stats.new(collection)
|
65
|
+
full_summary(data)
|
66
|
+
end
|
67
|
+
|
68
|
+
x.compare!
|
69
|
+
end
|
@@ -1,22 +1,20 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
%w[lib benchmarks].each { |name| $LOAD_PATH.unshift(name) }
|
4
|
-
|
5
|
-
require 'benchmark/ips'
|
6
|
-
require 'lite/statistics'
|
7
3
|
require 'descriptive_statistics/safe'
|
4
|
+
require_relative 'base'
|
8
5
|
|
9
6
|
collection = []
|
10
7
|
1_000_000.times { collection << rand(1..99) }
|
11
8
|
|
12
|
-
puts '~~~
|
9
|
+
puts '~~~ Baseline Summary Calculations ~~~'
|
10
|
+
puts
|
13
11
|
Benchmark.ips do |x|
|
14
|
-
x.report('
|
12
|
+
x.report('LS.13') do
|
15
13
|
data = Lite::Statistics::Descriptive.new(collection)
|
16
|
-
data
|
14
|
+
ls_baseline_summary(data)
|
17
15
|
end
|
18
16
|
|
19
|
-
x.report('
|
17
|
+
x.report('DS.13') do
|
20
18
|
data = DescriptiveStatistics::Stats.new(collection)
|
21
19
|
data.descriptive_statistics
|
22
20
|
end
|
@@ -24,34 +22,15 @@ Benchmark.ips do |x|
|
|
24
22
|
x.compare!
|
25
23
|
end
|
26
24
|
|
27
|
-
|
28
|
-
|
29
|
-
{
|
30
|
-
max: data.max,
|
31
|
-
mean: data.mean,
|
32
|
-
median: data.median,
|
33
|
-
min: data.min,
|
34
|
-
mode: data.mode,
|
35
|
-
quartile_1: data.value_from_percentile(25),
|
36
|
-
quartile_2: data.value_from_percentile(50),
|
37
|
-
quartile_3: data.value_from_percentile(75),
|
38
|
-
range: data.range,
|
39
|
-
size: data.size,
|
40
|
-
sum: data.sum,
|
41
|
-
sample_standard_deviation: data.sample_standard_deviation,
|
42
|
-
sample_variance: data.sample_variance
|
43
|
-
}
|
44
|
-
end
|
45
|
-
# rubocop:enable Metrics/MethodLength
|
46
|
-
|
47
|
-
puts '~~~ Equal Summary Size Calculations ~~~'
|
25
|
+
puts '~~~ Full Summary Calculations ~~~'
|
26
|
+
puts
|
48
27
|
Benchmark.ips do |x|
|
49
|
-
x.report('
|
28
|
+
x.report('LS.22') do
|
50
29
|
data = Lite::Statistics::Descriptive.new(collection)
|
51
|
-
|
30
|
+
data.sample_summary
|
52
31
|
end
|
53
32
|
|
54
|
-
x.report('
|
33
|
+
x.report('DS.13') do
|
55
34
|
data = DescriptiveStatistics::Stats.new(collection)
|
56
35
|
data.descriptive_statistics
|
57
36
|
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'statistica'
|
4
|
+
require_relative 'base'
|
5
|
+
|
6
|
+
collection = []
|
7
|
+
1_000_000.times { collection << rand(1..99) }
|
8
|
+
|
9
|
+
puts '~~~ Baseline Summary Calculations ~~~'
|
10
|
+
puts
|
11
|
+
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
13
|
+
def baseline_summary(data)
|
14
|
+
{
|
15
|
+
max: data.max,
|
16
|
+
mean: data.get_arith_mean,
|
17
|
+
median: data.get_median,
|
18
|
+
min: data.min,
|
19
|
+
mode: data.get_mode,
|
20
|
+
quartile_1: data.get_percentile(25),
|
21
|
+
quartile_2: data.get_percentile(50),
|
22
|
+
quartile_3: data.get_percentile(75),
|
23
|
+
range: data.get_range,
|
24
|
+
size: data.size,
|
25
|
+
sum: data.get_sum,
|
26
|
+
standard_deviation: data.get_stdev,
|
27
|
+
variance: data.get_variance
|
28
|
+
}
|
29
|
+
end
|
30
|
+
# rubocop:enable Metrics/MethodLength
|
31
|
+
|
32
|
+
Benchmark.ips do |x|
|
33
|
+
x.report('LS.13') do
|
34
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
35
|
+
ls_baseline_summary(data)
|
36
|
+
end
|
37
|
+
|
38
|
+
x.report('ST.13') do
|
39
|
+
baseline_summary(collection)
|
40
|
+
end
|
41
|
+
|
42
|
+
x.compare!
|
43
|
+
end
|
44
|
+
|
45
|
+
puts '~~~ Full Summary Calculations ~~~'
|
46
|
+
puts
|
47
|
+
|
48
|
+
def full_summary(data)
|
49
|
+
baseline_summary(data).merge(
|
50
|
+
geometric_mean: data.get_geo_mean,
|
51
|
+
harmonic_mean: data.get_harm_mean,
|
52
|
+
product: data.get_product,
|
53
|
+
interquartile_range: data.get_interquartile_range,
|
54
|
+
relative_variance: data.get_relative_variance,
|
55
|
+
coeff_variation: data.get_coeff_variation
|
56
|
+
)
|
57
|
+
end
|
58
|
+
|
59
|
+
Benchmark.ips do |x|
|
60
|
+
x.report('LS.22') do
|
61
|
+
data = Lite::Statistics::Descriptive.new(collection)
|
62
|
+
data.sample_summary
|
63
|
+
end
|
64
|
+
|
65
|
+
x.report('ST.19') do
|
66
|
+
full_summary(collection)
|
67
|
+
end
|
68
|
+
|
69
|
+
x.compare!
|
70
|
+
end
|
data/lib/lite/statistics.rb
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
module Lite
|
4
4
|
module Statistics
|
5
|
-
class Descriptive
|
5
|
+
class Descriptive
|
6
|
+
|
7
|
+
extend Lite::Memoize::Alias
|
6
8
|
|
7
9
|
CALCULATIONS ||= %i[
|
8
10
|
frequencies max mean median midrange min mode proportions percentile_from_value
|
@@ -29,125 +31,123 @@ module Lite
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def frequencies
|
32
|
-
|
33
|
-
return if @collection.empty?
|
34
|
+
return if @collection.empty?
|
34
35
|
|
35
|
-
|
36
|
-
end
|
36
|
+
@collection.each_with_object(Hash.new(0)) { |val, hash| hash[val] += 1 }
|
37
37
|
end
|
38
38
|
|
39
|
+
memoize :frequencies
|
40
|
+
|
39
41
|
# rubocop:disable Style/UnneededSort
|
40
42
|
def max
|
41
|
-
|
42
|
-
return if @collection.empty?
|
43
|
+
return if @collection.empty?
|
43
44
|
|
44
|
-
|
45
|
-
end
|
45
|
+
sort.last
|
46
46
|
end
|
47
|
+
|
48
|
+
memoize :max
|
47
49
|
# rubocop:enable Style/UnneededSort
|
48
50
|
|
49
51
|
def mean
|
50
|
-
|
51
|
-
return if @collection.empty?
|
52
|
+
return if @collection.empty?
|
52
53
|
|
53
|
-
|
54
|
-
end
|
54
|
+
sum / size.to_f
|
55
55
|
end
|
56
56
|
|
57
|
+
memoize :mean
|
57
58
|
alias average mean
|
58
59
|
|
59
60
|
# rubocop:disable Metrics/AbcSize
|
60
61
|
def median
|
61
|
-
|
62
|
-
|
63
|
-
return sort[size / 2] unless size.even?
|
62
|
+
return if @collection.empty?
|
63
|
+
return sort[size / 2] unless size.even?
|
64
64
|
|
65
|
-
|
66
|
-
end
|
65
|
+
(sort[(size / 2) - 1] + sort[size / 2]) / 2.0
|
67
66
|
end
|
67
|
+
|
68
|
+
memoize :median
|
68
69
|
# rubocop:enable Metrics/AbcSize
|
69
70
|
|
70
71
|
# rubocop:disable Style/UnneededSort
|
71
72
|
def min
|
72
|
-
|
73
|
-
return if @collection.empty?
|
73
|
+
return if @collection.empty?
|
74
74
|
|
75
|
-
|
76
|
-
end
|
75
|
+
sort.first
|
77
76
|
end
|
77
|
+
|
78
|
+
memoize :min
|
78
79
|
# rubocop:enable Style/UnneededSort
|
79
80
|
|
80
81
|
def mode
|
81
|
-
|
82
|
-
return if @collection.empty?
|
82
|
+
return if @collection.empty?
|
83
83
|
|
84
|
-
|
85
|
-
|
84
|
+
top_two = frequencies.sort_by { |_, val| -val }.take(2)
|
85
|
+
return if top_two.first.last == top_two.last.last
|
86
86
|
|
87
|
-
|
88
|
-
end
|
87
|
+
top_two.first.first
|
89
88
|
end
|
90
89
|
|
90
|
+
memoize :mode
|
91
|
+
|
91
92
|
def percentile_from_value(value)
|
92
|
-
|
93
|
-
return if @collection.empty?
|
93
|
+
return if @collection.empty?
|
94
94
|
|
95
|
-
|
96
|
-
end
|
95
|
+
(sort.index(value) / size.to_f * 100).ceil
|
97
96
|
end
|
98
97
|
|
98
|
+
memoize :percentile_from_value
|
99
99
|
alias percentile percentile_from_value
|
100
100
|
|
101
101
|
def population_coefficient_of_variation
|
102
|
-
|
103
|
-
return if @collection.empty?
|
102
|
+
return if @collection.empty?
|
104
103
|
|
105
|
-
|
106
|
-
end
|
104
|
+
population_standard_deviation / mean
|
107
105
|
end
|
108
106
|
|
107
|
+
memoize :population_coefficient_of_variation
|
108
|
+
|
109
109
|
def population_size
|
110
|
-
|
111
|
-
@collection.size - 1
|
112
|
-
end
|
110
|
+
@collection.size - 1
|
113
111
|
end
|
114
112
|
|
113
|
+
memoize :population_size
|
114
|
+
|
115
115
|
def population_kurtosis
|
116
|
-
|
117
|
-
|
118
|
-
return 0 if size == 1
|
116
|
+
return if @collection.empty?
|
117
|
+
return 0 if size == 1
|
119
118
|
|
120
|
-
|
121
|
-
|
122
|
-
end
|
119
|
+
quarted_standard_deviation = population_standard_deviation**4
|
120
|
+
sum_of_power(4) / (population_size * quarted_standard_deviation.to_f)
|
123
121
|
end
|
124
122
|
|
123
|
+
memoize :population_kurtosis
|
124
|
+
|
125
125
|
def population_skewness
|
126
|
-
|
127
|
-
|
128
|
-
return 0 if size == 1
|
126
|
+
return if @collection.empty?
|
127
|
+
return 0 if size == 1
|
129
128
|
|
130
|
-
|
131
|
-
|
132
|
-
end
|
129
|
+
cubed_standard_deviation = population_standard_deviation**3
|
130
|
+
sum_of_power(3) / (population_size * cubed_standard_deviation.to_f)
|
133
131
|
end
|
134
132
|
|
133
|
+
memoize :population_skewness
|
134
|
+
|
135
135
|
def population_standard_deviation
|
136
|
-
|
137
|
-
return if @collection.empty?
|
136
|
+
return if @collection.empty?
|
138
137
|
|
139
|
-
|
140
|
-
end
|
138
|
+
Math.sqrt(population_variance)
|
141
139
|
end
|
142
140
|
|
141
|
+
memoize :population_standard_deviation
|
142
|
+
|
143
143
|
def population_standard_error
|
144
|
-
|
145
|
-
return if @collection.empty?
|
144
|
+
return if @collection.empty?
|
146
145
|
|
147
|
-
|
148
|
-
end
|
146
|
+
population_standard_deviation / Math.sqrt(population_size)
|
149
147
|
end
|
150
148
|
|
149
|
+
memoize :population_standard_error
|
150
|
+
|
151
151
|
def population_summary
|
152
152
|
base_summary.merge(
|
153
153
|
population_coefficient_of_variation: population_coefficient_of_variation,
|
@@ -162,110 +162,103 @@ module Lite
|
|
162
162
|
end
|
163
163
|
|
164
164
|
def population_variance
|
165
|
-
|
166
|
-
return if @collection.empty?
|
165
|
+
return if @collection.empty?
|
167
166
|
|
168
|
-
|
169
|
-
end
|
167
|
+
sum_of_power(2) / population_size.to_f
|
170
168
|
end
|
171
169
|
|
170
|
+
memoize :population_variance
|
171
|
+
|
172
172
|
def population_zscore
|
173
|
-
|
174
|
-
|
175
|
-
return Hash.new(0) if population_standard_deviation.zero?
|
173
|
+
return if size < 2
|
174
|
+
return Hash.new(0) if population_standard_deviation.zero?
|
176
175
|
|
177
|
-
|
178
|
-
|
179
|
-
end
|
176
|
+
@collection.each_with_object({}) do |val, hash|
|
177
|
+
hash[val] ||= (val - mean) / population_standard_deviation
|
180
178
|
end
|
181
179
|
end
|
182
180
|
|
181
|
+
memoize :population_zscore
|
182
|
+
|
183
183
|
def midrange
|
184
|
-
|
185
|
-
return if @collection.empty?
|
184
|
+
return if @collection.empty?
|
186
185
|
|
187
|
-
|
188
|
-
end
|
186
|
+
[min, max].sum / 2.0
|
189
187
|
end
|
190
188
|
|
189
|
+
memoize :midrange
|
191
190
|
alias midextreme midrange
|
192
191
|
|
193
192
|
def proportions
|
194
|
-
|
195
|
-
return if @collection.empty?
|
193
|
+
return if @collection.empty?
|
196
194
|
|
197
|
-
|
198
|
-
end
|
195
|
+
frequencies.each_with_object({}) { |(key, val), hash| hash[key] = val / size.to_f }
|
199
196
|
end
|
200
197
|
|
198
|
+
memoize :proportions
|
199
|
+
|
201
200
|
def range
|
202
|
-
|
203
|
-
return if @collection.empty?
|
201
|
+
return if @collection.empty?
|
204
202
|
|
205
|
-
|
206
|
-
end
|
203
|
+
max - min
|
207
204
|
end
|
208
205
|
|
206
|
+
memoize :range
|
207
|
+
|
209
208
|
def sample_coefficient_of_variation
|
210
|
-
|
211
|
-
return if @collection.empty?
|
209
|
+
return if @collection.empty?
|
212
210
|
|
213
|
-
|
214
|
-
end
|
211
|
+
sample_standard_deviation / mean
|
215
212
|
end
|
216
213
|
|
214
|
+
memoize :sample_coefficient_of_variation
|
217
215
|
alias coefficient_of_variation sample_coefficient_of_variation
|
218
216
|
|
219
217
|
def sample_size
|
220
|
-
|
221
|
-
@collection.size
|
222
|
-
end
|
218
|
+
@collection.size
|
223
219
|
end
|
224
220
|
|
221
|
+
memoize :sample_size
|
225
222
|
alias size sample_size
|
226
223
|
|
227
224
|
def sample_kurtosis
|
228
|
-
|
229
|
-
|
230
|
-
return 0 if size == 1
|
225
|
+
return if @collection.empty?
|
226
|
+
return 0 if size == 1
|
231
227
|
|
232
|
-
|
233
|
-
|
234
|
-
end
|
228
|
+
quarted_standard_deviation = sample_standard_deviation**4
|
229
|
+
sum_of_power(4) / (sample_size * quarted_standard_deviation.to_f)
|
235
230
|
end
|
236
231
|
|
232
|
+
memoize :sample_kurtosis
|
237
233
|
alias kurtosis sample_kurtosis
|
238
234
|
|
239
235
|
def sample_skewness
|
240
|
-
|
241
|
-
|
242
|
-
return 0 if size == 1
|
236
|
+
return if @collection.empty?
|
237
|
+
return 0 if size == 1
|
243
238
|
|
244
|
-
|
245
|
-
|
246
|
-
end
|
239
|
+
cubed_standard_deviation = sample_standard_deviation**3
|
240
|
+
sum_of_power(3) / (sample_size * cubed_standard_deviation.to_f)
|
247
241
|
end
|
248
242
|
|
243
|
+
memoize :sample_skewness
|
249
244
|
alias skewness sample_skewness
|
250
245
|
|
251
246
|
def sample_standard_deviation
|
252
|
-
|
253
|
-
return if @collection.empty?
|
247
|
+
return if @collection.empty?
|
254
248
|
|
255
|
-
|
256
|
-
end
|
249
|
+
Math.sqrt(sample_variance)
|
257
250
|
end
|
258
251
|
|
252
|
+
memoize :sample_standard_deviation
|
259
253
|
alias standard_deviation sample_standard_deviation
|
260
254
|
|
261
255
|
def sample_standard_error
|
262
|
-
|
263
|
-
return if @collection.empty?
|
256
|
+
return if @collection.empty?
|
264
257
|
|
265
|
-
|
266
|
-
end
|
258
|
+
sample_standard_deviation / Math.sqrt(sample_size)
|
267
259
|
end
|
268
260
|
|
261
|
+
memoize :sample_standard_error
|
269
262
|
alias standard_error sample_standard_error
|
270
263
|
|
271
264
|
def sample_summary
|
@@ -284,43 +277,41 @@ module Lite
|
|
284
277
|
alias summary sample_summary
|
285
278
|
|
286
279
|
def sample_variance
|
287
|
-
|
288
|
-
return if @collection.empty?
|
280
|
+
return if @collection.empty?
|
289
281
|
|
290
|
-
|
291
|
-
end
|
282
|
+
sum_of_power(2) / sample_size.to_f
|
292
283
|
end
|
293
284
|
|
285
|
+
memoize :sample_variance
|
294
286
|
alias variance sample_variance
|
295
287
|
|
288
|
+
# TODO: rename this to zscores
|
296
289
|
def sample_zscore
|
297
|
-
|
298
|
-
|
299
|
-
return Hash.new(0) if sample_standard_deviation.zero?
|
290
|
+
return if size < 2
|
291
|
+
return Hash.new(0) if sample_standard_deviation.zero?
|
300
292
|
|
301
|
-
|
302
|
-
|
303
|
-
end
|
293
|
+
@collection.each_with_object({}) do |val, hash|
|
294
|
+
hash[val] ||= (val - mean) / sample_standard_deviation
|
304
295
|
end
|
305
296
|
end
|
306
297
|
|
298
|
+
memoize :sample_zscore
|
307
299
|
alias zscore sample_zscore
|
308
300
|
|
309
301
|
def sum
|
310
|
-
|
311
|
-
@collection.sum
|
312
|
-
end
|
302
|
+
@collection.sum
|
313
303
|
end
|
314
304
|
|
305
|
+
memoize :sum
|
306
|
+
|
315
307
|
def value_from_percentile(percentile)
|
316
|
-
|
317
|
-
return if @collection.empty?
|
308
|
+
return if @collection.empty?
|
318
309
|
|
319
|
-
|
320
|
-
|
321
|
-
end
|
310
|
+
index = (percentile.to_f / 100 * size).ceil
|
311
|
+
sort[index]
|
322
312
|
end
|
323
313
|
|
314
|
+
memoize :value_from_percentile
|
324
315
|
alias percentile_rank value_from_percentile
|
325
316
|
|
326
317
|
private
|
@@ -347,11 +338,11 @@ module Lite
|
|
347
338
|
# rubocop:enable Metrics/MethodLength
|
348
339
|
|
349
340
|
def sort
|
350
|
-
|
351
|
-
@collection.sort
|
352
|
-
end
|
341
|
+
@collection.sort
|
353
342
|
end
|
354
343
|
|
344
|
+
memoize :sort
|
345
|
+
|
355
346
|
def sum_of_power(power)
|
356
347
|
@collection.inject(0) { |acc, val| acc + (val - mean)**power }
|
357
348
|
end
|
data/lite-statistics.gemspec
CHANGED
@@ -37,6 +37,8 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
38
38
|
spec.require_paths = %w[lib]
|
39
39
|
|
40
|
+
spec.add_runtime_dependency 'lite-memoize'
|
41
|
+
|
40
42
|
spec.add_development_dependency 'bundler'
|
41
43
|
spec.add_development_dependency 'fasterer'
|
42
44
|
spec.add_development_dependency 'generator_spec'
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lite-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Juan Gomez
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: lite-memoize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,7 +156,10 @@ files:
|
|
142
156
|
- README.md
|
143
157
|
- Rakefile
|
144
158
|
- _config.yml
|
159
|
+
- benchmarks/base.rb
|
160
|
+
- benchmarks/descriptive-statistics.rb
|
145
161
|
- benchmarks/descriptive_statistics.rb
|
162
|
+
- benchmarks/statistica.rb
|
146
163
|
- bin/console
|
147
164
|
- bin/setup
|
148
165
|
- docs/.DS_Store
|
@@ -170,7 +187,6 @@ files:
|
|
170
187
|
- lib/generators/lite/statistics/install_generator.rb
|
171
188
|
- lib/generators/lite/statistics/templates/install.rb
|
172
189
|
- lib/lite/statistics.rb
|
173
|
-
- lib/lite/statistics/base.rb
|
174
190
|
- lib/lite/statistics/configuration.rb
|
175
191
|
- lib/lite/statistics/descriptive.rb
|
176
192
|
- lib/lite/statistics/enumerable.rb
|
data/lib/lite/statistics/base.rb
DELETED