lite-statistics 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fasterer.yml +19 -0
- data/.gitignore +11 -0
- data/.rspec +4 -0
- data/.rubocop.yml +24 -0
- data/.travis.yml +24 -0
- data/CHANGELOG.md +11 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +119 -0
- data/LICENSE.txt +21 -0
- data/README.md +165 -0
- data/Rakefile +8 -0
- data/_config.yml +1 -0
- data/benchmarks/descriptive_statistics.rb +60 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/docs/.DS_Store +0 -0
- data/docs/descriptive/COEFFICIENT_OF_VARIATION.md +19 -0
- data/docs/descriptive/FREQUENCIES.md +22 -0
- data/docs/descriptive/KURTOSIS.md +19 -0
- data/docs/descriptive/MAX.md +17 -0
- data/docs/descriptive/MEAN.md +19 -0
- data/docs/descriptive/MEDIAN.md +17 -0
- data/docs/descriptive/MIDRANGE.md +19 -0
- data/docs/descriptive/MIN.md +17 -0
- data/docs/descriptive/MODE.md +17 -0
- data/docs/descriptive/PERCENTILE_FROM_VALUE.md +19 -0
- data/docs/descriptive/PROPORTIONS.md +22 -0
- data/docs/descriptive/RANGE.md +17 -0
- data/docs/descriptive/SIZE.md +19 -0
- data/docs/descriptive/SKEWNESS.md +19 -0
- data/docs/descriptive/STANDARD_DEVIATION.md +19 -0
- data/docs/descriptive/STANDARD_ERROR.md +19 -0
- data/docs/descriptive/SUM.md +17 -0
- data/docs/descriptive/SUMMARY.md +25 -0
- data/docs/descriptive/VALUE_FROM_PERCENTILE.md +19 -0
- data/docs/descriptive/VARIANCE.md +19 -0
- data/docs/descriptive/ZSCORE.md +24 -0
- data/lib/generators/lite/statistics/install_generator.rb +17 -0
- data/lib/generators/lite/statistics/templates/install.rb +5 -0
- data/lib/lite/statistics.rb +9 -0
- data/lib/lite/statistics/base.rb +19 -0
- data/lib/lite/statistics/configuration.rb +29 -0
- data/lib/lite/statistics/descriptive.rb +361 -0
- data/lib/lite/statistics/enumerable.rb +24 -0
- data/lib/lite/statistics/version.rb +9 -0
- data/lite-statistics.gemspec +48 -0
- metadata +202 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
# Sample|Population Summary
|
2
|
+
|
3
|
+
Alias: `summary`
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
collection = [1, 1, 2, 3, 10]
|
7
|
+
results = {
|
8
|
+
# - all documented calculations - ...
|
9
|
+
# - including value_from_percentile of 25, 50, and 75 (quartile) -
|
10
|
+
quartile_1: 2,
|
11
|
+
quartile_2: 3,
|
12
|
+
quartile_3: 10
|
13
|
+
}
|
14
|
+
|
15
|
+
klass = Lite::Statistics::Descriptive.new(collection)
|
16
|
+
klass.sample_summary
|
17
|
+
|
18
|
+
# - or -
|
19
|
+
|
20
|
+
Lite::Statistics::Descriptive.population_summary(collection)
|
21
|
+
|
22
|
+
# - or -
|
23
|
+
|
24
|
+
collection.sample_summary
|
25
|
+
```
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Value from Percentile
|
2
|
+
|
3
|
+
Alias: `percentile_rank`
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
collection = [1, 1, 2, 3, 10]
|
7
|
+
results = 3
|
8
|
+
|
9
|
+
klass = Lite::Statistics::Descriptive.new(collection)
|
10
|
+
klass.value_from_percentile(60)
|
11
|
+
|
12
|
+
# - or -
|
13
|
+
|
14
|
+
Lite::Statistics::Descriptive.value_from_percentile(collection, 60)
|
15
|
+
|
16
|
+
# - or -
|
17
|
+
|
18
|
+
collection.value_from_percentile(60)
|
19
|
+
```
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Sample|Population Variance
|
2
|
+
|
3
|
+
Alias: `variance`
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
collection = [1, 1, 2, 3, 10]
|
7
|
+
results = 11.44
|
8
|
+
|
9
|
+
klass = Lite::Statistics::Descriptive.new(collection)
|
10
|
+
klass.sample_variance
|
11
|
+
|
12
|
+
# - or -
|
13
|
+
|
14
|
+
Lite::Statistics::Descriptive.population_variance(collection)
|
15
|
+
|
16
|
+
# - or -
|
17
|
+
|
18
|
+
collection.sample_variance
|
19
|
+
```
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Sample|Population Z-Score
|
2
|
+
|
3
|
+
Alias: `zscore`
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
collection = [1, 1, 2, 3, 10]
|
7
|
+
results = {
|
8
|
+
1 => -0.7095748751868991,
|
9
|
+
2 => -0.4139186771923578,
|
10
|
+
3 => -0.11826247919781649,
|
11
|
+
10 => 1.9513309067639724
|
12
|
+
}
|
13
|
+
|
14
|
+
klass = Lite::Statistics::Descriptive.new(collection)
|
15
|
+
klass.sample_zscore
|
16
|
+
|
17
|
+
# - or -
|
18
|
+
|
19
|
+
Lite::Statistics::Descriptive.population_zscore(collection)
|
20
|
+
|
21
|
+
# - or -
|
22
|
+
|
23
|
+
collection.sample_zscore
|
24
|
+
```
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rails/generators'
|
4
|
+
|
5
|
+
module Lite
  module Statistics
    # Rails generator that installs the lite-statistics initializer file
    # into the host application.
    class InstallGenerator < Rails::Generators::Base

      # Templates are looked up in the "templates" directory that sits next to this file.
      source_root File.expand_path('templates', File.dirname(__FILE__))

      # Copies the bundled install.rb template to
      # config/initializers/lite-statistics.rb in the target application.
      def copy_initializer_file
        copy_file 'install.rb', 'config/initializers/lite-statistics.rb'
      end

    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Load the core library files in the listed order; Descriptive subclasses
# Base, so base has to be required before descriptive.
%w[version configuration base descriptive].each do |filename|
  require "lite/statistics/#{filename}"
end

# Optional Enumerable extensions. This condition is evaluated once, while this
# file is being required, so only configuration that is already in place at
# that moment can influence it (monkey_patches defaults to true).
require 'lite/statistics/enumerable' if Lite::Statistics.configuration.monkey_patches

# The install generator requires 'rails/generators'. Only load it when the
# Rails generator framework is already present, so the gem can still be
# required from plain Ruby without raising LoadError.
# NOTE(review): assumes Rails is not a hard runtime dependency of the gem - confirm against the gemspec.
require 'generators/lite/statistics/install_generator' if defined?(Rails::Generators)
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lite
  module Statistics

    # Container for gem-wide settings.
    class Configuration

      # Flag consulted when the gem decides whether to load its Enumerable
      # extensions. Defaults to true.
      attr_accessor :monkey_patches

      def initialize
        @monkey_patches = true
      end

    end

    class << self

      # Replaces the current configuration object.
      attr_writer :configuration

      # Returns the current configuration, lazily building a default one on
      # first access.
      def configuration
        @configuration ||= Configuration.new
      end

      # Yields the current configuration so that settings can be changed in
      # a block:
      #
      #   Lite::Statistics.configure { |config| config.monkey_patches = false }
      def configure
        yield(configuration)
      end

    end

  end
end
|
@@ -0,0 +1,361 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lite
  module Statistics
    # Descriptive statistics for a collection of numeric values.
    #
    # Each calculation listed in CALCULATIONS is available as an instance
    # method and as a class-level shortcut that builds a throwaway instance:
    #
    #   Lite::Statistics::Descriptive.new([1, 1, 2, 3, 10]).mean
    #   Lite::Statistics::Descriptive.mean([1, 1, 2, 3, 10])
    #
    # Most calculations return nil for an empty collection. Results are
    # wrapped in +memoize+, which is inherited from Base (not visible here;
    # presumably it caches the block result under the given key). The early
    # +return+ statements inside those blocks return from the surrounding
    # method itself.
    #
    # NOTE(review): the population_* calculations divide by n - 1 and the
    # sample_* calculations divide by n. That is the reverse of the usual
    # convention (Bessel's correction is normally applied to samples). It is
    # left untouched because the documented results depend on it - confirm
    # the intent before changing.
    class Descriptive < Lite::Statistics::Base

      # Names of the calculations exposed as class-level shortcuts.
      CALCULATIONS ||= %i[
        frequencies max mean median midrange min mode proportions percentile_from_value
        population_coefficient_of_variation population_kurtosis population_size population_skewness
        population_standard_deviation population_standard_error population_summary
        population_variance population_zscore range sample_coefficient_of_variation sample_kurtosis
        sample_size sample_skewness sample_standard_deviation sample_standard_error sample_summary
        sample_variance sample_zscore sum value_from_percentile
      ].freeze

      # collection - the values to analyse. The methods below call empty?,
      #              size, sum, sort, inject and each_with_object on it.
      def initialize(collection)
        @collection = collection
      end

      class << self

        # Define Descriptive.<calculation>(collection, *args) for every
        # calculation; each call delegates to a new instance.
        CALCULATIONS.each do |name|
          define_method(name) do |collection, *args|
            klass = new(collection)
            klass.send(name, *args)
          end
        end

      end

      # Hash of value => number of occurrences (missing keys default to 0).
      def frequencies
        memoize(:frequencies) do
          return if @collection.empty?

          @collection.each_with_object(Hash.new(0)) { |val, hash| hash[val] += 1 }
        end
      end

      # Largest value, taken from the shared sorted copy.
      # rubocop:disable Style/UnneededSort
      def max
        memoize(:max) do
          return if @collection.empty?

          sort.last
        end
      end
      # rubocop:enable Style/UnneededSort

      # Arithmetic mean as a Float.
      def mean
        memoize(:mean) do
          return if @collection.empty?

          sum / size.to_f
        end
      end

      alias average mean

      # Middle value for an odd-sized collection; the mean of the two middle
      # values (as a Float) for an even-sized one.
      # rubocop:disable Metrics/AbcSize
      def median
        memoize(:median) do
          return if @collection.empty?
          return sort[size / 2] unless size.even?

          (sort[(size / 2) - 1] + sort[size / 2]) / 2.0
        end
      end
      # rubocop:enable Metrics/AbcSize

      # Smallest value, taken from the shared sorted copy.
      # rubocop:disable Style/UnneededSort
      def min
        memoize(:min) do
          return if @collection.empty?

          sort.first
        end
      end
      # rubocop:enable Style/UnneededSort

      # Most frequent value, or nil when the two most frequent values are
      # tied. A collection with a single distinct value has no runner-up, so
      # that value is the mode.
      def mode
        memoize(:mode) do
          return if @collection.empty?

          # When only one distinct value exists the second pair is missing and
          # runner_up_count is nil, which never equals leader_count.
          (leader, leader_count), (_, runner_up_count) =
            frequencies.sort_by { |_, val| -val }.take(2)
          return if leader_count == runner_up_count

          leader
        end
      end

      # Percentage (rounded up to an Integer) of the collection that sorts
      # before +value+.
      def percentile_from_value(value)
        memoize("percentile_from_value_#{value}".to_sym) do
          return if @collection.empty?

          # The index of the first occurrence in the sorted copy is the number
          # of smaller elements. Fall back to counting them directly when the
          # value itself is not part of the collection.
          rank = sort.index(value) || sort.count { |val| val < value }
          (rank / size.to_f * 100).ceil
        end
      end

      alias percentile percentile_from_value

      # Ratio of population_standard_deviation to the mean.
      def population_coefficient_of_variation
        memoize(:population_coefficient_of_variation) do
          return if @collection.empty?

          population_standard_deviation / mean
        end
      end

      # Divisor used by the population_* calculations: element count minus one.
      def population_size
        memoize(:population_size) do
          @collection.size - 1
        end
      end

      # Sum of fourth-power deviations from the mean divided by
      # population_size * population_standard_deviation**4.
      # Returns 0 for a single-element collection.
      def population_kurtosis
        memoize(:population_kurtosis) do
          return if @collection.empty?
          return 0 if size == 1

          quarted_standard_deviation = population_standard_deviation**4
          sum_of_power(4) / (population_size * quarted_standard_deviation.to_f)
        end
      end

      # Sum of cubed deviations from the mean divided by
      # population_size * population_standard_deviation**3.
      # Returns 0 for a single-element collection.
      def population_skewness
        memoize(:population_skewness) do
          return if @collection.empty?
          return 0 if size == 1

          cubed_standard_deviation = population_standard_deviation**3
          sum_of_power(3) / (population_size * cubed_standard_deviation.to_f)
        end
      end

      # Square root of population_variance.
      def population_standard_deviation
        memoize(:population_standard_deviation) do
          return if @collection.empty?

          Math.sqrt(population_variance)
        end
      end

      # population_standard_deviation divided by the square root of population_size.
      def population_standard_error
        memoize(:population_standard_error) do
          return if @collection.empty?

          population_standard_deviation / Math.sqrt(population_size)
        end
      end

      # Hash with the shared summary figures plus every population_* calculation.
      def population_summary
        base_summary.merge(
          population_coefficient_of_variation: population_coefficient_of_variation,
          population_kurtosis: population_kurtosis,
          population_size: population_size,
          population_skewness: population_skewness,
          population_standard_deviation: population_standard_deviation,
          population_standard_error: population_standard_error,
          population_variance: population_variance,
          population_zscore: population_zscore
        )
      end

      # Sum of squared deviations from the mean divided by population_size.
      def population_variance
        memoize(:population_variance) do
          return if @collection.empty?

          sum_of_power(2) / population_size.to_f
        end
      end

      # Hash of value => (value - mean) / population_standard_deviation.
      # Returns nil for fewer than two elements and an empty Hash (default 0)
      # when the standard deviation is zero.
      def population_zscore
        memoize(:population_zscore) do
          return if size < 2
          return Hash.new(0) if population_standard_deviation.zero?

          @collection.each_with_object({}) do |val, hash|
            hash[val] ||= (val - mean) / population_standard_deviation
          end
        end
      end

      # Mean of the smallest and largest value, as a Float.
      def midrange
        memoize(:midrange) do
          return if @collection.empty?

          [min, max].sum / 2.0
        end
      end

      alias midextreme midrange

      # Hash of value => share of the collection (occurrences / size).
      def proportions
        memoize(:proportions) do
          return if @collection.empty?

          frequencies.each_with_object({}) { |(key, val), hash| hash[key] = val / size.to_f }
        end
      end

      # Difference between the largest and smallest value.
      def range
        memoize(:range) do
          return if @collection.empty?

          max - min
        end
      end

      # Ratio of sample_standard_deviation to the mean.
      def sample_coefficient_of_variation
        memoize(:sample_coefficient_of_variation) do
          return if @collection.empty?

          sample_standard_deviation / mean
        end
      end

      alias coefficient_of_variation sample_coefficient_of_variation

      # Divisor used by the sample_* calculations: the element count.
      def sample_size
        memoize(:sample_size) do
          @collection.size
        end
      end

      alias size sample_size

      # Sum of fourth-power deviations from the mean divided by
      # sample_size * sample_standard_deviation**4.
      # Returns 0 for a single-element collection.
      def sample_kurtosis
        memoize(:sample_kurtosis) do
          return if @collection.empty?
          return 0 if size == 1

          quarted_standard_deviation = sample_standard_deviation**4
          sum_of_power(4) / (sample_size * quarted_standard_deviation.to_f)
        end
      end

      alias kurtosis sample_kurtosis

      # Sum of cubed deviations from the mean divided by
      # sample_size * sample_standard_deviation**3.
      # Returns 0 for a single-element collection.
      def sample_skewness
        memoize(:sample_skewness) do
          return if @collection.empty?
          return 0 if size == 1

          cubed_standard_deviation = sample_standard_deviation**3
          sum_of_power(3) / (sample_size * cubed_standard_deviation.to_f)
        end
      end

      alias skewness sample_skewness

      # Square root of sample_variance.
      def sample_standard_deviation
        memoize(:sample_standard_deviation) do
          return if @collection.empty?

          Math.sqrt(sample_variance)
        end
      end

      alias standard_deviation sample_standard_deviation

      # sample_standard_deviation divided by the square root of sample_size.
      def sample_standard_error
        memoize(:sample_standard_error) do
          return if @collection.empty?

          sample_standard_deviation / Math.sqrt(sample_size)
        end
      end

      alias standard_error sample_standard_error

      # Hash with the shared summary figures plus every sample_* calculation.
      def sample_summary
        base_summary.merge(
          sample_coefficient_of_variation: sample_coefficient_of_variation,
          sample_kurtosis: sample_kurtosis,
          sample_size: sample_size,
          sample_skewness: sample_skewness,
          sample_standard_deviation: sample_standard_deviation,
          sample_standard_error: sample_standard_error,
          sample_variance: sample_variance,
          sample_zscore: sample_zscore
        )
      end

      alias summary sample_summary

      # Sum of squared deviations from the mean divided by sample_size.
      def sample_variance
        memoize(:sample_variance) do
          return if @collection.empty?

          sum_of_power(2) / sample_size.to_f
        end
      end

      alias variance sample_variance

      # Hash of value => (value - mean) / sample_standard_deviation.
      # Returns nil for fewer than two elements and an empty Hash (default 0)
      # when the standard deviation is zero.
      def sample_zscore
        memoize(:sample_zscore) do
          return if size < 2
          return Hash.new(0) if sample_standard_deviation.zero?

          @collection.each_with_object({}) do |val, hash|
            hash[val] ||= (val - mean) / sample_standard_deviation
          end
        end
      end

      alias zscore sample_zscore

      # Sum of all values.
      def sum
        memoize(:sum) do
          @collection.sum
        end
      end

      # Value found at the position ceil(percentile / 100 * size) of the
      # sorted collection.
      def value_from_percentile(percentile)
        memoize("value_from_percentile_#{percentile}".to_sym) do
          return if @collection.empty?

          index = (percentile.to_f / 100 * size).ceil
          # Keep the position inside the array: percentile 100 would otherwise
          # point one past the last element, and a negative percentile would
          # wrap around to the end.
          sort[index.clamp(0, size - 1)]
        end
      end

      alias percentile_rank value_from_percentile

      private

      # Figures shared by sample_summary and population_summary, including
      # the quartiles (25th, 50th and 75th percentile values).
      # rubocop:disable Metrics/MethodLength
      def base_summary
        {
          frequencies: frequencies,
          max: max,
          mean: mean,
          median: median,
          midrange: midrange,
          min: min,
          mode: mode,
          proportions: proportions,
          quartile_1: value_from_percentile(25),
          quartile_2: value_from_percentile(50),
          quartile_3: value_from_percentile(75),
          range: range,
          size: size,
          sum: sum
        }
      end
      # rubocop:enable Metrics/MethodLength

      # Sorted copy of the collection, shared by the order-based calculations.
      def sort
        memoize(:sort) do
          @collection.sort
        end
      end

      # Sum of (value - mean)**power over the whole collection.
      def sum_of_power(power)
        @collection.inject(0) { |acc, val| acc + (val - mean)**power }
      end

    end
  end
end
|