lite-statistics 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.fasterer.yml +19 -0
  3. data/.gitignore +11 -0
  4. data/.rspec +4 -0
  5. data/.rubocop.yml +24 -0
  6. data/.travis.yml +24 -0
  7. data/CHANGELOG.md +11 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +6 -0
  10. data/Gemfile.lock +119 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.md +165 -0
  13. data/Rakefile +8 -0
  14. data/_config.yml +1 -0
  15. data/benchmarks/descriptive_statistics.rb +60 -0
  16. data/bin/console +15 -0
  17. data/bin/setup +8 -0
  18. data/docs/.DS_Store +0 -0
  19. data/docs/descriptive/COEFFICIENT_OF_VARIATION.md +19 -0
  20. data/docs/descriptive/FREQUENCIES.md +22 -0
  21. data/docs/descriptive/KURTOSIS.md +19 -0
  22. data/docs/descriptive/MAX.md +17 -0
  23. data/docs/descriptive/MEAN.md +19 -0
  24. data/docs/descriptive/MEDIAN.md +17 -0
  25. data/docs/descriptive/MIDRANGE.md +19 -0
  26. data/docs/descriptive/MIN.md +17 -0
  27. data/docs/descriptive/MODE.md +17 -0
  28. data/docs/descriptive/PERCENTILE_FROM_VALUE.md +19 -0
  29. data/docs/descriptive/PROPORTIONS.md +22 -0
  30. data/docs/descriptive/RANGE.md +17 -0
  31. data/docs/descriptive/SIZE.md +19 -0
  32. data/docs/descriptive/SKEWNESS.md +19 -0
  33. data/docs/descriptive/STANDARD_DEVIATION.md +19 -0
  34. data/docs/descriptive/STANDARD_ERROR.md +19 -0
  35. data/docs/descriptive/SUM.md +17 -0
  36. data/docs/descriptive/SUMMARY.md +25 -0
  37. data/docs/descriptive/VALUE_FROM_PERCENTILE.md +19 -0
  38. data/docs/descriptive/VARIANCE.md +19 -0
  39. data/docs/descriptive/ZSCORE.md +24 -0
  40. data/lib/generators/lite/statistics/install_generator.rb +17 -0
  41. data/lib/generators/lite/statistics/templates/install.rb +5 -0
  42. data/lib/lite/statistics.rb +9 -0
  43. data/lib/lite/statistics/base.rb +19 -0
  44. data/lib/lite/statistics/configuration.rb +29 -0
  45. data/lib/lite/statistics/descriptive.rb +361 -0
  46. data/lib/lite/statistics/enumerable.rb +24 -0
  47. data/lib/lite/statistics/version.rb +9 -0
  48. data/lite-statistics.gemspec +48 -0
  49. metadata +202 -0
@@ -0,0 +1,25 @@
1
+ # Sample|Population Summary
2
+
3
+ Alias: `summary`
4
+
5
+ ```ruby
6
+ collection = [1, 1, 2, 3, 10]
7
+ results = {
8
+ # - all documented calculations - ...
9
+ # - including value_from_percentile of 25, 50, and 75 (quartile) -
10
+ quartile_1: 2,
11
+ quartile_2: 3,
12
+ quartile_3: 10
13
+ }
14
+
15
+ klass = Lite::Statistics::Descriptive.new(collection)
16
+ klass.sample_summary
17
+
18
+ # - or -
19
+
20
+ Lite::Statistics::Descriptive.population_summary(collection)
21
+
22
+ # - or -
23
+
24
+ collection.sample_summary
25
+ ```
@@ -0,0 +1,19 @@
1
+ # Value from Percentile
2
+
3
+ Alias: `percentile_rank`
4
+
5
+ ```ruby
6
+ collection = [1, 1, 2, 3, 10]
7
+ results = 3
8
+
9
+ klass = Lite::Statistics::Descriptive.new(collection)
10
+ klass.value_from_percentile(60)
11
+
12
+ # - or -
13
+
14
+ Lite::Statistics::Descriptive.value_from_percentile(collection, 60)
15
+
16
+ # - or -
17
+
18
+ collection.value_from_percentile(60)
19
+ ```
@@ -0,0 +1,19 @@
1
+ # Sample|Population Variance
2
+
3
+ Alias: `variance`
4
+
5
+ ```ruby
6
+ collection = [1, 1, 2, 3, 10]
7
+ results = 11.44
8
+
9
+ klass = Lite::Statistics::Descriptive.new(collection)
10
+ klass.sample_variance
11
+
12
+ # - or -
13
+
14
+ Lite::Statistics::Descriptive.population_variance(collection)
15
+
16
+ # - or -
17
+
18
+ collection.sample_variance
19
+ ```
@@ -0,0 +1,24 @@
1
+ # Sample|Population Z-Score
2
+
3
+ Alias: `zscore`
4
+
5
+ ```ruby
6
+ collection = [1, 1, 2, 3, 10]
7
+ results = {
8
+ 1 => -0.7095748751868991,
9
+ 2 => -0.4139186771923578,
10
+ 3 => -0.11826247919781649,
11
+ 10 => 1.9513309067639724
12
+ }
13
+
14
+ klass = Lite::Statistics::Descriptive.new(collection)
15
+ klass.sample_zscore
16
+
17
+ # - or -
18
+
19
+ Lite::Statistics::Descriptive.population_zscore(collection)
20
+
21
+ # - or -
22
+
23
+ collection.sample_zscore
24
+ ```
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+
5
module Lite
  module Statistics
    # Rails generator that installs the gem's initializer into the host application.
    class InstallGenerator < Rails::Generators::Base

      # Templates live in the "templates" directory next to this file.
      source_root File.expand_path('templates', __dir__)

      # Copies templates/install.rb to config/initializers/lite-statistics.rb
      # in the application the generator is run from.
      def copy_initializer_file
        copy_file('install.rb', 'config/initializers/lite-statistics.rb')
      end

    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Initializer template copied into the host application by the install generator.
#
# NOTE(review): lib/lite/statistics.rb reads `monkey_patches` at the moment the gem is
# required, which presumably happens before this initializer runs - confirm that changing
# the value here actually takes effect.
Lite::Statistics.configure do |config|
  config.monkey_patches = true
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ %w[version configuration base descriptive].each do |filename|
4
+ require "lite/statistics/#{filename}"
5
+ end
6
+
7
+ require 'lite/statistics/enumerable' if Lite::Statistics.configuration.monkey_patches
8
+
9
+ require 'generators/lite/statistics/install_generator'
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Lite
  module Statistics
    # Shared foundation for calculators: a small per-instance result cache.
    class Base

      private

      # Lazily created store of computed results, keyed by calculation name.
      def cache
        @cache ||= {}
      end

      # Returns the value stored under +key+, computing it with the given block on a miss.
      #
      # Presence is tested with +key?+ rather than +||=+ so that +nil+ and +false+ results
      # are cached as well instead of being recomputed on every call. The block is yielded
      # no arguments. A +return+ inside the block still exits the calling method without
      # storing anything.
      def memoize(key)
        return cache[key] if cache.key?(key)

        cache[key] = yield
      end

    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Lite
  module Statistics

    # Holds the gem-wide settings.
    class Configuration

      # Flag checked by lib/lite/statistics.rb to decide whether
      # lite/statistics/enumerable gets required. Defaults to true.
      attr_accessor :monkey_patches

      def initialize
        @monkey_patches = true
      end

    end

    class << self

      # Replaces the current configuration object wholesale.
      attr_writer :configuration

      # The current configuration, created with defaults on first access.
      def configuration
        @configuration ||= Configuration.new
      end

      # Yields the current configuration so settings can be changed in a block:
      #
      #   Lite::Statistics.configure { |config| config.monkey_patches = false }
      def configure
        yield(configuration)
      end

    end

  end
end
@@ -0,0 +1,361 @@
1
+ # frozen_string_literal: true
2
+
3
module Lite
  module Statistics
    # Descriptive statistics over a collection of numbers.
    #
    # Every name in CALCULATIONS is available as an instance method and as a class-level
    # shortcut that takes the collection as its first argument:
    #
    #   Lite::Statistics::Descriptive.new([1, 1, 2, 3, 10]).mean
    #   Lite::Statistics::Descriptive.mean([1, 1, 2, 3, 10])
    #
    # Results are cached per instance through Base#memoize. Calculations that need data
    # return nil for an empty collection.
    #
    # NOTE(review): the population_* methods divide by (size - 1) while the sample_* methods
    # divide by size, which looks like the reverse of the usual convention (Bessel's
    # correction normally applies to samples). Left as is because the published examples
    # show the current numbers - confirm before swapping.
    class Descriptive < Lite::Statistics::Base

      CALCULATIONS ||= %i[
        frequencies max mean median midrange min mode proportions percentile_from_value
        population_coefficient_of_variation population_kurtosis population_size population_skewness
        population_standard_deviation population_standard_error population_summary
        population_variance population_zscore range sample_coefficient_of_variation sample_kurtosis
        sample_size sample_skewness sample_standard_deviation sample_standard_error sample_summary
        sample_variance sample_zscore sum value_from_percentile
      ].freeze

      # collection - the values to analyse; it is read through empty?, size, sort, sum,
      # inject and each_with_object.
      def initialize(collection)
        @collection = collection
      end

      class << self

        # Class-level shortcuts: build a throwaway instance and run the calculation on it.
        CALCULATIONS.each do |name|
          define_method(name) do |collection, *args|
            new(collection).public_send(name, *args)
          end
        end

      end

      # Hash of value => number of occurrences.
      def frequencies
        return if @collection.empty?

        memoize(:frequencies) do
          @collection.each_with_object(Hash.new(0)) { |val, hash| hash[val] += 1 }
        end
      end

      # Largest value. Reads from the memoized sorted copy.
      # rubocop:disable Style/UnneededSort
      def max
        return if @collection.empty?

        memoize(:max) { sort.last }
      end
      # rubocop:enable Style/UnneededSort

      # Arithmetic mean as a Float.
      def mean
        return if @collection.empty?

        memoize(:mean) { sum / size.to_f }
      end

      alias average mean

      # Middle value of the sorted collection; for an even size, the mean of the two
      # middle values (as a Float).
      def median
        return if @collection.empty?

        memoize(:median) do
          middle = size / 2
          size.even? ? (sort[middle - 1] + sort[middle]) / 2.0 : sort[middle]
        end
      end

      # Smallest value. Reads from the memoized sorted copy.
      # rubocop:disable Style/UnneededSort
      def min
        return if @collection.empty?

        memoize(:min) { sort.first }
      end
      # rubocop:enable Style/UnneededSort

      # Most frequent value, or nil when the two highest frequencies tie.
      def mode
        return if @collection.empty?

        memoize(:mode) do
          (top_value, top_count), runner_up = frequencies.sort_by { |_, count| -count }.take(2)
          # With a single distinct value there is no runner-up, so that value is the mode.
          next if runner_up&.last == top_count

          top_value
        end
      end

      # Percentage (rounded up) of sorted positions that precede the first occurrence of
      # +value+. Returns nil when +value+ is not part of the collection.
      def percentile_from_value(value)
        return if @collection.empty?

        memoize(:"percentile_from_value_#{value}") do
          position = sort.index(value)
          next if position.nil?

          (position / size.to_f * 100).ceil
        end
      end

      alias percentile percentile_from_value

      def population_coefficient_of_variation
        return if @collection.empty?

        memoize(:population_coefficient_of_variation) do
          population_standard_deviation / mean
        end
      end

      # Divisor used by the population_* calculations: collection size minus one.
      def population_size
        memoize(:population_size) { @collection.size - 1 }
      end

      def population_kurtosis
        return if @collection.empty?
        return 0 if size == 1

        memoize(:population_kurtosis) do
          sum_of_power(4) / (population_size * (population_standard_deviation**4).to_f)
        end
      end

      def population_skewness
        return if @collection.empty?
        return 0 if size == 1

        memoize(:population_skewness) do
          sum_of_power(3) / (population_size * (population_standard_deviation**3).to_f)
        end
      end

      def population_standard_deviation
        return if @collection.empty?

        memoize(:population_standard_deviation) { Math.sqrt(population_variance) }
      end

      def population_standard_error
        return if @collection.empty?

        memoize(:population_standard_error) do
          population_standard_deviation / Math.sqrt(population_size)
        end
      end

      # Hash of the shared calculations plus every population_* calculation.
      def population_summary
        base_summary.merge(
          population_coefficient_of_variation: population_coefficient_of_variation,
          population_kurtosis: population_kurtosis,
          population_size: population_size,
          population_skewness: population_skewness,
          population_standard_deviation: population_standard_deviation,
          population_standard_error: population_standard_error,
          population_variance: population_variance,
          population_zscore: population_zscore
        )
      end

      def population_variance
        return if @collection.empty?

        memoize(:population_variance) { sum_of_power(2) / population_size.to_f }
      end

      # Hash of value => z-score. nil for fewer than two values; an empty hash with a
      # default of 0 when there is no spread to divide by.
      def population_zscore
        return if size < 2
        return Hash.new(0) if population_standard_deviation.zero?

        memoize(:population_zscore) do
          @collection.each_with_object({}) do |val, hash|
            hash[val] ||= (val - mean) / population_standard_deviation
          end
        end
      end

      # Mean of the smallest and largest value.
      def midrange
        return if @collection.empty?

        memoize(:midrange) { [min, max].sum / 2.0 }
      end

      alias midextreme midrange

      # Hash of value => share of the collection (frequency divided by size).
      def proportions
        return if @collection.empty?

        memoize(:proportions) do
          frequencies.each_with_object({}) { |(key, val), hash| hash[key] = val / size.to_f }
        end
      end

      # Difference between the largest and smallest value.
      def range
        return if @collection.empty?

        memoize(:range) { max - min }
      end

      def sample_coefficient_of_variation
        return if @collection.empty?

        memoize(:sample_coefficient_of_variation) do
          sample_standard_deviation / mean
        end
      end

      alias coefficient_of_variation sample_coefficient_of_variation

      # Divisor used by the sample_* calculations: the collection size.
      def sample_size
        memoize(:sample_size) { @collection.size }
      end

      alias size sample_size

      def sample_kurtosis
        return if @collection.empty?
        return 0 if size == 1

        memoize(:sample_kurtosis) do
          sum_of_power(4) / (sample_size * (sample_standard_deviation**4).to_f)
        end
      end

      alias kurtosis sample_kurtosis

      def sample_skewness
        return if @collection.empty?
        return 0 if size == 1

        memoize(:sample_skewness) do
          sum_of_power(3) / (sample_size * (sample_standard_deviation**3).to_f)
        end
      end

      alias skewness sample_skewness

      def sample_standard_deviation
        return if @collection.empty?

        memoize(:sample_standard_deviation) { Math.sqrt(sample_variance) }
      end

      alias standard_deviation sample_standard_deviation

      def sample_standard_error
        return if @collection.empty?

        memoize(:sample_standard_error) do
          sample_standard_deviation / Math.sqrt(sample_size)
        end
      end

      alias standard_error sample_standard_error

      # Hash of the shared calculations plus every sample_* calculation.
      def sample_summary
        base_summary.merge(
          sample_coefficient_of_variation: sample_coefficient_of_variation,
          sample_kurtosis: sample_kurtosis,
          sample_size: sample_size,
          sample_skewness: sample_skewness,
          sample_standard_deviation: sample_standard_deviation,
          sample_standard_error: sample_standard_error,
          sample_variance: sample_variance,
          sample_zscore: sample_zscore
        )
      end

      alias summary sample_summary

      def sample_variance
        return if @collection.empty?

        memoize(:sample_variance) { sum_of_power(2) / sample_size.to_f }
      end

      alias variance sample_variance

      # Hash of value => z-score. nil for fewer than two values; an empty hash with a
      # default of 0 when there is no spread to divide by.
      def sample_zscore
        return if size < 2
        return Hash.new(0) if sample_standard_deviation.zero?

        memoize(:sample_zscore) do
          @collection.each_with_object({}) do |val, hash|
            hash[val] ||= (val - mean) / sample_standard_deviation
          end
        end
      end

      alias zscore sample_zscore

      # Total of all values.
      def sum
        memoize(:sum) { @collection.sum }
      end

      # Value found at the given percentile of the sorted collection.
      #
      # The computed position is clamped to the valid index range, so a percentile of 100
      # (or more) yields the largest value instead of reading past the end, and a negative
      # percentile yields the smallest value instead of wrapping around.
      def value_from_percentile(percentile)
        return if @collection.empty?

        memoize(:"value_from_percentile_#{percentile}") do
          index = (percentile.to_f / 100 * size).ceil
          sort[index.clamp(0, size - 1)]
        end
      end

      alias percentile_rank value_from_percentile

      private

      # Calculations shared by sample_summary and population_summary.
      # rubocop:disable Metrics/MethodLength
      def base_summary
        {
          frequencies: frequencies,
          max: max,
          mean: mean,
          median: median,
          midrange: midrange,
          min: min,
          mode: mode,
          proportions: proportions,
          quartile_1: value_from_percentile(25),
          quartile_2: value_from_percentile(50),
          quartile_3: value_from_percentile(75),
          range: range,
          size: size,
          sum: sum
        }
      end
      # rubocop:enable Metrics/MethodLength

      # Sorted copy of the collection, computed once per instance.
      def sort
        memoize(:sort) { @collection.sort }
      end

      # Sum of each value's deviation from the mean raised to +power+.
      # Kept as a plain left-to-right fold so floating point results stay bit-for-bit
      # what callers already see.
      def sum_of_power(power)
        @collection.inject(0) { |acc, val| acc + (val - mean)**power }
      end

    end
  end
end