rails-data-explorer 0.2.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -3
  3. data/README.md +2 -0
  4. data/lib/rails-data-explorer-no-rails.rb +36 -32
  5. data/lib/rails-data-explorer.rb +38 -35
  6. data/lib/rails_data_explorer.rb +29 -10
  7. data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
  8. data/lib/rails_data_explorer/active_record_extension.rb +19 -0
  9. data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
  10. data/lib/rails_data_explorer/chart/anova.rb +1 -0
  11. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
  12. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
  13. data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
  14. data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
  15. data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
  16. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
  17. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
  18. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
  19. data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
  20. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
  21. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
  22. data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
  23. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
  24. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
  25. data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
  26. data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
  27. data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
  28. data/lib/rails_data_explorer/data_series.rb +241 -0
  29. data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
  30. data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
  31. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
  32. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
  33. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
  34. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
  35. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
  36. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
  37. data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
  38. data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
  39. data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
  40. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
  41. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
  42. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
  43. data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
  44. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
  45. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
  46. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
  47. data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
  48. data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
  49. data/rails-data-explorer.gemspec +5 -6
  50. data/spec/rails_data_explorer/chart_spec.rb +11 -0
  51. data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
  52. data/spec/rails_data_explorer/data_set_spec.rb +31 -0
  53. data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
  54. data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
  55. data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
  56. data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
  57. data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
  58. data/spec/rails_data_explorer/data_type_spec.rb +7 -0
  59. data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
  60. data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
  61. data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
  62. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
  63. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
  64. data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
  65. data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
  66. metadata +91 -82
  67. data/lib/rails-data-explorer/active_record_extension.rb +0 -14
  68. data/lib/rails-data-explorer/constants.rb +0 -5
  69. data/lib/rails-data-explorer/data_series.rb +0 -156
  70. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
  71. data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -1,10 +1,19 @@
1
- # Container for data series
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
4
+
5
+ # Responsibilities:
6
+ # * Container for DataSeries
7
+ #
8
+ # Collaborators:
9
+ # * DataSeries
10
+ # * Exploration
11
+ #
3
12
  class DataSet
4
13
 
5
14
  attr_reader :data_series
6
15
 
7
- # @param[Array<Numeric, String, Symbol, Nil, Hash, DataSeries>] values_or_data_series
16
+ # @param values_or_data_series [Array<Numeric, String, Symbol, Nil, Hash, DataSeries>]
8
17
  # Array can contain the following:
9
18
  # * Numeric, String, Symbol, Nil - for a single data series
10
19
  # * Hash - for multiple data series with the following keys:
@@ -13,7 +22,7 @@ class RailsDataExplorer
13
22
  # * :chart_roles [Array<Symbol>, optional] - what to use this series for. possible values: :x, :y, :color
14
23
  # * :data_type (optional) - :quantitative, :categorical, :temporal
15
24
  # * DataSeries
16
- # @param[String] exploration_title used as fall back for data series name
25
+ # @param exploration_title [String] used as fall back for data series name
17
26
  def initialize(values_or_data_series, exploration_title)
18
27
  @data_series = initialize_data_series(values_or_data_series, exploration_title)
19
28
  validate_data_series
@@ -39,7 +48,7 @@ class RailsDataExplorer
39
48
  else
40
49
  raise(
41
50
  ArgumentError.new(
42
- "Invalid datum. Only Hash, Numeric, String, Symbol, and Nil are allowed. " + \
51
+ "Invalid datum. Only DataSeries, Hash, ActiveSupport::TimeWithZone, DateTime, Numeric, NilClass, String, or Symbol are allowed. " + \
43
52
  "Found #{ values_or_data_series.first.class.to_s }."
44
53
  )
45
54
  )
@@ -1,4 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
4
+
5
+ # Responsibilities:
6
+ # * Represent a type of data
7
+ # * Determine available chart types
8
+ # * Compute descriptive statistics
9
+ # * Compute modified values
10
+ #
11
+ # Collaborators:
12
+ # * DataSeries
13
+ # * Chart
14
+ #
2
15
  class DataType
3
16
 
4
17
  # @param[Hash, optional] constraints
@@ -1,9 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
5
+
6
+ # Responsibilities:
7
+ # * Provide available charts and statistics for categorical data type.
8
+ # * Provide methods for categorical data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
3
13
  class Categorical < DataType
4
14
 
5
- # TODO: when there are too many categories, only separate the N most
6
- # significant ones and group all other values under "Other"
7
15
  def all_available_chart_types
8
16
  [
9
17
  {
@@ -12,12 +20,12 @@ class RailsDataExplorer
12
20
  dimensions_count_min: 1,
13
21
  dimensions_count_max: 1,
14
22
  },
15
- # {
16
- # chart_class: Chart::PieChart,
17
- # chart_roles: [:any],
18
- # dimensions_count_min: 1,
19
- # dimensions_count_max: 1,
20
- # },
23
+ {
24
+ chart_class: Chart::PieChart,
25
+ chart_roles: [:any],
26
+ dimensions_count_min: 1,
27
+ dimensions_count_max: 1,
28
+ },
21
29
  {
22
30
  chart_class: Chart::BoxPlotGroup,
23
31
  chart_roles: [:y],
@@ -34,6 +42,12 @@ class RailsDataExplorer
34
42
  chart_roles: [:dimension],
35
43
  dimensions_count_min: 3,
36
44
  },
45
+ {
46
+ chart_class: Chart::StackedBarChartCategorical,
47
+ chart_roles: [:x, :y],
48
+ dimensions_count_min: 2,
49
+ dimensions_count_max: 2,
50
+ },
37
51
  {
38
52
  chart_class: Chart::StackedBarChartCategoricalPercent,
39
53
  chart_roles: [:x, :y],
@@ -67,7 +81,7 @@ class RailsDataExplorer
67
81
  end
68
82
 
69
83
  def descriptive_statistics(values)
70
- frequencies = values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
84
+ frequencies = compute_histogram(values)
71
85
  labels_ds = DataSeries.new('_', values.uniq)
72
86
  total_count = values.length
73
87
  ruby_formatters = {
@@ -168,25 +182,43 @@ class RailsDataExplorer
168
182
  %(function(d) { return d })
169
183
  end
170
184
 
171
- # @param[Symbol, nil] label_val_key the hash key to use to get the label value during sort (sent to a,b)
172
- # @param[DataSeries] data_series the ds that contains the uniq vals
173
- # @param[Proc] value_sorter the sorting proc to use if not sorted numerically
174
- # @return[Proc] a Proc that will be used by #sort
185
+ # @param label_val_key [Symbol, nil] the hash key to use to get the label value during sort (sent to a,b)
186
+ # @param data_series [DataSeries] the ds that contains the uniq vals
187
+ # @param value_sorter [Proc] the sorting proc to use if not sorted numerically
188
+ # @return [Proc] a Proc that will be used by #sort
175
189
  def label_sorter(label_val_key, data_series, value_sorter)
176
190
  if data_series.uniq_vals.any? { |e| e.to_s =~ /^[\+\-]?\d+/ }
177
191
  # Sort numerical categories by key ASC
192
+ # This lambda can be used in conjunction with `#sort`.
193
+ # It returns -1, 0, or 1
178
194
  lambda { |a,b|
179
195
  number_and_full_string_extractor = lambda { |val|
180
196
  str = label_val_key ? val[label_val_key] : val
181
197
  number = str.gsub(/^[^\d\+\-]*/, '') # remove non-digit leading chars
182
198
  .gsub(',', '') # remove delimiter commas, they throw off to_f parsing
183
- .to_f
184
- number += 1 if str =~ /^>/ # increase highest threshold by one for proper sorting
199
+ if '' != number
200
+ # label contains digits
201
+ number = number.to_f
202
+ number += 1 if str =~ /^>/ # increase highest threshold by one for proper sorting
203
+ number -= 1 if str =~ /^</ # decrease lowest threshold by one for proper sorting
204
+ else
205
+ # label doesn't contain digits, set to nil to sort at end
206
+ number = nil
207
+ end
185
208
  [number, str]
186
209
  }
187
- a_number_and_full_string = number_and_full_string_extractor.call(a)
188
- b_number_and_full_string = number_and_full_string_extractor.call(b)
189
- a_number_and_full_string <=> b_number_and_full_string
210
+ a_num, a_str = number_and_full_string_extractor.call(a)
211
+ b_num, b_str = number_and_full_string_extractor.call(b)
212
+ if a_num && b_num
213
+ # Both numbers are present, compare them
214
+ [a_num, a_str] <=> [b_num, b_str]
215
+ elsif a_num
216
+ # a_num is present, b_num isn't. Sort a before b
217
+ -1
218
+ else
219
+ # a_num is not present (b_num may or may not be). Sort a after b
220
+ 1
221
+ end
190
222
  }
191
223
  else
192
224
  # Use provided value sorter
@@ -194,6 +226,35 @@ class RailsDataExplorer
194
226
  end
195
227
  end
196
228
 
229
+ # Returns the top N most frequent distinct observations in values. Groups
230
+ # less frequent observations under val_for_others.
231
+ # @param values [Array]
232
+ # @param max_num_vals [Integer] the max number of distinct values to return (including val_for_others)
233
+ # @param val_for_others [String, optional] defaults to '[Other]'
234
+ def limit_distinct_values(values, max_num_vals, val_for_others = nil)
235
+ distinct_values = values.uniq
236
+ # Return values unchanged if they already have at most max_num_vals distinct observations
237
+ return values if distinct_values.length <= max_num_vals
238
+
239
+ val_for_others ||= '[Other]'
240
+ frequencies = compute_histogram(values)
241
+ top_vals = frequencies.to_a.sort { |a,b|
242
+ # a = [value, frequency]
243
+ # Sort by frequency DESC, value ASC
244
+ [b.last, a.first] <=> [a.last, b.first]
245
+ }.first(max_num_vals - 1).map { |e| e.first }
246
+ values.map { |e| top_vals.include?(e) ? e : val_for_others }
247
+ end
248
+
249
+ protected
250
+
251
+ # Computes a histogram for values
252
+ # @param values [Array]
253
+ # @return a Hash with distinct vals as keys and their frequency as value
254
+ def compute_histogram(values)
255
+ values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
256
+ end
257
+
197
258
  end
198
259
  end
199
260
  end
@@ -1 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  # For displaying data on maps.
@@ -1,8 +1,18 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
- class Quantitative < DataType
4
5
 
5
- # This is an abstract class. Use sub_classes
6
+ # This is an abstract class. Use sub_classes
7
+ #
8
+ # Responsibilities:
9
+ # * Provide available charts and statistics for quantitative data type.
10
+ # * Provide methods for quantitative data type.
11
+ #
12
+ # Collaborators:
13
+ # * DataSet
14
+ #
15
+ class Quantitative < DataType
6
16
 
7
17
  def all_available_chart_types
8
18
  [
@@ -122,9 +132,9 @@ class RailsDataExplorer
122
132
  raise "Implement me in sub_class"
123
133
  end
124
134
 
125
- def axis_scale(data_series, d3_or_vega)
135
+ def axis_scale(data_series, modification, d3_or_vega)
126
136
  # Log scales can't handle 0 values
127
- if data_series.min_val > 0.0 && data_series.has_large_dynamic_range?
137
+ if data_series.min_val(modification) > 0.0 && data_series.has_large_dynamic_range?(modification)
128
138
  { d3: 'd3.scale.log', vega: 'log' }[d3_or_vega]
129
139
  else
130
140
  { d3: 'd3.scale.linear', vega: 'linear' }[d3_or_vega]
@@ -1,6 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
5
  class Quantitative
6
+
7
+ # Responsibilities:
8
+ # * Provide methods for decimal quantitative data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
4
13
  class Decimal < Quantitative
5
14
 
6
15
  def axis_tick_format(values)
@@ -1,6 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
5
  class Quantitative
6
+
7
+ # Responsibilities:
8
+ # * Provide methods for integer quantitative data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
4
13
  class Integer < Quantitative
5
14
 
6
15
  def axis_tick_format(values)
@@ -1,6 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
5
  class Quantitative
6
+
7
+ # Responsibilities:
8
+ # * Provide methods for temporal quantitative data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
4
13
  class Temporal < Quantitative
5
14
 
6
15
  def all_available_chart_types
@@ -1,6 +1,18 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  require 'rails'
2
4
 
3
5
  class RailsDataExplorer
6
+
7
+ # Responsibilities:
8
+ # * Tie RailsDataExplorer into a Rails app
9
+ # * Initialize ActionViewExtension
10
+ # * Tell rails which assets to precompile
11
+ #
12
+ # Collaborators:
13
+ # * ActiveSupport
14
+ # * RailsDataExplorer
15
+ #
4
16
  class Engine < ::Rails::Engine
5
17
 
6
18
  # It's an engine so that we can add javascript and image assets
@@ -1,4 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
4
+
5
+ # Responsibilities:
6
+ # * Represent and initialize a data exploration
7
+ # * Initialize and render self (including charts)
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ # * Chart
12
+ #
2
13
  class Exploration
3
14
 
4
15
  attr_accessor :output_buffer # required for content_tag
@@ -0,0 +1,72 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ class RailsDataExplorer
4
+ module Statistics
5
+
6
+ # From http://en.wikipedia.org/wiki/Pearson's_chi-squared_test
7
+
8
+ # Pearson's chi-squared test is used to assess whether paired observations on two
9
+ # variables, expressed in a contingency table, are independent of each other.
10
+
11
+ # An "observation" consists of the values of two outcomes and the null hypothesis
12
+ # is that the occurrence of these outcomes is statistically independent. Each
13
+ # observation is allocated to one cell of a two-dimensional array of cells (called
14
+ # a contingency table) according to the values of the two outcomes.
15
+
16
+ # Assumptions
17
+ # -----------
18
+
19
+ # The chi-squared test, when used with the standard approximation that a chi-
20
+ # squared distribution is applicable, has the following assumptions:
21
+
22
+ # * Simple random sample – The sample data is a random sampling from a fixed
23
+ # distribution or population where every collection of members of the population
24
+ # of the given sample size has an equal probability of selection. Variants of
25
+ # the test have been developed for complex samples, such as where the data is
26
+ # weighted. Other forms can be used such as purposive sampling.
27
+ # * Sample size (whole table) – A sample with a sufficiently large size is assumed.
28
+ # If a chi squared test is conducted on a sample with a smaller size, then the
29
+ # chi squared test will yield an inaccurate inference. The researcher, by using
30
+ # chi squared test on small samples, might end up committing a Type II error.
31
+ # * Expected cell count – Adequate expected cell counts. Some require 5 or more,
32
+ # and others require 10 or more. A common rule is 5 or more in all cells of a
33
+ # 2-by-2 table, and 5 or more in 80% of cells in larger tables, but no cells
34
+ # with zero expected count. When this assumption is not met, Yates's Correction
35
+ # is applied.
36
+ # * Independence – The observations are always assumed to be independent of each
37
+ # other. This means chi-squared cannot be used to test correlated data
38
+ # (like matched pairs or panel data). In those cases you might want to turn to
39
+ # McNemar's test.
40
+
41
+ # Problems
42
+ # --------
43
+
44
+ # The approximation to the chi-squared distribution breaks down if expected
45
+ # frequencies are too low. It will normally be acceptable so long as no more than
46
+ # 20% of the events have expected frequencies below 5. Where there is only 1
47
+ # degree of freedom, the approximation is not reliable if expected frequencies are
48
+ # below 10. In this case, a better approximation can be obtained by reducing the
49
+ # absolute value of each difference between observed and expected frequencies by
50
+ # 0.5 before squaring; this is called Yates's correction for continuity.
51
+
52
+ # In cases where the expected value, E, is found to be small (indicating a small
53
+ # underlying population probability, and/or a small number of observations), the
54
+ # normal approximation of the multinomial distribution can fail, and in such cases
55
+ # it is found to be more appropriate to use the G-test, a likelihood ratio-based
56
+ # test statistic. Where the total sample size is small, it is necessary to use an
57
+ # appropriate exact test, typically either the binomial test or (for contingency
58
+ # tables) Fisher's exact test. This test uses the conditional distribution of the
59
+ # test statistic given the marginal totals; however, it does not assume that the
60
+ # data were generated from an experiment in which the marginal totals are fixed
61
+ # and is valid whether or not that is the case.
62
+ class PearsonsChiSquaredIndependenceTest
63
+
64
+ def initialize(data_matrix, min_probability = 0.05)
65
+ end
66
+
67
+ def compute
68
+ end
69
+
70
+ end
71
+ end
72
+ end
@@ -1,7 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  module Statistics
5
+
6
+ # Responsibilities:
7
+ # * Provide random categorical data. Useful for testing and demo data.
8
+ #
3
9
  class RngCategory
4
10
 
11
+ # @param categories [Array<Object>] the pool of available categories.
12
+ # @param category_probabilities [Array, optional] probability of each category.
13
+ # @param rng [Proc, optional] lambda to generate random numbers which will
14
+ # be mapped to categories.
5
15
  def initialize(categories, category_probabilities = nil, rng = lambda { Kernel.rand })
6
16
  @categories, @category_probabilities, @rng = categories, category_probabilities, rng
7
17
  @category_probabilities ||= @categories.map { |e| @rng.call }
@@ -9,6 +19,7 @@ class RailsDataExplorer
9
19
  @category_order = compute_category_order
10
20
  end
11
21
 
22
+ # Returns a random category
12
23
  def rand
13
24
  r_v = @rng.call
14
25
  rnd = @category_order.detect { |e|
@@ -17,6 +28,8 @@ class RailsDataExplorer
17
28
  rnd[:category]
18
29
  end
19
30
 
31
+ protected
32
+
20
33
  def normalize_category_probabilities
21
34
  total = @category_probabilities.inject(0) { |m,e| m += e }
22
35
  @category_probabilities.map { |e| e / total.to_f }
@@ -1,12 +1,23 @@
1
- # From http://stackoverflow.com/a/9266488
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
3
4
  module Statistics
5
+
6
+ # Responsibilities:
7
+ # * Provide random numeric data, following a gaussian distribution.
8
+ #
9
+ # From http://stackoverflow.com/a/9266488
4
10
  class RngGaussian
11
+
12
+ # @param mean [Float] the expected mean
13
+ # @param sd [Float] the expected standard deviation
14
+ # @param rng [Proc, optional] a random number generator
5
15
  def initialize(mean = 0.0, sd = 1.0, rng = lambda { Kernel.rand })
6
16
  @mean, @sd, @rng = mean, sd, rng
7
17
  @compute_next_pair = false
8
18
  end
9
19
 
20
+ # Returns random numbers with a gaussian distribution.
10
21
  def rand
11
22
  if (@compute_next_pair = !@compute_next_pair)
12
23
  # Compute a pair of random values with normal distribution.