rails-data-explorer 0.2.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -3
  3. data/README.md +2 -0
  4. data/lib/rails-data-explorer-no-rails.rb +36 -32
  5. data/lib/rails-data-explorer.rb +38 -35
  6. data/lib/rails_data_explorer.rb +29 -10
  7. data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
  8. data/lib/rails_data_explorer/active_record_extension.rb +19 -0
  9. data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
  10. data/lib/rails_data_explorer/chart/anova.rb +1 -0
  11. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
  12. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
  13. data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
  14. data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
  15. data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
  16. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
  17. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
  18. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
  19. data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
  20. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
  21. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
  22. data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
  23. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
  24. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
  25. data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
  26. data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
  27. data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
  28. data/lib/rails_data_explorer/data_series.rb +241 -0
  29. data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
  30. data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
  31. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
  32. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
  33. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
  34. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
  35. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
  36. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
  37. data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
  38. data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
  39. data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
  40. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
  41. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
  42. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
  43. data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
  44. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
  45. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
  46. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
  47. data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
  48. data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
  49. data/rails-data-explorer.gemspec +5 -6
  50. data/spec/rails_data_explorer/chart_spec.rb +11 -0
  51. data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
  52. data/spec/rails_data_explorer/data_set_spec.rb +31 -0
  53. data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
  54. data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
  55. data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
  56. data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
  57. data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
  58. data/spec/rails_data_explorer/data_type_spec.rb +7 -0
  59. data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
  60. data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
  61. data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
  62. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
  63. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
  64. data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
  65. data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
  66. metadata +91 -82
  67. data/lib/rails-data-explorer/active_record_extension.rb +0 -14
  68. data/lib/rails-data-explorer/constants.rb +0 -5
  69. data/lib/rails-data-explorer/data_series.rb +0 -156
  70. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
  71. data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -1,10 +1,19 @@
1
- # Container for data series
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
4
+
5
+ # Responsibilities:
6
+ # * Container for DataSeries
7
+ #
8
+ # Collaborators:
9
+ # * DataSeries
10
+ # * Exploration
11
+ #
3
12
  class DataSet
4
13
 
5
14
  attr_reader :data_series
6
15
 
7
- # @param[Array<Numeric, String, Symbol, Nil, Hash, DataSeries>] values_or_data_series
16
+ # @param values_or_data_series [Array<Numeric, String, Symbol, Nil, Hash, DataSeries>]
8
17
  # Array can contain the following:
9
18
  # * Numeric, String, Symbol, Nil - for a single data series
10
19
  # * Hash - for multiple data series with the following keys:
@@ -13,7 +22,7 @@ class RailsDataExplorer
13
22
  # * :chart_roles [Array<Symbol>, optional] - what to use this series for. possible values: :x, :y, :color
14
23
  # * :data_type (optional) - :quantitative, :categorical, :temporal
15
24
  # * DataSeries
16
- # @param[String] exploration_title used as fall back for data series name
25
+ # @param exploration_title [String] used as fall back for data series name
17
26
  def initialize(values_or_data_series, exploration_title)
18
27
  @data_series = initialize_data_series(values_or_data_series, exploration_title)
19
28
  validate_data_series
@@ -39,7 +48,7 @@ class RailsDataExplorer
39
48
  else
40
49
  raise(
41
50
  ArgumentError.new(
42
- "Invalid datum. Only Hash, Numeric, String, Symbol, and Nil are allowed. " + \
51
+ "Invalid datum. Only DataSeries, Hash, ActiveSupport::TimeWithZone, DateTime, Numeric, NilClass, String, or Symbol are allowed. " + \
43
52
  "Found #{ values_or_data_series.first.class.to_s }."
44
53
  )
45
54
  )
@@ -1,4 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
4
+
5
+ # Responsibilities:
6
+ # * Represent a type of data
7
+ # * Determine available chart types
8
+ # * Compute descriptive statistics
9
+ # * Compute modified values
10
+ #
11
+ # Collaborators:
12
+ # * DataSeries
13
+ # * Chart
14
+ #
2
15
  class DataType
3
16
 
4
17
  # @param[Hash, optional] constraints
@@ -1,9 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
5
+
6
+ # Responsibilities:
7
+ # * Provide available charts and statistics for categorical data type.
8
+ # * Provide methods for categorical data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
3
13
  class Categorical < DataType
4
14
 
5
- # TODO: when there are too many categories, only separate the N most
6
- # significant ones and group all other values under "Other"
7
15
  def all_available_chart_types
8
16
  [
9
17
  {
@@ -12,12 +20,12 @@ class RailsDataExplorer
12
20
  dimensions_count_min: 1,
13
21
  dimensions_count_max: 1,
14
22
  },
15
- # {
16
- # chart_class: Chart::PieChart,
17
- # chart_roles: [:any],
18
- # dimensions_count_min: 1,
19
- # dimensions_count_max: 1,
20
- # },
23
+ {
24
+ chart_class: Chart::PieChart,
25
+ chart_roles: [:any],
26
+ dimensions_count_min: 1,
27
+ dimensions_count_max: 1,
28
+ },
21
29
  {
22
30
  chart_class: Chart::BoxPlotGroup,
23
31
  chart_roles: [:y],
@@ -34,6 +42,12 @@ class RailsDataExplorer
34
42
  chart_roles: [:dimension],
35
43
  dimensions_count_min: 3,
36
44
  },
45
+ {
46
+ chart_class: Chart::StackedBarChartCategorical,
47
+ chart_roles: [:x, :y],
48
+ dimensions_count_min: 2,
49
+ dimensions_count_max: 2,
50
+ },
37
51
  {
38
52
  chart_class: Chart::StackedBarChartCategoricalPercent,
39
53
  chart_roles: [:x, :y],
@@ -67,7 +81,7 @@ class RailsDataExplorer
67
81
  end
68
82
 
69
83
  def descriptive_statistics(values)
70
- frequencies = values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
84
+ frequencies = compute_histogram(values)
71
85
  labels_ds = DataSeries.new('_', values.uniq)
72
86
  total_count = values.length
73
87
  ruby_formatters = {
@@ -168,25 +182,43 @@ class RailsDataExplorer
168
182
  %(function(d) { return d })
169
183
  end
170
184
 
171
- # @param[Symbol, nil] label_val_key the hash key to use to get the label value during sort (sent to a,b)
172
- # @param[DataSeries] data_series the ds that contains the uniq vals
173
- # @param[Proc] value_sorter the sorting proc to use if not sorted numerically
174
- # @return[Proc] a Proc that will be used by #sort
185
+ # @param label_val_key [Symbol, nil] the hash key to use to get the label value during sort (sent to a,b)
186
+ # @param data_series [DataSeries] the ds that contains the uniq vals
187
+ # @param value_sorter [Proc] the sorting proc to use if not sorted numerically
188
+ # @return [Proc] a Proc that will be used by #sort
175
189
  def label_sorter(label_val_key, data_series, value_sorter)
176
190
  if data_series.uniq_vals.any? { |e| e.to_s =~ /^[\+\-]?\d+/ }
177
191
  # Sort numerical categories by key ASC
192
+ # This lambda can be used in conjunction with `#sort`.
193
+ # It returns -1, 0, or 1
178
194
  lambda { |a,b|
179
195
  number_and_full_string_extractor = lambda { |val|
180
196
  str = label_val_key ? val[label_val_key] : val
181
197
  number = str.gsub(/^[^\d\+\-]*/, '') # remove non-digit leading chars
182
198
  .gsub(',', '') # remove delimiter commas, they throw off to_f parsing
183
- .to_f
184
- number += 1 if str =~ /^>/ # increase highest threshold by one for proper sorting
199
+ if '' != number
200
+ # label contains digits
201
+ number = number.to_f
202
+ number += 1 if str =~ /^>/ # increase highest threshold by one for proper sorting
203
+ number -= 1 if str =~ /^</ # decrease lowest threshold by one for proper sorting
204
+ else
205
+ # label doesn't contain digits, set to nil to sort at end
206
+ number = nil
207
+ end
185
208
  [number, str]
186
209
  }
187
- a_number_and_full_string = number_and_full_string_extractor.call(a)
188
- b_number_and_full_string = number_and_full_string_extractor.call(b)
189
- a_number_and_full_string <=> b_number_and_full_string
210
+ a_num, a_str = number_and_full_string_extractor.call(a)
211
+ b_num, b_str = number_and_full_string_extractor.call(b)
212
+ if a_num && b_num
213
+ # Both numbers are present, compare them
214
+ [a_num, a_str] <=> [b_num, b_str]
215
+ elsif a_num
216
+ # a_num is present, b_num isn't. Sort a before b
217
+ -1
218
+ else
219
+ # a_num is not present (b_num may or may not be). Sort a after b
220
+ 1
221
+ end
190
222
  }
191
223
  else
192
224
  # Use provided value sorter
@@ -194,6 +226,35 @@ class RailsDataExplorer
194
226
  end
195
227
  end
196
228
 
229
+ # Returns values, keeping only the most frequent distinct observations. Groups
230
+ # less frequent observations under val_for_others.
231
+ # @param values [Array]
232
+ # @param max_num_vals [Integer] the max number of distinct values to return (including val_for_others)
233
+ # @param val_for_others [String, optional] defaults to '[Other]'
234
+ def limit_distinct_values(values, max_num_vals, val_for_others = nil)
235
+ distinct_values = values.uniq
236
+ # Return values if they already have lte max_num_vals distinct observations
237
+ return values if distinct_values.length <= max_num_vals
238
+
239
+ val_for_others ||= '[Other]'
240
+ frequencies = compute_histogram(values)
241
+ top_vals = frequencies.to_a.sort { |a,b|
242
+ # a = [value, frequency]
243
+ # Sort by frequency DESC, value ASC
244
+ [b.last, a.first] <=> [a.last, b.first]
245
+ }.first(max_num_vals - 1).map { |e| e.first }
246
+ values.map { |e| top_vals.include?(e) ? e : val_for_others }
247
+ end
248
+
249
+ protected
250
+
251
+ # Computes a histogram for values
252
+ # @param values [Array]
253
+ # @return [Hash] distinct vals as keys and their frequency as value
254
+ def compute_histogram(values)
255
+ values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
256
+ end
257
+
197
258
  end
198
259
  end
199
260
  end
@@ -1 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  # For displaying data on maps.
@@ -1,8 +1,18 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
- class Quantitative < DataType
4
5
 
5
- # This is an abstract class. Use sub_classes
6
+ # This is an abstract class. Use sub_classes
7
+ #
8
+ # Responsibilities:
9
+ # * Provide available charts and statistics for quantitative data type.
10
+ # * Provide methods for quantitative data type.
11
+ #
12
+ # Collaborators:
13
+ # * DataSet
14
+ #
15
+ class Quantitative < DataType
6
16
 
7
17
  def all_available_chart_types
8
18
  [
@@ -122,9 +132,9 @@ class RailsDataExplorer
122
132
  raise "Implement me in sub_class"
123
133
  end
124
134
 
125
- def axis_scale(data_series, d3_or_vega)
135
+ def axis_scale(data_series, modification, d3_or_vega)
126
136
  # Log scales can't handle 0 values
127
- if data_series.min_val > 0.0 && data_series.has_large_dynamic_range?
137
+ if data_series.min_val(modification) > 0.0 && data_series.has_large_dynamic_range?(modification)
128
138
  { d3: 'd3.scale.log', vega: 'log' }[d3_or_vega]
129
139
  else
130
140
  { d3: 'd3.scale.linear', vega: 'linear' }[d3_or_vega]
@@ -1,6 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
5
  class Quantitative
6
+
7
+ # Responsibilities:
8
+ # * Provide methods for decimal quantitative data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
4
13
  class Decimal < Quantitative
5
14
 
6
15
  def axis_tick_format(values)
@@ -1,6 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
5
  class Quantitative
6
+
7
+ # Responsibilities:
8
+ # * Provide methods for integer quantitative data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
4
13
  class Integer < Quantitative
5
14
 
6
15
  def axis_tick_format(values)
@@ -1,6 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class DataType
3
5
  class Quantitative
6
+
7
+ # Responsibilities:
8
+ # * Provide methods for temporal quantitative data type.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
4
13
  class Temporal < Quantitative
5
14
 
6
15
  def all_available_chart_types
@@ -1,6 +1,18 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  require 'rails'
2
4
 
3
5
  class RailsDataExplorer
6
+
7
+ # Responsibilities:
8
+ # * Tie RailsDataExplorer into a Rails app
9
+ # * Initialize ActionViewExtension
10
+ # * Tell rails which assets to precompile
11
+ #
12
+ # Collaborators:
13
+ # * ActiveSupport
14
+ # * RailsDataExplorer
15
+ #
4
16
  class Engine < ::Rails::Engine
5
17
 
6
18
  # It's an engine so that we can add javascript and image assets
@@ -1,4 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
4
+
5
+ # Responsibilities:
6
+ # * Represent and initialize a data exploration
7
+ # * Initialize and render self (including charts)
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ # * Chart
12
+ #
2
13
  class Exploration
3
14
 
4
15
  attr_accessor :output_buffer # required for content_tag
@@ -0,0 +1,72 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ class RailsDataExplorer
4
+ module Statistics
5
+
6
+ # From http://en.wikipedia.org/wiki/Pearson's_chi-squared_test
7
+
8
+ # Pearson's chi-squared test is used to assess whether paired observations on two
9
+ # variables, expressed in a contingency table, are independent of each other.
10
+
11
+ # An "observation" consists of the values of two outcomes and the null hypothesis
12
+ # is that the occurrence of these outcomes is statistically independent. Each
13
+ # observation is allocated to one cell of a two-dimensional array of cells (called
14
+ # a contingency table) according to the values of the two outcomes.
15
+
16
+ # Assumptions
17
+ # -----------
18
+
19
+ # The chi-squared test, when used with the standard approximation that a chi-
20
+ # squared distribution is applicable, has the following assumptions:
21
+
22
+ # * Simple random sample – The sample data is a random sampling from a fixed
23
+ # distribution or population where every collection of members of the population
24
+ # of the given sample size has an equal probability of selection. Variants of
25
+ # the test have been developed for complex samples, such as where the data is
26
+ # weighted. Other forms can be used such as purposive sampling.
27
+ # * Sample size (whole table) – A sample with a sufficiently large size is assumed.
28
+ # If a chi squared test is conducted on a sample with a smaller size, then the
29
+ # chi squared test will yield an inaccurate inference. The researcher, by using
30
+ # chi squared test on small samples, might end up committing a Type II error.
31
+ # * Expected cell count – Adequate expected cell counts. Some require 5 or more,
32
+ # and others require 10 or more. A common rule is 5 or more in all cells of a
33
+ # 2-by-2 table, and 5 or more in 80% of cells in larger tables, but no cells
34
+ # with zero expected count. When this assumption is not met, Yates's Correction
35
+ # is applied.
36
+ # * Independence – The observations are always assumed to be independent of each
37
+ # other. This means chi-squared cannot be used to test correlated data
38
+ # (like matched pairs or panel data). In those cases you might want to turn to
39
+ # McNemar's test.
40
+
41
+ # Problems
42
+ # --------
43
+
44
+ # The approximation to the chi-squared distribution breaks down if expected
45
+ # frequencies are too low. It will normally be acceptable so long as no more than
46
+ # 20% of the events have expected frequencies below 5. Where there is only 1
47
+ # degree of freedom, the approximation is not reliable if expected frequencies are
48
+ # below 10. In this case, a better approximation can be obtained by reducing the
49
+ # absolute value of each difference between observed and expected frequencies by
50
+ # 0.5 before squaring; this is called Yates's correction for continuity.
51
+
52
+ # In cases where the expected value, E, is found to be small (indicating a small
53
+ # underlying population probability, and/or a small number of observations), the
54
+ # normal approximation of the multinomial distribution can fail, and in such cases
55
+ # it is found to be more appropriate to use the G-test, a likelihood ratio-based
56
+ # test statistic. Where the total sample size is small, it is necessary to use an
57
+ # appropriate exact test, typically either the binomial test or (for contingency
58
+ # tables) Fisher's exact test. This test uses the conditional distribution of the
59
+ # test statistic given the marginal totals; however, it does not assume that the
60
+ # data were generated from an experiment in which the marginal totals are fixed
61
+ # and is valid whether or not that is the case.
62
+ class PearsonsChiSquaredIndependenceTest
63
+
64
+ def initialize(data_matrix, min_probability = 0.05)
65
+ end
66
+
67
+ def compute
68
+ end
69
+
70
+ end
71
+ end
72
+ end
@@ -1,7 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  module Statistics
5
+
6
+ # Responsibilities:
7
+ # * Provide random categorical data. Useful for testing and demo data.
8
+ #
3
9
  class RngCategory
4
10
 
11
+ # @param categories [Array<Object>] the pool of available categories.
12
+ # @param category_probabilities [Array, optional] probability of each category.
13
+ # @param rng [Proc, optional] lambda to generate random numbers which will
14
+ # be mapped to categories.
5
15
  def initialize(categories, category_probabilities = nil, rng = lambda { Kernel.rand })
6
16
  @categories, @category_probabilities, @rng = categories, category_probabilities, rng
7
17
  @category_probabilities ||= @categories.map { |e| @rng.call }
@@ -9,6 +19,7 @@ class RailsDataExplorer
9
19
  @category_order = compute_category_order
10
20
  end
11
21
 
22
+ # Returns a random category
12
23
  def rand
13
24
  r_v = @rng.call
14
25
  rnd = @category_order.detect { |e|
@@ -17,6 +28,8 @@ class RailsDataExplorer
17
28
  rnd[:category]
18
29
  end
19
30
 
31
+ protected
32
+
20
33
  def normalize_category_probabilities
21
34
  total = @category_probabilities.inject(0) { |m,e| m += e }
22
35
  @category_probabilities.map { |e| e / total.to_f }
@@ -1,12 +1,23 @@
1
- # From http://stackoverflow.com/a/9266488
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
3
4
  module Statistics
5
+
6
+ # Responsibilities:
7
+ # * Provide random numeric data, following a gaussian distribution.
8
+ #
9
+ # From http://stackoverflow.com/a/9266488
4
10
  class RngGaussian
11
+
12
+ # @param mean [Float] the expected mean
13
+ # @param sd [Float] the expected standard deviation
14
+ # @param rng [Proc, optional] a random number generator
5
15
  def initialize(mean = 0.0, sd = 1.0, rng = lambda { Kernel.rand })
6
16
  @mean, @sd, @rng = mean, sd, rng
7
17
  @compute_next_pair = false
8
18
  end
9
19
 
20
+ # Returns random numbers with a gaussian distribution.
10
21
  def rand
11
22
  if (@compute_next_pair = !@compute_next_pair)
12
23
  # Compute a pair of random values with normal distribution.