rails-data-explorer 0.2.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -3
  3. data/README.md +2 -0
  4. data/lib/rails-data-explorer-no-rails.rb +36 -32
  5. data/lib/rails-data-explorer.rb +38 -35
  6. data/lib/rails_data_explorer.rb +29 -10
  7. data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
  8. data/lib/rails_data_explorer/active_record_extension.rb +19 -0
  9. data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
  10. data/lib/rails_data_explorer/chart/anova.rb +1 -0
  11. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
  12. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
  13. data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
  14. data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
  15. data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
  16. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
  17. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
  18. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
  19. data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
  20. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
  21. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
  22. data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
  23. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
  24. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
  25. data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
  26. data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
  27. data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
  28. data/lib/rails_data_explorer/data_series.rb +241 -0
  29. data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
  30. data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
  31. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
  32. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
  33. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
  34. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
  35. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
  36. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
  37. data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
  38. data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
  39. data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
  40. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
  41. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
  42. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
  43. data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
  44. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
  45. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
  46. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
  47. data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
  48. data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
  49. data/rails-data-explorer.gemspec +5 -6
  50. data/spec/rails_data_explorer/chart_spec.rb +11 -0
  51. data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
  52. data/spec/rails_data_explorer/data_set_spec.rb +31 -0
  53. data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
  54. data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
  55. data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
  56. data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
  57. data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
  58. data/spec/rails_data_explorer/data_type_spec.rb +7 -0
  59. data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
  60. data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
  61. data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
  62. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
  63. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
  64. data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
  65. data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
  66. metadata +91 -82
  67. data/lib/rails-data-explorer/active_record_extension.rb +0 -14
  68. data/lib/rails-data-explorer/constants.rb +0 -5
  69. data/lib/rails-data-explorer/data_series.rb +0 -156
  70. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
  71. data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -0,0 +1 @@
1
+ # http://en.wikipedia.org/wiki/Analysis_of_variance
@@ -1,7 +1,16 @@
1
- # http://johan.github.io/d3/ex/box.html
2
- # http://bl.ocks.org/mbostock/4061502
1
+ # -*- coding: utf-8 -*-
2
+
3
3
  class RailsDataExplorer
4
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a box plot for univariate analysis of a quantitative data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
12
+ # http://johan.github.io/d3/ex/box.html
13
+ # http://bl.ocks.org/mbostock/4061502
5
14
  class BoxPlot < Chart
6
15
 
7
16
  def initialize(_data_set, options = {})
@@ -18,7 +27,7 @@ class RailsDataExplorer
18
27
  min: x_ds.min_val,
19
28
  max: x_ds.max_val,
20
29
  base_width: 120,
21
- base_height: 800,
30
+ base_height: 960,
22
31
  axis_tick_format: x_ds.axis_tick_format,
23
32
  }
24
33
  end
@@ -1,16 +1,26 @@
1
- # http://bl.ocks.org/jensgrubert/7789216
2
- # http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
3
- # http://mbostock.github.io/protovis/ex/box-and-whisker.html
4
- # http://bl.ocks.org/mbostock/4061502
5
- # http://johan.github.io/d3/ex/box.html
6
- # http://johan.github.io/d3/ex/box.html
7
- # http://bl.ocks.org/mbostock/4061502
1
+ # -*- coding: utf-8 -*-
2
+
8
3
  class RailsDataExplorer
9
4
  class Chart
10
- class BoxPlotGroup < Chart
11
5
 
12
- # TODO: imitate this:
13
- # http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
6
+ # Responsibilities:
7
+ # * Render a group of box plots for bivariate analysis of a categorical and
8
+ # a numerical data series. One box plot is rendered for each distinct
9
+ # categorical value.
10
+ #
11
+ # Collaborators:
12
+ # * DataSet
13
+ #
14
+ # http://bl.ocks.org/jensgrubert/7789216
15
+ # http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
16
+ # http://mbostock.github.io/protovis/ex/box-and-whisker.html
17
+ # http://bl.ocks.org/mbostock/4061502
18
+ # http://johan.github.io/d3/ex/box.html
19
+ # http://johan.github.io/d3/ex/box.html
20
+ # http://bl.ocks.org/mbostock/4061502
21
+ # TODO: imitate this:
22
+ # http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
23
+ class BoxPlotGroup < Chart
14
24
 
15
25
  def initialize(_data_set, options = {})
16
26
  @data_set = _data_set
@@ -24,19 +34,16 @@ class RailsDataExplorer
24
34
  y_candidates = @data_set.data_series.find_all { |ds|
25
35
  (ds.chart_roles[Chart::BoxPlotGroup] & [:y, :any]).any?
26
36
  }
27
-
28
37
  x_ds = x_candidates.first
29
38
  y_ds = (y_candidates - [x_ds]).first
30
39
  return false if x_ds.nil? || y_ds.nil?
31
40
 
32
- min = x_ds.min_val # get global min
33
- max = x_ds.max_val # get global max
34
-
41
+ # initialize values_hash
35
42
  values_hash = y_ds.uniq_vals.inject({}) { |m,y_val|
36
43
  m[y_val] = []
37
44
  m
38
45
  }
39
-
46
+ # populate values_hash
40
47
  y_ds.values.each_with_index { |y_val, idx|
41
48
  next if (y_val.nil? || Float::NAN == y_val)
42
49
  values_hash[y_val] << x_ds.values[idx]
@@ -49,16 +56,36 @@ class RailsDataExplorer
49
56
  )
50
57
  sorted_values = y_sorted_keys.map { |y_val| values_hash[y_val] }
51
58
 
59
+ # Compute min and max values based on interquartile range of each
60
+ # boxplot. Objective is to normalize boxplots so that the widest chart
61
+ # uses almost the entire space available.
62
+ # Iterate over all individual boxplots
63
+ global_min = Float::INFINITY
64
+ global_max = -Float::INFINITY
65
+ sorted_values.each { |x_vals|
66
+ ds = DataSeries.new('_', x_vals)
67
+ desc_stats = ds.descriptive_statistics
68
+ # Compute first and third quartile. Fall back to min and max when the
69
+ # quartiles are nil (very small data series with only one or two entries).
70
+ q1 = desc_stats.detect { |e| '25%ile' == e[:label] }[:value] || x_vals.min
71
+ q3 = desc_stats.detect { |e| '75%ile' == e[:label] }[:value] || x_vals.max
72
+ iqr = (q3 - q1) * 1.5
73
+ local_min = [x_vals.min, q1 - iqr].max
74
+ global_min = [global_min, local_min].min
75
+ local_max = [x_vals.max, q3 + iqr].min
76
+ global_max = [global_max, local_max].max
77
+ }
78
+
52
79
  {
53
80
  values: sorted_values,
54
81
  category_labels: y_sorted_keys,
55
- min: min,
56
- max: max,
57
- base_width: 120,
58
- base_height: 800,
82
+ min: global_min,
83
+ max: global_max,
84
+ base_width: 100,
85
+ base_height: 960,
59
86
  axis_tick_format: x_ds.axis_tick_format,
60
87
  num_box_plots: y_ds.uniq_vals_count,
61
- axis_scale: DataSeries.new('_', [min, max]).axis_scale(:d3)
88
+ axis_scale: DataSeries.new('_', [global_min, global_max]).axis_scale(:d3)
62
89
  }
63
90
  end
64
91
 
@@ -85,7 +112,7 @@ class RailsDataExplorer
85
112
  (function() {
86
113
  var base_width = #{ ca[:base_width] },
87
114
  base_height = #{ ca[:base_height] },
88
- margin = { top: 10, right: 50, bottom: 95, left: 50 },
115
+ margin = { top: 10, right: 40, bottom: 10, left: 40 },
89
116
  width = base_width - margin.left - margin.right,
90
117
  height = base_height - margin.top - margin.bottom;
91
118
 
@@ -106,7 +133,7 @@ class RailsDataExplorer
106
133
  var svg = d3.select("##{ dom_id }").selectAll("svg")
107
134
  .data(data)
108
135
  .append("g")
109
- .attr("transform", "rotate(90) translate(" + (width + margin.left) + " -" + (height + margin.bottom) + ")")
136
+ .attr("transform", "rotate(90) translate(" + margin.left + " -" + (height + margin.bottom) + ")")
110
137
  .call(chart);
111
138
 
112
139
  // Function to compute the interquartile range.
@@ -1,14 +1,25 @@
1
- # See this project for code to compute chi_square and contingency_coefficient
2
- # https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
3
- #
4
- # Resources for Chi Squared Test
5
- # * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
6
- # * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
7
- # * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
1
+ # -*- coding: utf-8 -*-
8
2
 
9
- # Contingency table and chi squared test is a good tool for interpreting A/B tests.
10
3
  class RailsDataExplorer
11
4
  class Chart
5
+
6
+ # Contingency table and chi squared test are great tools for interpreting
7
+ # A/B tests.
8
+ #
9
+ # Responsibilities:
10
+ # * Render a contingency table for bivariate analysis of two categorical
11
+ # data series.
12
+ #
13
+ # Collaborators:
14
+ # * DataSet
15
+ #
16
+ # See this project for code to compute chi_square and contingency_coefficient
17
+ # https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
18
+ #
19
+ # Resources for Chi Squared Test
20
+ # * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
21
+ # * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
22
+ # * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
12
23
  class ContingencyTable < Chart
13
24
 
14
25
  def initialize(_data_set, options = {})
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a table with descriptive statistics for a data series of any type.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class DescriptiveStatisticsTable < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a histogram for univariate analysis of a categorical data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class HistogramCategorical < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -12,7 +21,8 @@ class RailsDataExplorer
12
21
  return false if x_ds.nil?
13
22
 
14
23
  # compute histogram
15
- h = x_ds.values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
24
+ val_mod = { name: :limit_distinct_values }
25
+ h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
16
26
  histogram_values_ds = DataSeries.new('_', h.values)
17
27
  y_scale_type = histogram_values_ds.axis_scale(:vega)
18
28
  bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
@@ -50,7 +60,7 @@ class RailsDataExplorer
50
60
  <script type="text/javascript">
51
61
  (function() {
52
62
  var spec = {
53
- "width": 800,
63
+ "width": 960,
54
64
  "height": 200,
55
65
  "padding": {"top": 10, "left": 70, "bottom": 50, "right": 10},
56
66
  "data": [
@@ -169,12 +179,6 @@ class RailsDataExplorer
169
179
  )
170
180
  end
171
181
 
172
- # Render HistogramCategorical only if there is a fairly small number of
173
- # distinct values.
174
- def render?
175
- !@data_set.data_series.first.has_many_uniq_vals?
176
- end
177
-
178
182
  end
179
183
  end
180
184
  end
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a histogram for univariate analysis of a quantitative data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class HistogramQuantitative < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -15,7 +24,7 @@ class RailsDataExplorer
15
24
  quantizer = Utils::DataQuantizer.new(x_ds, max_number_of_bins: 100)
16
25
  quantized_values = quantizer.values
17
26
  number_of_bars = quantizer.number_of_bins
18
- width = 800
27
+ width = 960
19
28
  h = quantized_values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
20
29
  histogram_values_ds = DataSeries.new('_', h.values)
21
30
  y_scale_type = histogram_values_ds.axis_scale(:vega)
@@ -33,6 +42,7 @@ class RailsDataExplorer
33
42
  y_scale_type: y_scale_type,
34
43
  y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
35
44
  bar_y2_val: bar_y2_val,
45
+ css_class: 'rde-histogram-quantitative',
36
46
  }
37
47
  end
38
48
 
@@ -45,7 +55,7 @@ class RailsDataExplorer
45
55
 
46
56
  def render_vega(ca)
47
57
  %(
48
- <div class="rde-chart rde-histogram-quantitative">
58
+ <div class="rde-chart #{ ca[:css_class] }">
49
59
  <h3 class="rde-chart-title">Histogram</h3>
50
60
  <div id="#{ dom_id }"></div>
51
61
  <script type="text/javascript">
@@ -1,6 +1,14 @@
1
- # TODO: could I use histogram_quantitative instead and just tweak the tick mark format?
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
3
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a histogram for univariate analysis of a temporal data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
4
12
  class HistogramTemporal < HistogramQuantitative
5
13
 
6
14
  def compute_chart_attrs
@@ -17,7 +25,7 @@ class RailsDataExplorer
17
25
  histogram_values_ds = DataSeries.new('_', h.values)
18
26
  y_scale_type = histogram_values_ds.axis_scale(:vega)
19
27
  bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
20
- width = 800
28
+ width = 960
21
29
  {
22
30
  values: h.map { |k,v| { x: k, y: v } },
23
31
  width: width,
@@ -31,6 +39,7 @@ class RailsDataExplorer
31
39
  y_scale_type: y_scale_type,
32
40
  y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
33
41
  bar_y2_val: bar_y2_val,
42
+ css_class: 'rde-histogram-temporal',
34
43
  }
35
44
  end
36
45
 
@@ -1 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  # http://dc-js.github.io/dc.js/
@@ -1,6 +1,16 @@
1
- # TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
3
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a parallel coordinates chart for multivariate analysis of
8
+ # a mix of quantitative, temporal, and categorical data series.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
13
+ # TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
4
14
  class ParallelCoordinates < Chart
5
15
 
6
16
  def initialize(_data_set, options = {})
@@ -1,7 +1,16 @@
1
- # http://www.jasondavies.com/parallel-sets/
2
- # Suitable when all data series are categorical
1
+ # -*- coding: utf-8 -*-
2
+
3
3
  class RailsDataExplorer
4
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a parallel set chart for multivariate analysis of categorical
8
+ # data series.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
13
+ # http://www.jasondavies.com/parallel-sets/
5
14
  class ParallelSet < Chart
6
15
 
7
16
  def initialize(_data_set, options = {})
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a pie chart for univariate analysis of a categorical data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class PieChart < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -11,9 +20,10 @@ class RailsDataExplorer
11
20
  x_ds = @data_set.data_series.first
12
21
  return false if x_ds.nil?
13
22
 
14
- total_count = x_ds.values.length
23
+ val_mod = { name: :limit_distinct_values }
24
+ total_count = x_ds.values(val_mod).length
15
25
  # compute histogram
16
- h = x_ds.values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
26
+ h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
17
27
  {
18
28
  values: h.map { |k,v|
19
29
  { key: k, value: (v / total_count.to_f) }
@@ -137,12 +147,6 @@ class RailsDataExplorer
137
147
  )
138
148
  end
139
149
 
140
- # Render PieChart only if there is a fairly small number of
141
- # distinct values.
142
- def render?
143
- !@data_set.data_series.first.has_many_uniq_vals?
144
- end
145
-
146
150
  end
147
151
  end
148
152
  end
@@ -1,5 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a scatter plot for either
8
+ # * bivariate analysis of two quantitative data series or
9
+ # * multivariate analysis of two quantitative and one categorical
10
+ # data series.
11
+ #
12
+ # Collaborators:
13
+ # * DataSet
14
+ #
3
15
  class Scatterplot < Chart
4
16
 
5
17
  def initialize(_data_set, options = {})
@@ -81,7 +93,7 @@ class RailsDataExplorer
81
93
  <script type="text/javascript">
82
94
  (function() {
83
95
  var spec = {
84
- "width": 800,
96
+ "width": 960,
85
97
  "height": 200,
86
98
  "data": [
87
99
  {
@@ -1 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  # http://benjiec.github.io/scatter-matrix/demo/demo.html#
@@ -1,6 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
3
- class StackedBarChartCategoricalPercent < Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a stacked bar chart for bivariate analysis of two categorical
8
+ # data series. Renders absolute frequencies of y-data series.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
13
+ class StackedBarChartCategorical < Chart
4
14
 
5
15
  def initialize(_data_set, options = {})
6
16
  @data_set = _data_set
@@ -8,9 +18,11 @@ class RailsDataExplorer
8
18
  end
9
19
 
10
20
  def compute_chart_attrs
21
+ val_mod = { name: :limit_distinct_values }
22
+
11
23
  x_candidates = @data_set.data_series.find_all { |ds|
12
24
  (ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:x, :any]).any?
13
- }.sort { |a,b| b.uniq_vals.length <=> a.uniq_vals.length }
25
+ }.sort { |a,b| b.uniq_vals_count(val_mod) <=> a.uniq_vals_count(val_mod) }
14
26
  y_candidates = @data_set.data_series.find_all { |ds|
15
27
  (ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:y, :any]).any?
16
28
  }
@@ -21,31 +33,30 @@ class RailsDataExplorer
21
33
 
22
34
  # initialize data_matrix
23
35
  data_matrix = { _sum: { _sum: 0 } }
24
- x_ds.uniq_vals.each { |x_val|
36
+ x_ds.uniq_vals(val_mod).each { |x_val|
25
37
  data_matrix[x_val] = {}
26
38
  data_matrix[x_val][:_sum] = 0
27
- y_ds.uniq_vals.each { |y_val|
39
+ y_ds.uniq_vals(val_mod).each { |y_val|
28
40
  data_matrix[x_val][y_val] = 0
29
41
  data_matrix[:_sum][y_val] = 0
30
42
  }
31
43
  }
32
44
  # populate data_matrix
33
- x_ds.values.length.times { |idx|
34
- x_val = x_ds.values[idx]
35
- y_val = y_ds.values[idx]
45
+ x_ds.values(val_mod).length.times { |idx|
46
+ x_val = x_ds.values(val_mod)[idx]
47
+ y_val = y_ds.values(val_mod)[idx]
36
48
  data_matrix[x_val][y_val] += 1
37
49
  data_matrix[:_sum][y_val] += 1
38
50
  data_matrix[x_val][:_sum] += 1
39
51
  data_matrix[:_sum][:_sum] += 1
40
52
  }
41
-
42
- x_sorted_keys = x_ds.uniq_vals.sort(
53
+ x_sorted_keys = x_ds.uniq_vals(val_mod).sort(
43
54
  &x_ds.label_sorter(
44
55
  nil,
45
56
  lambda { |a,b| data_matrix[b][:_sum] <=> data_matrix[a][:_sum] }
46
57
  )
47
58
  )
48
- y_sorted_keys = y_ds.uniq_vals.sort(
59
+ y_sorted_keys = y_ds.uniq_vals(val_mod).sort(
49
60
  &y_ds.label_sorter(
50
61
  nil,
51
62
  lambda { |a,b| data_matrix[:_sum][b] <=> data_matrix[:_sum][a] }
@@ -58,7 +69,7 @@ class RailsDataExplorer
58
69
  x_sorted_keys.map { |x_val|
59
70
  {
60
71
  x: x_val,
61
- y: (data_matrix[x_val][y_val] / data_matrix[x_val][:_sum].to_f) * 100,
72
+ y: compute_y_value(data_matrix, x_val, y_val),
62
73
  c: y_val
63
74
  }
64
75
  }
@@ -70,11 +81,22 @@ class RailsDataExplorer
70
81
  values: values,
71
82
  x_axis_label: x_ds.name,
72
83
  x_axis_tick_format: 'function(d) { return d }',
73
- y_axis_label: "#{ y_ds.name } distribution [%]",
84
+ y_axis_label: compute_y_axis_label(y_ds.name),
74
85
  y_axis_tick_format: "d3.format('.1%')",
75
86
  }
76
87
  end
77
88
 
89
+ # Override this method to change how the y value is computed. E.g., to
90
+ # change from absolute values to percentages.
91
+ def compute_y_value(data_matrix, x_val, y_val)
92
+ data_matrix[x_val][y_val]
93
+ end
94
+
95
+ # @param y_ds_name [String] name of the y data series
96
+ def compute_y_axis_label(y_ds_name)
97
+ "Frequency"
98
+ end
99
+
78
100
  def render
79
101
  return '' unless render?
80
102
  ca = compute_chart_attrs
@@ -90,9 +112,9 @@ class RailsDataExplorer
90
112
  <script type="text/javascript">
91
113
  (function() {
92
114
  var spec = {
93
- "width": 800,
115
+ "width": 960,
94
116
  "height": 200,
95
- "padding": {"top": 10, "left": 50, "bottom": 50, "right": 100},
117
+ "padding": {"top": 10, "left": 70, "bottom": 50, "right": 100},
96
118
  "data": [
97
119
  {
98
120
  "name": "table",
@@ -139,6 +161,7 @@ class RailsDataExplorer
139
161
  "scale": "y",
140
162
  "title": "#{ ca[:y_axis_label] }",
141
163
  "format": #{ ca[:y_axis_tick_format] },
164
+ "titleOffset": 60,
142
165
  }
143
166
  ],
144
167
  "marks": [