rails-data-explorer 0.2.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -3
  3. data/README.md +2 -0
  4. data/lib/rails-data-explorer-no-rails.rb +36 -32
  5. data/lib/rails-data-explorer.rb +38 -35
  6. data/lib/rails_data_explorer.rb +29 -10
  7. data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
  8. data/lib/rails_data_explorer/active_record_extension.rb +19 -0
  9. data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
  10. data/lib/rails_data_explorer/chart/anova.rb +1 -0
  11. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
  12. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
  13. data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
  14. data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
  15. data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
  16. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
  17. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
  18. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
  19. data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
  20. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
  21. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
  22. data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
  23. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
  24. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
  25. data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
  26. data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
  27. data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
  28. data/lib/rails_data_explorer/data_series.rb +241 -0
  29. data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
  30. data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
  31. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
  32. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
  33. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
  34. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
  35. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
  36. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
  37. data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
  38. data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
  39. data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
  40. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
  41. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
  42. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
  43. data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
  44. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
  45. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
  46. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
  47. data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
  48. data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
  49. data/rails-data-explorer.gemspec +5 -6
  50. data/spec/rails_data_explorer/chart_spec.rb +11 -0
  51. data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
  52. data/spec/rails_data_explorer/data_set_spec.rb +31 -0
  53. data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
  54. data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
  55. data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
  56. data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
  57. data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
  58. data/spec/rails_data_explorer/data_type_spec.rb +7 -0
  59. data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
  60. data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
  61. data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
  62. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
  63. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
  64. data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
  65. data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
  66. metadata +91 -82
  67. data/lib/rails-data-explorer/active_record_extension.rb +0 -14
  68. data/lib/rails-data-explorer/constants.rb +0 -5
  69. data/lib/rails-data-explorer/data_series.rb +0 -156
  70. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
  71. data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -0,0 +1 @@
1
+ # http://en.wikipedia.org/wiki/Analysis_of_variance
@@ -1,7 +1,16 @@
1
- # http://johan.github.io/d3/ex/box.html
2
- # http://bl.ocks.org/mbostock/4061502
1
+ # -*- coding: utf-8 -*-
2
+
3
3
  class RailsDataExplorer
4
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a box plot for univariate analysis of a quantitative data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
12
+ # http://johan.github.io/d3/ex/box.html
13
+ # http://bl.ocks.org/mbostock/4061502
5
14
  class BoxPlot < Chart
6
15
 
7
16
  def initialize(_data_set, options = {})
@@ -18,7 +27,7 @@ class RailsDataExplorer
18
27
  min: x_ds.min_val,
19
28
  max: x_ds.max_val,
20
29
  base_width: 120,
21
- base_height: 800,
30
+ base_height: 960,
22
31
  axis_tick_format: x_ds.axis_tick_format,
23
32
  }
24
33
  end
@@ -1,16 +1,26 @@
1
- # http://bl.ocks.org/jensgrubert/7789216
2
- # http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
3
- # http://mbostock.github.io/protovis/ex/box-and-whisker.html
4
- # http://bl.ocks.org/mbostock/4061502
5
- # http://johan.github.io/d3/ex/box.html
6
- # http://johan.github.io/d3/ex/box.html
7
- # http://bl.ocks.org/mbostock/4061502
1
+ # -*- coding: utf-8 -*-
2
+
8
3
  class RailsDataExplorer
9
4
  class Chart
10
- class BoxPlotGroup < Chart
11
5
 
12
- # TODO: imitate this:
13
- # http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
6
+ # Responsibilities:
7
+ # * Render a group of box plots for bivariate analysis of a categorical and
8
+ # a numerical data series. One box plot is rendered for each distinct
9
+ # categorical value.
10
+ #
11
+ # Collaborators:
12
+ # * DataSet
13
+ #
14
+ # http://bl.ocks.org/jensgrubert/7789216
15
+ # http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
16
+ # http://mbostock.github.io/protovis/ex/box-and-whisker.html
17
+ # http://bl.ocks.org/mbostock/4061502
18
+ # http://johan.github.io/d3/ex/box.html
19
+ # http://johan.github.io/d3/ex/box.html
20
+ # http://bl.ocks.org/mbostock/4061502
21
+ # TODO: imitate this:
22
+ # http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
23
+ class BoxPlotGroup < Chart
14
24
 
15
25
  def initialize(_data_set, options = {})
16
26
  @data_set = _data_set
@@ -24,19 +34,16 @@ class RailsDataExplorer
24
34
  y_candidates = @data_set.data_series.find_all { |ds|
25
35
  (ds.chart_roles[Chart::BoxPlotGroup] & [:y, :any]).any?
26
36
  }
27
-
28
37
  x_ds = x_candidates.first
29
38
  y_ds = (y_candidates - [x_ds]).first
30
39
  return false if x_ds.nil? || y_ds.nil?
31
40
 
32
- min = x_ds.min_val # get global min
33
- max = x_ds.max_val # get global max
34
-
41
+ # initialize values_hash
35
42
  values_hash = y_ds.uniq_vals.inject({}) { |m,y_val|
36
43
  m[y_val] = []
37
44
  m
38
45
  }
39
-
46
+ # populate values hash
40
47
  y_ds.values.each_with_index { |y_val, idx|
41
48
  next if (y_val.nil? || Float::NAN == y_val)
42
49
  values_hash[y_val] << x_ds.values[idx]
@@ -49,16 +56,36 @@ class RailsDataExplorer
49
56
  )
50
57
  sorted_values = y_sorted_keys.map { |y_val| values_hash[y_val] }
51
58
 
59
+ # Compute min and max values based on interquartile range of each
60
+ # boxplot. Objective is to normalize boxplots so that the widest chart
61
+ # uses almost the entire space available.
62
+ # Iterate over all individual boxplots
63
+ global_min = Float::INFINITY
64
+ global_max = -Float::INFINITY
65
+ sorted_values.each { |x_vals|
66
+ ds = DataSeries.new('_', x_vals)
67
+ desc_stats = ds.descriptive_statistics
68
+ # compute first and third quartile. Use min and max if they are nil
69
+ # for very small data series with only one or two entries.
70
+ q1 = desc_stats.detect { |e| '25%ile' == e[:label] }[:value] || x_vals.min
71
+ q3 = desc_stats.detect { |e| '75%ile' == e[:label] }[:value] || x_vals.max
72
+ iqr = (q3 - q1) * 1.5
73
+ local_min = [x_vals.min, q1 - iqr].max
74
+ global_min = [global_min, local_min].min
75
+ local_max = [x_vals.max, q3 + iqr].min
76
+ global_max = [global_max, local_max].max
77
+ }
78
+
52
79
  {
53
80
  values: sorted_values,
54
81
  category_labels: y_sorted_keys,
55
- min: min,
56
- max: max,
57
- base_width: 120,
58
- base_height: 800,
82
+ min: global_min,
83
+ max: global_max,
84
+ base_width: 100,
85
+ base_height: 960,
59
86
  axis_tick_format: x_ds.axis_tick_format,
60
87
  num_box_plots: y_ds.uniq_vals_count,
61
- axis_scale: DataSeries.new('_', [min, max]).axis_scale(:d3)
88
+ axis_scale: DataSeries.new('_', [global_min, global_max]).axis_scale(:d3)
62
89
  }
63
90
  end
64
91
 
@@ -85,7 +112,7 @@ class RailsDataExplorer
85
112
  (function() {
86
113
  var base_width = #{ ca[:base_width] },
87
114
  base_height = #{ ca[:base_height] },
88
- margin = { top: 10, right: 50, bottom: 95, left: 50 },
115
+ margin = { top: 10, right: 40, bottom: 10, left: 40 },
89
116
  width = base_width - margin.left - margin.right,
90
117
  height = base_height - margin.top - margin.bottom;
91
118
 
@@ -106,7 +133,7 @@ class RailsDataExplorer
106
133
  var svg = d3.select("##{ dom_id }").selectAll("svg")
107
134
  .data(data)
108
135
  .append("g")
109
- .attr("transform", "rotate(90) translate(" + (width + margin.left) + " -" + (height + margin.bottom) + ")")
136
+ .attr("transform", "rotate(90) translate(" + margin.left + " -" + (height + margin.bottom) + ")")
110
137
  .call(chart);
111
138
 
112
139
  // Function to compute the interquartile range.
@@ -1,14 +1,25 @@
1
- # See this project for code to compute chi_square and contingency_coefficient
2
- # https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
3
- #
4
- # Resources for Chi Squared Test
5
- # * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
6
- # * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
7
- # * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
1
+ # -*- coding: utf-8 -*-
8
2
 
9
- # Contingency table and chi squared test is a good tool for interpreting A/B tests.
10
3
  class RailsDataExplorer
11
4
  class Chart
5
+
6
+ # Contingency table and chi squared test are great tools for interpreting
7
+ # A/B tests.
8
+ #
9
+ # Responsibilities:
10
+ # * Render a contingency table for bivariate analysis of two categorical
11
+ # data series.
12
+ #
13
+ # Collaborators:
14
+ # * DataSet
15
+ #
16
+ # See this project for code to compute chi_square and contingency_coefficient
17
+ # https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
18
+ #
19
+ # Resources for Chi Squared Test
20
+ # * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
21
+ # * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
22
+ # * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
12
23
  class ContingencyTable < Chart
13
24
 
14
25
  def initialize(_data_set, options = {})
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a table with descriptive statistics for a data series of any type.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class DescriptiveStatisticsTable < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a histogram for univariate analysis of a categorical data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class HistogramCategorical < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -12,7 +21,8 @@ class RailsDataExplorer
12
21
  return false if x_ds.nil?
13
22
 
14
23
  # compute histogram
15
- h = x_ds.values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
24
+ val_mod = { name: :limit_distinct_values }
25
+ h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
16
26
  histogram_values_ds = DataSeries.new('_', h.values)
17
27
  y_scale_type = histogram_values_ds.axis_scale(:vega)
18
28
  bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
@@ -50,7 +60,7 @@ class RailsDataExplorer
50
60
  <script type="text/javascript">
51
61
  (function() {
52
62
  var spec = {
53
- "width": 800,
63
+ "width": 960,
54
64
  "height": 200,
55
65
  "padding": {"top": 10, "left": 70, "bottom": 50, "right": 10},
56
66
  "data": [
@@ -169,12 +179,6 @@ class RailsDataExplorer
169
179
  )
170
180
  end
171
181
 
172
- # Render HistogramCategorical only if there is a fairly small number of
173
- # distinct values.
174
- def render?
175
- !@data_set.data_series.first.has_many_uniq_vals?
176
- end
177
-
178
182
  end
179
183
  end
180
184
  end
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a histogram for univariate analysis of a quantitative data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class HistogramQuantitative < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -15,7 +24,7 @@ class RailsDataExplorer
15
24
  quantizer = Utils::DataQuantizer.new(x_ds, max_number_of_bins: 100)
16
25
  quantized_values = quantizer.values
17
26
  number_of_bars = quantizer.number_of_bins
18
- width = 800
27
+ width = 960
19
28
  h = quantized_values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
20
29
  histogram_values_ds = DataSeries.new('_', h.values)
21
30
  y_scale_type = histogram_values_ds.axis_scale(:vega)
@@ -33,6 +42,7 @@ class RailsDataExplorer
33
42
  y_scale_type: y_scale_type,
34
43
  y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
35
44
  bar_y2_val: bar_y2_val,
45
+ css_class: 'rde-histogram-quantitative',
36
46
  }
37
47
  end
38
48
 
@@ -45,7 +55,7 @@ class RailsDataExplorer
45
55
 
46
56
  def render_vega(ca)
47
57
  %(
48
- <div class="rde-chart rde-histogram-quantitative">
58
+ <div class="rde-chart #{ ca[:css_class] }">
49
59
  <h3 class="rde-chart-title">Histogram</h3>
50
60
  <div id="#{ dom_id }"></div>
51
61
  <script type="text/javascript">
@@ -1,6 +1,14 @@
1
- # TODO: could I use histogram_quantitative instead and just tweak the tick mark format?
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
3
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a histogram for univariate analysis of a temporal data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
4
12
  class HistogramTemporal < HistogramQuantitative
5
13
 
6
14
  def compute_chart_attrs
@@ -17,7 +25,7 @@ class RailsDataExplorer
17
25
  histogram_values_ds = DataSeries.new('_', h.values)
18
26
  y_scale_type = histogram_values_ds.axis_scale(:vega)
19
27
  bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
20
- width = 800
28
+ width = 960
21
29
  {
22
30
  values: h.map { |k,v| { x: k, y: v } },
23
31
  width: width,
@@ -31,6 +39,7 @@ class RailsDataExplorer
31
39
  y_scale_type: y_scale_type,
32
40
  y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
33
41
  bar_y2_val: bar_y2_val,
42
+ css_class: 'rde-histogram-temporal',
34
43
  }
35
44
  end
36
45
 
@@ -1 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  # http://dc-js.github.io/dc.js/
@@ -1,6 +1,16 @@
1
- # TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
1
+ # -*- coding: utf-8 -*-
2
+
2
3
  class RailsDataExplorer
3
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a parallel coordinates chart for multivariate analysis of
8
+ # a mix of quantitative, temporal, and categorical data series.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
13
+ # TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
4
14
  class ParallelCoordinates < Chart
5
15
 
6
16
  def initialize(_data_set, options = {})
@@ -1,7 +1,16 @@
1
- # http://www.jasondavies.com/parallel-sets/
2
- # Suitable when all data series are categorical
1
+ # -*- coding: utf-8 -*-
2
+
3
3
  class RailsDataExplorer
4
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a parallel set chart for multivariate analysis of categorical
8
+ # data series.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
13
+ # http://www.jasondavies.com/parallel-sets/
5
14
  class ParallelSet < Chart
6
15
 
7
16
  def initialize(_data_set, options = {})
@@ -1,5 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a pie chart for univariate analysis of a categorical data series.
8
+ #
9
+ # Collaborators:
10
+ # * DataSet
11
+ #
3
12
  class PieChart < Chart
4
13
 
5
14
  def initialize(_data_set, options = {})
@@ -11,9 +20,10 @@ class RailsDataExplorer
11
20
  x_ds = @data_set.data_series.first
12
21
  return false if x_ds.nil?
13
22
 
14
- total_count = x_ds.values.length
23
+ val_mod = { name: :limit_distinct_values }
24
+ total_count = x_ds.values(val_mod).length
15
25
  # compute histogram
16
- h = x_ds.values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
26
+ h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
17
27
  {
18
28
  values: h.map { |k,v|
19
29
  { key: k, value: (v / total_count.to_f) }
@@ -137,12 +147,6 @@ class RailsDataExplorer
137
147
  )
138
148
  end
139
149
 
140
- # Render PieChart only if there is a fairly small number of
141
- # distinct values.
142
- def render?
143
- !@data_set.data_series.first.has_many_uniq_vals?
144
- end
145
-
146
150
  end
147
151
  end
148
152
  end
@@ -1,5 +1,17 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a scatter plot for either
8
+ # * bivariate analysis of two quantitative data series or
9
+ # * multivariate analysis of two quantitative and one categorical
10
+ # data series.
11
+ #
12
+ # Collaborators:
13
+ # * DataSet
14
+ #
3
15
  class Scatterplot < Chart
4
16
 
5
17
  def initialize(_data_set, options = {})
@@ -81,7 +93,7 @@ class RailsDataExplorer
81
93
  <script type="text/javascript">
82
94
  (function() {
83
95
  var spec = {
84
- "width": 800,
96
+ "width": 960,
85
97
  "height": 200,
86
98
  "data": [
87
99
  {
@@ -1 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  # http://benjiec.github.io/scatter-matrix/demo/demo.html#
@@ -1,6 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+
1
3
  class RailsDataExplorer
2
4
  class Chart
3
- class StackedBarChartCategoricalPercent < Chart
5
+
6
+ # Responsibilities:
7
+ # * Render a stacked bar chart for bivariate analysis of two categorical
8
+ # data series. Renders absolute frequencies of y-data series.
9
+ #
10
+ # Collaborators:
11
+ # * DataSet
12
+ #
13
+ class StackedBarChartCategorical < Chart
4
14
 
5
15
  def initialize(_data_set, options = {})
6
16
  @data_set = _data_set
@@ -8,9 +18,11 @@ class RailsDataExplorer
8
18
  end
9
19
 
10
20
  def compute_chart_attrs
21
+ val_mod = { name: :limit_distinct_values }
22
+
11
23
  x_candidates = @data_set.data_series.find_all { |ds|
12
24
  (ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:x, :any]).any?
13
- }.sort { |a,b| b.uniq_vals.length <=> a.uniq_vals.length }
25
+ }.sort { |a,b| b.uniq_vals_count(val_mod) <=> a.uniq_vals_count(val_mod) }
14
26
  y_candidates = @data_set.data_series.find_all { |ds|
15
27
  (ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:y, :any]).any?
16
28
  }
@@ -21,31 +33,30 @@ class RailsDataExplorer
21
33
 
22
34
  # initialize data_matrix
23
35
  data_matrix = { _sum: { _sum: 0 } }
24
- x_ds.uniq_vals.each { |x_val|
36
+ x_ds.uniq_vals(val_mod).each { |x_val|
25
37
  data_matrix[x_val] = {}
26
38
  data_matrix[x_val][:_sum] = 0
27
- y_ds.uniq_vals.each { |y_val|
39
+ y_ds.uniq_vals(val_mod).each { |y_val|
28
40
  data_matrix[x_val][y_val] = 0
29
41
  data_matrix[:_sum][y_val] = 0
30
42
  }
31
43
  }
32
44
  # populate data_matrix
33
- x_ds.values.length.times { |idx|
34
- x_val = x_ds.values[idx]
35
- y_val = y_ds.values[idx]
45
+ x_ds.values(val_mod).length.times { |idx|
46
+ x_val = x_ds.values(val_mod)[idx]
47
+ y_val = y_ds.values(val_mod)[idx]
36
48
  data_matrix[x_val][y_val] += 1
37
49
  data_matrix[:_sum][y_val] += 1
38
50
  data_matrix[x_val][:_sum] += 1
39
51
  data_matrix[:_sum][:_sum] += 1
40
52
  }
41
-
42
- x_sorted_keys = x_ds.uniq_vals.sort(
53
+ x_sorted_keys = x_ds.uniq_vals(val_mod).sort(
43
54
  &x_ds.label_sorter(
44
55
  nil,
45
56
  lambda { |a,b| data_matrix[b][:_sum] <=> data_matrix[a][:_sum] }
46
57
  )
47
58
  )
48
- y_sorted_keys = y_ds.uniq_vals.sort(
59
+ y_sorted_keys = y_ds.uniq_vals(val_mod).sort(
49
60
  &y_ds.label_sorter(
50
61
  nil,
51
62
  lambda { |a,b| data_matrix[:_sum][b] <=> data_matrix[:_sum][a] }
@@ -58,7 +69,7 @@ class RailsDataExplorer
58
69
  x_sorted_keys.map { |x_val|
59
70
  {
60
71
  x: x_val,
61
- y: (data_matrix[x_val][y_val] / data_matrix[x_val][:_sum].to_f) * 100,
72
+ y: compute_y_value(data_matrix, x_val, y_val),
62
73
  c: y_val
63
74
  }
64
75
  }
@@ -70,11 +81,22 @@ class RailsDataExplorer
70
81
  values: values,
71
82
  x_axis_label: x_ds.name,
72
83
  x_axis_tick_format: 'function(d) { return d }',
73
- y_axis_label: "#{ y_ds.name } distribution [%]",
84
+ y_axis_label: compute_y_axis_label(y_ds.name),
74
85
  y_axis_tick_format: "d3.format('.1%')",
75
86
  }
76
87
  end
77
88
 
89
+ # Override this method to change how the y value is computed. E.g., to
90
+ # change from absolute values to percentages.
91
+ def compute_y_value(data_matrix, x_val, y_val)
92
+ data_matrix[x_val][y_val]
93
+ end
94
+
95
+ # @param y_ds_name [String] name of the y data series
96
+ def compute_y_axis_label(y_ds_name)
97
+ "Frequency"
98
+ end
99
+
78
100
  def render
79
101
  return '' unless render?
80
102
  ca = compute_chart_attrs
@@ -90,9 +112,9 @@ class RailsDataExplorer
90
112
  <script type="text/javascript">
91
113
  (function() {
92
114
  var spec = {
93
- "width": 800,
115
+ "width": 960,
94
116
  "height": 200,
95
- "padding": {"top": 10, "left": 50, "bottom": 50, "right": 100},
117
+ "padding": {"top": 10, "left": 70, "bottom": 50, "right": 100},
96
118
  "data": [
97
119
  {
98
120
  "name": "table",
@@ -139,6 +161,7 @@ class RailsDataExplorer
139
161
  "scale": "y",
140
162
  "title": "#{ ca[:y_axis_label] }",
141
163
  "format": #{ ca[:y_axis_tick_format] },
164
+ "titleOffset": 60,
142
165
  }
143
166
  ],
144
167
  "marks": [