rails-data-explorer 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -3
- data/README.md +2 -0
- data/lib/rails-data-explorer-no-rails.rb +36 -32
- data/lib/rails-data-explorer.rb +38 -35
- data/lib/rails_data_explorer.rb +29 -10
- data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
- data/lib/rails_data_explorer/active_record_extension.rb +19 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
- data/lib/rails_data_explorer/chart/anova.rb +1 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
- data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
- data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
- data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
- data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
- data/lib/rails_data_explorer/data_series.rb +241 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
- data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
- data/rails-data-explorer.gemspec +5 -6
- data/spec/rails_data_explorer/chart_spec.rb +11 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
- data/spec/rails_data_explorer/data_set_spec.rb +31 -0
- data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
- data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
- data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
- data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
- data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
- data/spec/rails_data_explorer/data_type_spec.rb +7 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
- data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
- data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
- data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
- data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
- metadata +91 -82
- data/lib/rails-data-explorer/active_record_extension.rb +0 -14
- data/lib/rails-data-explorer/constants.rb +0 -5
- data/lib/rails-data-explorer/data_series.rb +0 -156
- data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
- data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -0,0 +1 @@
|
|
1
|
+
# http://en.wikipedia.org/wiki/Analysis_of_variance
|
@@ -1,7 +1,16 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
3
|
class RailsDataExplorer
|
4
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a box plot for univariate analysis of a quantitative data series.
|
8
|
+
#
|
9
|
+
# Collaborators:
|
10
|
+
# * DataSet
|
11
|
+
#
|
12
|
+
# http://johan.github.io/d3/ex/box.html
|
13
|
+
# http://bl.ocks.org/mbostock/4061502
|
5
14
|
class BoxPlot < Chart
|
6
15
|
|
7
16
|
def initialize(_data_set, options = {})
|
@@ -18,7 +27,7 @@ class RailsDataExplorer
|
|
18
27
|
min: x_ds.min_val,
|
19
28
|
max: x_ds.max_val,
|
20
29
|
base_width: 120,
|
21
|
-
base_height:
|
30
|
+
base_height: 960,
|
22
31
|
axis_tick_format: x_ds.axis_tick_format,
|
23
32
|
}
|
24
33
|
end
|
@@ -1,16 +1,26 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
# http://mbostock.github.io/protovis/ex/box-and-whisker.html
|
4
|
-
# http://bl.ocks.org/mbostock/4061502
|
5
|
-
# http://johan.github.io/d3/ex/box.html
|
6
|
-
# http://johan.github.io/d3/ex/box.html
|
7
|
-
# http://bl.ocks.org/mbostock/4061502
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
8
3
|
class RailsDataExplorer
|
9
4
|
class Chart
|
10
|
-
class BoxPlotGroup < Chart
|
11
5
|
|
12
|
-
#
|
13
|
-
#
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a group of box plots for bivariate analysis of a categorical and
|
8
|
+
# a numerical data series. One box plot is rendered for each distinct
|
9
|
+
# categorical value.
|
10
|
+
#
|
11
|
+
# Collaborators:
|
12
|
+
# * DataSet
|
13
|
+
#
|
14
|
+
# http://bl.ocks.org/jensgrubert/7789216
|
15
|
+
# http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
|
16
|
+
# http://mbostock.github.io/protovis/ex/box-and-whisker.html
|
17
|
+
# http://bl.ocks.org/mbostock/4061502
|
18
|
+
# http://johan.github.io/d3/ex/box.html
|
19
|
+
# http://johan.github.io/d3/ex/box.html
|
20
|
+
# http://bl.ocks.org/mbostock/4061502
|
21
|
+
# TODO: imitate this:
|
22
|
+
# http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
|
23
|
+
class BoxPlotGroup < Chart
|
14
24
|
|
15
25
|
def initialize(_data_set, options = {})
|
16
26
|
@data_set = _data_set
|
@@ -24,19 +34,16 @@ class RailsDataExplorer
|
|
24
34
|
y_candidates = @data_set.data_series.find_all { |ds|
|
25
35
|
(ds.chart_roles[Chart::BoxPlotGroup] & [:y, :any]).any?
|
26
36
|
}
|
27
|
-
|
28
37
|
x_ds = x_candidates.first
|
29
38
|
y_ds = (y_candidates - [x_ds]).first
|
30
39
|
return false if x_ds.nil? || y_ds.nil?
|
31
40
|
|
32
|
-
|
33
|
-
max = x_ds.max_val # get global max
|
34
|
-
|
41
|
+
# initialize values_hash
|
35
42
|
values_hash = y_ds.uniq_vals.inject({}) { |m,y_val|
|
36
43
|
m[y_val] = []
|
37
44
|
m
|
38
45
|
}
|
39
|
-
|
46
|
+
# populate values hash
|
40
47
|
y_ds.values.each_with_index { |y_val, idx|
|
41
48
|
next if (y_val.nil? || Float::NAN == y_val)
|
42
49
|
values_hash[y_val] << x_ds.values[idx]
|
@@ -49,16 +56,36 @@ class RailsDataExplorer
|
|
49
56
|
)
|
50
57
|
sorted_values = y_sorted_keys.map { |y_val| values_hash[y_val] }
|
51
58
|
|
59
|
+
# Compute min and max values based on interquartile range of each
|
60
|
+
# boxplot. Objective is to normalize boxplots so that the widest chart
|
61
|
+
# uses almost the entire space available.
|
62
|
+
# Iterate over all individual boxplots
|
63
|
+
global_min = Float::INFINITY
|
64
|
+
global_max = -Float::INFINITY
|
65
|
+
sorted_values.each { |x_vals|
|
66
|
+
ds = DataSeries.new('_', x_vals)
|
67
|
+
desc_stats = ds.descriptive_statistics
|
68
|
+
# compute first and third quartile. Use min and max if they are nil
|
69
|
+
# for very small data series with only one or two entries.
|
70
|
+
q1 = desc_stats.detect { |e| '25%ile' == e[:label] }[:value] || x_vals.min
|
71
|
+
q3 = desc_stats.detect { |e| '75%ile' == e[:label] }[:value] || x_vals.max
|
72
|
+
iqr = (q3 - q1) * 1.5
|
73
|
+
local_min = [x_vals.min, q1 - iqr].max
|
74
|
+
global_min = [global_min, local_min].min
|
75
|
+
local_max = [x_vals.max, q3 + iqr].min
|
76
|
+
global_max = [global_max, local_max].max
|
77
|
+
}
|
78
|
+
|
52
79
|
{
|
53
80
|
values: sorted_values,
|
54
81
|
category_labels: y_sorted_keys,
|
55
|
-
min:
|
56
|
-
max:
|
57
|
-
base_width:
|
58
|
-
base_height:
|
82
|
+
min: global_min,
|
83
|
+
max: global_max,
|
84
|
+
base_width: 100,
|
85
|
+
base_height: 960,
|
59
86
|
axis_tick_format: x_ds.axis_tick_format,
|
60
87
|
num_box_plots: y_ds.uniq_vals_count,
|
61
|
-
axis_scale: DataSeries.new('_', [
|
88
|
+
axis_scale: DataSeries.new('_', [global_min, global_max]).axis_scale(:d3)
|
62
89
|
}
|
63
90
|
end
|
64
91
|
|
@@ -85,7 +112,7 @@ class RailsDataExplorer
|
|
85
112
|
(function() {
|
86
113
|
var base_width = #{ ca[:base_width] },
|
87
114
|
base_height = #{ ca[:base_height] },
|
88
|
-
margin = { top: 10, right:
|
115
|
+
margin = { top: 10, right: 40, bottom: 10, left: 40 },
|
89
116
|
width = base_width - margin.left - margin.right,
|
90
117
|
height = base_height - margin.top - margin.bottom;
|
91
118
|
|
@@ -106,7 +133,7 @@ class RailsDataExplorer
|
|
106
133
|
var svg = d3.select("##{ dom_id }").selectAll("svg")
|
107
134
|
.data(data)
|
108
135
|
.append("g")
|
109
|
-
.attr("transform", "rotate(90) translate(" +
|
136
|
+
.attr("transform", "rotate(90) translate(" + margin.left + " -" + (height + margin.bottom) + ")")
|
110
137
|
.call(chart);
|
111
138
|
|
112
139
|
// Function to compute the interquartile range.
|
@@ -1,14 +1,25 @@
|
|
1
|
-
#
|
2
|
-
# https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
|
3
|
-
#
|
4
|
-
# Resources for Chi Squared Test
|
5
|
-
# * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
|
6
|
-
# * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
|
7
|
-
# * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
|
1
|
+
# -*- coding: utf-8 -*-
|
8
2
|
|
9
|
-
# Contingency table and chi squared test is a good tool for interpreting A/B tests.
|
10
3
|
class RailsDataExplorer
|
11
4
|
class Chart
|
5
|
+
|
6
|
+
# Contingency table and chi squared test are great tools for interpreting
|
7
|
+
# A/B tests.
|
8
|
+
#
|
9
|
+
# Responsibilities:
|
10
|
+
# * Render a contingency table for bivariate analysis of two categorical
|
11
|
+
# data series.
|
12
|
+
#
|
13
|
+
# Collaborators:
|
14
|
+
# * DataSet
|
15
|
+
#
|
16
|
+
# See this project for code to compute chi_square and contingency_coefficient
|
17
|
+
# https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
|
18
|
+
#
|
19
|
+
# Resources for Chi Squared Test
|
20
|
+
# * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
|
21
|
+
# * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
|
22
|
+
# * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
|
12
23
|
class ContingencyTable < Chart
|
13
24
|
|
14
25
|
def initialize(_data_set, options = {})
|
@@ -1,5 +1,14 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
class RailsDataExplorer
|
2
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a table with descriptive statistics for a data series of any type.
|
8
|
+
#
|
9
|
+
# Collaborators:
|
10
|
+
# * DataSet
|
11
|
+
#
|
3
12
|
class DescriptiveStatisticsTable < Chart
|
4
13
|
|
5
14
|
def initialize(_data_set, options = {})
|
@@ -0,0 +1 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
@@ -1,5 +1,14 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
class RailsDataExplorer
|
2
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a histogram for univariate analysis of a categorical data series.
|
8
|
+
#
|
9
|
+
# Collaborators:
|
10
|
+
# * DataSet
|
11
|
+
#
|
3
12
|
class HistogramCategorical < Chart
|
4
13
|
|
5
14
|
def initialize(_data_set, options = {})
|
@@ -12,7 +21,8 @@ class RailsDataExplorer
|
|
12
21
|
return false if x_ds.nil?
|
13
22
|
|
14
23
|
# compute histogram
|
15
|
-
|
24
|
+
val_mod = { name: :limit_distinct_values }
|
25
|
+
h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
16
26
|
histogram_values_ds = DataSeries.new('_', h.values)
|
17
27
|
y_scale_type = histogram_values_ds.axis_scale(:vega)
|
18
28
|
bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
|
@@ -50,7 +60,7 @@ class RailsDataExplorer
|
|
50
60
|
<script type="text/javascript">
|
51
61
|
(function() {
|
52
62
|
var spec = {
|
53
|
-
"width":
|
63
|
+
"width": 960,
|
54
64
|
"height": 200,
|
55
65
|
"padding": {"top": 10, "left": 70, "bottom": 50, "right": 10},
|
56
66
|
"data": [
|
@@ -169,12 +179,6 @@ class RailsDataExplorer
|
|
169
179
|
)
|
170
180
|
end
|
171
181
|
|
172
|
-
# Render HistogramCategorical only if there is a fairly small number of
|
173
|
-
# distinct values.
|
174
|
-
def render?
|
175
|
-
!@data_set.data_series.first.has_many_uniq_vals?
|
176
|
-
end
|
177
|
-
|
178
182
|
end
|
179
183
|
end
|
180
184
|
end
|
@@ -1,5 +1,14 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
class RailsDataExplorer
|
2
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a histogram for univariate analysis of a quantitative data series.
|
8
|
+
#
|
9
|
+
# Collaborators:
|
10
|
+
# * DataSet
|
11
|
+
#
|
3
12
|
class HistogramQuantitative < Chart
|
4
13
|
|
5
14
|
def initialize(_data_set, options = {})
|
@@ -15,7 +24,7 @@ class RailsDataExplorer
|
|
15
24
|
quantizer = Utils::DataQuantizer.new(x_ds, max_number_of_bins: 100)
|
16
25
|
quantized_values = quantizer.values
|
17
26
|
number_of_bars = quantizer.number_of_bins
|
18
|
-
width =
|
27
|
+
width = 960
|
19
28
|
h = quantized_values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
20
29
|
histogram_values_ds = DataSeries.new('_', h.values)
|
21
30
|
y_scale_type = histogram_values_ds.axis_scale(:vega)
|
@@ -33,6 +42,7 @@ class RailsDataExplorer
|
|
33
42
|
y_scale_type: y_scale_type,
|
34
43
|
y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
|
35
44
|
bar_y2_val: bar_y2_val,
|
45
|
+
css_class: 'rde-histogram-quantitative',
|
36
46
|
}
|
37
47
|
end
|
38
48
|
|
@@ -45,7 +55,7 @@ class RailsDataExplorer
|
|
45
55
|
|
46
56
|
def render_vega(ca)
|
47
57
|
%(
|
48
|
-
<div class="rde-chart
|
58
|
+
<div class="rde-chart #{ ca[:css_class] }">
|
49
59
|
<h3 class="rde-chart-title">Histogram</h3>
|
50
60
|
<div id="#{ dom_id }"></div>
|
51
61
|
<script type="text/javascript">
|
@@ -1,6 +1,14 @@
|
|
1
|
-
#
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
2
3
|
class RailsDataExplorer
|
3
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a histogram for univariate analysis of a temporal data series.
|
8
|
+
#
|
9
|
+
# Collaborators:
|
10
|
+
# * DataSet
|
11
|
+
#
|
4
12
|
class HistogramTemporal < HistogramQuantitative
|
5
13
|
|
6
14
|
def compute_chart_attrs
|
@@ -17,7 +25,7 @@ class RailsDataExplorer
|
|
17
25
|
histogram_values_ds = DataSeries.new('_', h.values)
|
18
26
|
y_scale_type = histogram_values_ds.axis_scale(:vega)
|
19
27
|
bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
|
20
|
-
width =
|
28
|
+
width = 960
|
21
29
|
{
|
22
30
|
values: h.map { |k,v| { x: k, y: v } },
|
23
31
|
width: width,
|
@@ -31,6 +39,7 @@ class RailsDataExplorer
|
|
31
39
|
y_scale_type: y_scale_type,
|
32
40
|
y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
|
33
41
|
bar_y2_val: bar_y2_val,
|
42
|
+
css_class: 'rde-histogram-temporal',
|
34
43
|
}
|
35
44
|
end
|
36
45
|
|
@@ -1,6 +1,16 @@
|
|
1
|
-
#
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
2
3
|
class RailsDataExplorer
|
3
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a parallel coordinates chart for multivariate analysis of
|
8
|
+
# a mix of quantitative, temporal, and categorical data series.
|
9
|
+
#
|
10
|
+
# Collaborators:
|
11
|
+
# * DataSet
|
12
|
+
#
|
13
|
+
# TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
|
4
14
|
class ParallelCoordinates < Chart
|
5
15
|
|
6
16
|
def initialize(_data_set, options = {})
|
@@ -1,7 +1,16 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
3
|
class RailsDataExplorer
|
4
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a parallel set chart for multivariate analysis of categorical
|
8
|
+
# data series.
|
9
|
+
#
|
10
|
+
# Collaborators:
|
11
|
+
# * DataSet
|
12
|
+
#
|
13
|
+
# http://www.jasondavies.com/parallel-sets/
|
5
14
|
class ParallelSet < Chart
|
6
15
|
|
7
16
|
def initialize(_data_set, options = {})
|
@@ -1,5 +1,14 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
class RailsDataExplorer
|
2
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a pie chart for univariate analysis of a categorical data series.
|
8
|
+
#
|
9
|
+
# Collaborators:
|
10
|
+
# * DataSet
|
11
|
+
#
|
3
12
|
class PieChart < Chart
|
4
13
|
|
5
14
|
def initialize(_data_set, options = {})
|
@@ -11,9 +20,10 @@ class RailsDataExplorer
|
|
11
20
|
x_ds = @data_set.data_series.first
|
12
21
|
return false if x_ds.nil?
|
13
22
|
|
14
|
-
|
23
|
+
val_mod = { name: :limit_distinct_values }
|
24
|
+
total_count = x_ds.values(val_mod).length
|
15
25
|
# compute histogram
|
16
|
-
h = x_ds.values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
26
|
+
h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
17
27
|
{
|
18
28
|
values: h.map { |k,v|
|
19
29
|
{ key: k, value: (v / total_count.to_f) }
|
@@ -137,12 +147,6 @@ class RailsDataExplorer
|
|
137
147
|
)
|
138
148
|
end
|
139
149
|
|
140
|
-
# Render PieChart only if there is a fairly small number of
|
141
|
-
# distinct values.
|
142
|
-
def render?
|
143
|
-
!@data_set.data_series.first.has_many_uniq_vals?
|
144
|
-
end
|
145
|
-
|
146
150
|
end
|
147
151
|
end
|
148
152
|
end
|
@@ -1,5 +1,17 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
class RailsDataExplorer
|
2
4
|
class Chart
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a scatter plot for either
|
8
|
+
# * bivariate analysis of two quantitative data series or
|
9
|
+
# * multivariate analysis of two quantitative and one categorical
|
10
|
+
# data series.
|
11
|
+
#
|
12
|
+
# Collaborators:
|
13
|
+
# * DataSet
|
14
|
+
#
|
3
15
|
class Scatterplot < Chart
|
4
16
|
|
5
17
|
def initialize(_data_set, options = {})
|
@@ -81,7 +93,7 @@ class RailsDataExplorer
|
|
81
93
|
<script type="text/javascript">
|
82
94
|
(function() {
|
83
95
|
var spec = {
|
84
|
-
"width":
|
96
|
+
"width": 960,
|
85
97
|
"height": 200,
|
86
98
|
"data": [
|
87
99
|
{
|
@@ -1,6 +1,16 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
class RailsDataExplorer
|
2
4
|
class Chart
|
3
|
-
|
5
|
+
|
6
|
+
# Responsibilities:
|
7
|
+
# * Render a stacked bar chart for bivariate analysis of two categorical
|
8
|
+
# data series. Renders absolute frequencies of y-data series.
|
9
|
+
#
|
10
|
+
# Collaborators:
|
11
|
+
# * DataSet
|
12
|
+
#
|
13
|
+
class StackedBarChartCategorical < Chart
|
4
14
|
|
5
15
|
def initialize(_data_set, options = {})
|
6
16
|
@data_set = _data_set
|
@@ -8,9 +18,11 @@ class RailsDataExplorer
|
|
8
18
|
end
|
9
19
|
|
10
20
|
def compute_chart_attrs
|
21
|
+
val_mod = { name: :limit_distinct_values }
|
22
|
+
|
11
23
|
x_candidates = @data_set.data_series.find_all { |ds|
|
12
24
|
(ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:x, :any]).any?
|
13
|
-
}.sort { |a,b| b.
|
25
|
+
}.sort { |a,b| b.uniq_vals_count(val_mod) <=> a.uniq_vals_count(val_mod) }
|
14
26
|
y_candidates = @data_set.data_series.find_all { |ds|
|
15
27
|
(ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:y, :any]).any?
|
16
28
|
}
|
@@ -21,31 +33,30 @@ class RailsDataExplorer
|
|
21
33
|
|
22
34
|
# initialize data_matrix
|
23
35
|
data_matrix = { _sum: { _sum: 0 } }
|
24
|
-
x_ds.uniq_vals.each { |x_val|
|
36
|
+
x_ds.uniq_vals(val_mod).each { |x_val|
|
25
37
|
data_matrix[x_val] = {}
|
26
38
|
data_matrix[x_val][:_sum] = 0
|
27
|
-
y_ds.uniq_vals.each { |y_val|
|
39
|
+
y_ds.uniq_vals(val_mod).each { |y_val|
|
28
40
|
data_matrix[x_val][y_val] = 0
|
29
41
|
data_matrix[:_sum][y_val] = 0
|
30
42
|
}
|
31
43
|
}
|
32
44
|
# populate data_matrix
|
33
|
-
x_ds.values.length.times { |idx|
|
34
|
-
x_val = x_ds.values[idx]
|
35
|
-
y_val = y_ds.values[idx]
|
45
|
+
x_ds.values(val_mod).length.times { |idx|
|
46
|
+
x_val = x_ds.values(val_mod)[idx]
|
47
|
+
y_val = y_ds.values(val_mod)[idx]
|
36
48
|
data_matrix[x_val][y_val] += 1
|
37
49
|
data_matrix[:_sum][y_val] += 1
|
38
50
|
data_matrix[x_val][:_sum] += 1
|
39
51
|
data_matrix[:_sum][:_sum] += 1
|
40
52
|
}
|
41
|
-
|
42
|
-
x_sorted_keys = x_ds.uniq_vals.sort(
|
53
|
+
x_sorted_keys = x_ds.uniq_vals(val_mod).sort(
|
43
54
|
&x_ds.label_sorter(
|
44
55
|
nil,
|
45
56
|
lambda { |a,b| data_matrix[b][:_sum] <=> data_matrix[a][:_sum] }
|
46
57
|
)
|
47
58
|
)
|
48
|
-
y_sorted_keys = y_ds.uniq_vals.sort(
|
59
|
+
y_sorted_keys = y_ds.uniq_vals(val_mod).sort(
|
49
60
|
&y_ds.label_sorter(
|
50
61
|
nil,
|
51
62
|
lambda { |a,b| data_matrix[:_sum][b] <=> data_matrix[:_sum][a] }
|
@@ -58,7 +69,7 @@ class RailsDataExplorer
|
|
58
69
|
x_sorted_keys.map { |x_val|
|
59
70
|
{
|
60
71
|
x: x_val,
|
61
|
-
y: (data_matrix
|
72
|
+
y: compute_y_value(data_matrix, x_val, y_val),
|
62
73
|
c: y_val
|
63
74
|
}
|
64
75
|
}
|
@@ -70,11 +81,22 @@ class RailsDataExplorer
|
|
70
81
|
values: values,
|
71
82
|
x_axis_label: x_ds.name,
|
72
83
|
x_axis_tick_format: 'function(d) { return d }',
|
73
|
-
y_axis_label:
|
84
|
+
y_axis_label: compute_y_axis_label(y_ds.name),
|
74
85
|
y_axis_tick_format: "d3.format('.1%')",
|
75
86
|
}
|
76
87
|
end
|
77
88
|
|
89
|
+
# Override this method to change how the y value is computed. E.g., to
|
90
|
+
# change from absolute values to percentages.
|
91
|
+
def compute_y_value(data_matrix, x_val, y_val)
|
92
|
+
data_matrix[x_val][y_val]
|
93
|
+
end
|
94
|
+
|
95
|
+
# @param y_ds_name [String] name of the y data series
|
96
|
+
def compute_y_axis_label(y_ds_name)
|
97
|
+
"Frequency"
|
98
|
+
end
|
99
|
+
|
78
100
|
def render
|
79
101
|
return '' unless render?
|
80
102
|
ca = compute_chart_attrs
|
@@ -90,9 +112,9 @@ class RailsDataExplorer
|
|
90
112
|
<script type="text/javascript">
|
91
113
|
(function() {
|
92
114
|
var spec = {
|
93
|
-
"width":
|
115
|
+
"width": 960,
|
94
116
|
"height": 200,
|
95
|
-
"padding": {"top": 10, "left":
|
117
|
+
"padding": {"top": 10, "left": 70, "bottom": 50, "right": 100},
|
96
118
|
"data": [
|
97
119
|
{
|
98
120
|
"name": "table",
|
@@ -139,6 +161,7 @@ class RailsDataExplorer
|
|
139
161
|
"scale": "y",
|
140
162
|
"title": "#{ ca[:y_axis_label] }",
|
141
163
|
"format": #{ ca[:y_axis_tick_format] },
|
164
|
+
"titleOffset": 60,
|
142
165
|
}
|
143
166
|
],
|
144
167
|
"marks": [
|