rails-data-explorer 0.2.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -3
- data/README.md +2 -0
- data/lib/rails-data-explorer-no-rails.rb +36 -32
- data/lib/rails-data-explorer.rb +38 -35
- data/lib/rails_data_explorer.rb +29 -10
- data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
- data/lib/rails_data_explorer/active_record_extension.rb +19 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
- data/lib/rails_data_explorer/chart/anova.rb +1 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
- data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
- data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
- data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
- data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
- data/lib/rails_data_explorer/data_series.rb +241 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
- data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
- data/rails-data-explorer.gemspec +5 -6
- data/spec/rails_data_explorer/chart_spec.rb +11 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
- data/spec/rails_data_explorer/data_set_spec.rb +31 -0
- data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
- data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
- data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
- data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
- data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
- data/spec/rails_data_explorer/data_type_spec.rb +7 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
- data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
- data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
- data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
- data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
- metadata +91 -82
- data/lib/rails-data-explorer/active_record_extension.rb +0 -14
- data/lib/rails-data-explorer/constants.rb +0 -5
- data/lib/rails-data-explorer/data_series.rb +0 -156
- data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
- data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# http://en.wikipedia.org/wiki/Analysis_of_variance
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
3
|
class RailsDataExplorer
|
|
4
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a box plot for univariate analysis of a quantitative data series.
|
|
8
|
+
#
|
|
9
|
+
# Collaborators:
|
|
10
|
+
# * DataSet
|
|
11
|
+
#
|
|
12
|
+
# http://johan.github.io/d3/ex/box.html
|
|
13
|
+
# http://bl.ocks.org/mbostock/4061502
|
|
5
14
|
class BoxPlot < Chart
|
|
6
15
|
|
|
7
16
|
def initialize(_data_set, options = {})
|
|
@@ -18,7 +27,7 @@ class RailsDataExplorer
|
|
|
18
27
|
min: x_ds.min_val,
|
|
19
28
|
max: x_ds.max_val,
|
|
20
29
|
base_width: 120,
|
|
21
|
-
base_height:
|
|
30
|
+
base_height: 960,
|
|
22
31
|
axis_tick_format: x_ds.axis_tick_format,
|
|
23
32
|
}
|
|
24
33
|
end
|
|
@@ -1,16 +1,26 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
# http://mbostock.github.io/protovis/ex/box-and-whisker.html
|
|
4
|
-
# http://bl.ocks.org/mbostock/4061502
|
|
5
|
-
# http://johan.github.io/d3/ex/box.html
|
|
6
|
-
# http://johan.github.io/d3/ex/box.html
|
|
7
|
-
# http://bl.ocks.org/mbostock/4061502
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
8
3
|
class RailsDataExplorer
|
|
9
4
|
class Chart
|
|
10
|
-
class BoxPlotGroup < Chart
|
|
11
5
|
|
|
12
|
-
#
|
|
13
|
-
#
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a group of box plots for bivariate analysis of a categorical and
|
|
8
|
+
# a numerical data series. One box plot is rendered for each distinct
|
|
9
|
+
# categorical value.
|
|
10
|
+
#
|
|
11
|
+
# Collaborators:
|
|
12
|
+
# * DataSet
|
|
13
|
+
#
|
|
14
|
+
# http://bl.ocks.org/jensgrubert/7789216
|
|
15
|
+
# http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
|
|
16
|
+
# http://mbostock.github.io/protovis/ex/box-and-whisker.html
|
|
17
|
+
# http://bl.ocks.org/mbostock/4061502
|
|
18
|
+
# http://johan.github.io/d3/ex/box.html
|
|
19
|
+
# http://johan.github.io/d3/ex/box.html
|
|
20
|
+
# http://bl.ocks.org/mbostock/4061502
|
|
21
|
+
# TODO: imitate this:
|
|
22
|
+
# http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
|
|
23
|
+
class BoxPlotGroup < Chart
|
|
14
24
|
|
|
15
25
|
def initialize(_data_set, options = {})
|
|
16
26
|
@data_set = _data_set
|
|
@@ -24,19 +34,16 @@ class RailsDataExplorer
|
|
|
24
34
|
y_candidates = @data_set.data_series.find_all { |ds|
|
|
25
35
|
(ds.chart_roles[Chart::BoxPlotGroup] & [:y, :any]).any?
|
|
26
36
|
}
|
|
27
|
-
|
|
28
37
|
x_ds = x_candidates.first
|
|
29
38
|
y_ds = (y_candidates - [x_ds]).first
|
|
30
39
|
return false if x_ds.nil? || y_ds.nil?
|
|
31
40
|
|
|
32
|
-
|
|
33
|
-
max = x_ds.max_val # get global max
|
|
34
|
-
|
|
41
|
+
# initialize values_hash
|
|
35
42
|
values_hash = y_ds.uniq_vals.inject({}) { |m,y_val|
|
|
36
43
|
m[y_val] = []
|
|
37
44
|
m
|
|
38
45
|
}
|
|
39
|
-
|
|
46
|
+
# populate values hash
|
|
40
47
|
y_ds.values.each_with_index { |y_val, idx|
|
|
41
48
|
next if (y_val.nil? || Float::NAN == y_val)
|
|
42
49
|
values_hash[y_val] << x_ds.values[idx]
|
|
@@ -49,16 +56,36 @@ class RailsDataExplorer
|
|
|
49
56
|
)
|
|
50
57
|
sorted_values = y_sorted_keys.map { |y_val| values_hash[y_val] }
|
|
51
58
|
|
|
59
|
+
# Compute min and max values based on interquartile range of each
|
|
60
|
+
# boxplot. Objective is to normalize boxplots so that the widest chart
|
|
61
|
+
# uses almost the entire space available.
|
|
62
|
+
# Iterate over all individual boxplots
|
|
63
|
+
global_min = Float::INFINITY
|
|
64
|
+
global_max = -Float::INFINITY
|
|
65
|
+
sorted_values.each { |x_vals|
|
|
66
|
+
ds = DataSeries.new('_', x_vals)
|
|
67
|
+
desc_stats = ds.descriptive_statistics
|
|
68
|
+
# compute first and third quartile. Use min and max if they are nil
|
|
69
|
+
# for very small data series with only one or two entries.
|
|
70
|
+
q1 = desc_stats.detect { |e| '25%ile' == e[:label] }[:value] || x_vals.min
|
|
71
|
+
q3 = desc_stats.detect { |e| '75%ile' == e[:label] }[:value] || x_vals.max
|
|
72
|
+
iqr = (q3 - q1) * 1.5
|
|
73
|
+
local_min = [x_vals.min, q1 - iqr].max
|
|
74
|
+
global_min = [global_min, local_min].min
|
|
75
|
+
local_max = [x_vals.max, q3 + iqr].min
|
|
76
|
+
global_max = [global_max, local_max].max
|
|
77
|
+
}
|
|
78
|
+
|
|
52
79
|
{
|
|
53
80
|
values: sorted_values,
|
|
54
81
|
category_labels: y_sorted_keys,
|
|
55
|
-
min:
|
|
56
|
-
max:
|
|
57
|
-
base_width:
|
|
58
|
-
base_height:
|
|
82
|
+
min: global_min,
|
|
83
|
+
max: global_max,
|
|
84
|
+
base_width: 100,
|
|
85
|
+
base_height: 960,
|
|
59
86
|
axis_tick_format: x_ds.axis_tick_format,
|
|
60
87
|
num_box_plots: y_ds.uniq_vals_count,
|
|
61
|
-
axis_scale: DataSeries.new('_', [
|
|
88
|
+
axis_scale: DataSeries.new('_', [global_min, global_max]).axis_scale(:d3)
|
|
62
89
|
}
|
|
63
90
|
end
|
|
64
91
|
|
|
@@ -85,7 +112,7 @@ class RailsDataExplorer
|
|
|
85
112
|
(function() {
|
|
86
113
|
var base_width = #{ ca[:base_width] },
|
|
87
114
|
base_height = #{ ca[:base_height] },
|
|
88
|
-
margin = { top: 10, right:
|
|
115
|
+
margin = { top: 10, right: 40, bottom: 10, left: 40 },
|
|
89
116
|
width = base_width - margin.left - margin.right,
|
|
90
117
|
height = base_height - margin.top - margin.bottom;
|
|
91
118
|
|
|
@@ -106,7 +133,7 @@ class RailsDataExplorer
|
|
|
106
133
|
var svg = d3.select("##{ dom_id }").selectAll("svg")
|
|
107
134
|
.data(data)
|
|
108
135
|
.append("g")
|
|
109
|
-
.attr("transform", "rotate(90) translate(" +
|
|
136
|
+
.attr("transform", "rotate(90) translate(" + margin.left + " -" + (height + margin.bottom) + ")")
|
|
110
137
|
.call(chart);
|
|
111
138
|
|
|
112
139
|
// Function to compute the interquartile range.
|
|
@@ -1,14 +1,25 @@
|
|
|
1
|
-
#
|
|
2
|
-
# https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
|
|
3
|
-
#
|
|
4
|
-
# Resources for Chi Squared Test
|
|
5
|
-
# * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
|
|
6
|
-
# * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
|
|
7
|
-
# * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
8
2
|
|
|
9
|
-
# Contingency table and chi squared test is a good tool for interpreting A/B tests.
|
|
10
3
|
class RailsDataExplorer
|
|
11
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Contingency table and chi squared test are great tools for interpreting
|
|
7
|
+
# A/B tests.
|
|
8
|
+
#
|
|
9
|
+
# Responsibilities:
|
|
10
|
+
# * Render a contingency table for bivariate analysis of two categorical
|
|
11
|
+
# data series.
|
|
12
|
+
#
|
|
13
|
+
# Collaborators:
|
|
14
|
+
# * DataSet
|
|
15
|
+
#
|
|
16
|
+
# See this project for code to compute chi_square and contingency_coefficient
|
|
17
|
+
# https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
|
|
18
|
+
#
|
|
19
|
+
# Resources for Chi Squared Test
|
|
20
|
+
# * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
|
|
21
|
+
# * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
|
|
22
|
+
# * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
|
|
12
23
|
class ContingencyTable < Chart
|
|
13
24
|
|
|
14
25
|
def initialize(_data_set, options = {})
|
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
class RailsDataExplorer
|
|
2
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a table with descriptive statistics for a data series of any type.
|
|
8
|
+
#
|
|
9
|
+
# Collaborators:
|
|
10
|
+
# * DataSet
|
|
11
|
+
#
|
|
3
12
|
class DescriptiveStatisticsTable < Chart
|
|
4
13
|
|
|
5
14
|
def initialize(_data_set, options = {})
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
class RailsDataExplorer
|
|
2
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a histogram for univariate analysis of a categorical data series.
|
|
8
|
+
#
|
|
9
|
+
# Collaborators:
|
|
10
|
+
# * DataSet
|
|
11
|
+
#
|
|
3
12
|
class HistogramCategorical < Chart
|
|
4
13
|
|
|
5
14
|
def initialize(_data_set, options = {})
|
|
@@ -12,7 +21,8 @@ class RailsDataExplorer
|
|
|
12
21
|
return false if x_ds.nil?
|
|
13
22
|
|
|
14
23
|
# compute histogram
|
|
15
|
-
|
|
24
|
+
val_mod = { name: :limit_distinct_values }
|
|
25
|
+
h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
|
16
26
|
histogram_values_ds = DataSeries.new('_', h.values)
|
|
17
27
|
y_scale_type = histogram_values_ds.axis_scale(:vega)
|
|
18
28
|
bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
|
|
@@ -50,7 +60,7 @@ class RailsDataExplorer
|
|
|
50
60
|
<script type="text/javascript">
|
|
51
61
|
(function() {
|
|
52
62
|
var spec = {
|
|
53
|
-
"width":
|
|
63
|
+
"width": 960,
|
|
54
64
|
"height": 200,
|
|
55
65
|
"padding": {"top": 10, "left": 70, "bottom": 50, "right": 10},
|
|
56
66
|
"data": [
|
|
@@ -169,12 +179,6 @@ class RailsDataExplorer
|
|
|
169
179
|
)
|
|
170
180
|
end
|
|
171
181
|
|
|
172
|
-
# Render HistogramCategorical only if there is a fairly small number of
|
|
173
|
-
# distinct values.
|
|
174
|
-
def render?
|
|
175
|
-
!@data_set.data_series.first.has_many_uniq_vals?
|
|
176
|
-
end
|
|
177
|
-
|
|
178
182
|
end
|
|
179
183
|
end
|
|
180
184
|
end
|
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
class RailsDataExplorer
|
|
2
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a histogram for univariate analysis of a quantitative data series.
|
|
8
|
+
#
|
|
9
|
+
# Collaborators:
|
|
10
|
+
# * DataSet
|
|
11
|
+
#
|
|
3
12
|
class HistogramQuantitative < Chart
|
|
4
13
|
|
|
5
14
|
def initialize(_data_set, options = {})
|
|
@@ -15,7 +24,7 @@ class RailsDataExplorer
|
|
|
15
24
|
quantizer = Utils::DataQuantizer.new(x_ds, max_number_of_bins: 100)
|
|
16
25
|
quantized_values = quantizer.values
|
|
17
26
|
number_of_bars = quantizer.number_of_bins
|
|
18
|
-
width =
|
|
27
|
+
width = 960
|
|
19
28
|
h = quantized_values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
|
20
29
|
histogram_values_ds = DataSeries.new('_', h.values)
|
|
21
30
|
y_scale_type = histogram_values_ds.axis_scale(:vega)
|
|
@@ -33,6 +42,7 @@ class RailsDataExplorer
|
|
|
33
42
|
y_scale_type: y_scale_type,
|
|
34
43
|
y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
|
|
35
44
|
bar_y2_val: bar_y2_val,
|
|
45
|
+
css_class: 'rde-histogram-quantitative',
|
|
36
46
|
}
|
|
37
47
|
end
|
|
38
48
|
|
|
@@ -45,7 +55,7 @@ class RailsDataExplorer
|
|
|
45
55
|
|
|
46
56
|
def render_vega(ca)
|
|
47
57
|
%(
|
|
48
|
-
<div class="rde-chart
|
|
58
|
+
<div class="rde-chart #{ ca[:css_class] }">
|
|
49
59
|
<h3 class="rde-chart-title">Histogram</h3>
|
|
50
60
|
<div id="#{ dom_id }"></div>
|
|
51
61
|
<script type="text/javascript">
|
|
@@ -1,6 +1,14 @@
|
|
|
1
|
-
#
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
2
3
|
class RailsDataExplorer
|
|
3
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a histogram for univariate analysis of a temporal data series.
|
|
8
|
+
#
|
|
9
|
+
# Collaborators:
|
|
10
|
+
# * DataSet
|
|
11
|
+
#
|
|
4
12
|
class HistogramTemporal < HistogramQuantitative
|
|
5
13
|
|
|
6
14
|
def compute_chart_attrs
|
|
@@ -17,7 +25,7 @@ class RailsDataExplorer
|
|
|
17
25
|
histogram_values_ds = DataSeries.new('_', h.values)
|
|
18
26
|
y_scale_type = histogram_values_ds.axis_scale(:vega)
|
|
19
27
|
bar_y2_val = 'log' == y_scale_type ? histogram_values_ds.min_val / 10.0 : 0
|
|
20
|
-
width =
|
|
28
|
+
width = 960
|
|
21
29
|
{
|
|
22
30
|
values: h.map { |k,v| { x: k, y: v } },
|
|
23
31
|
width: width,
|
|
@@ -31,6 +39,7 @@ class RailsDataExplorer
|
|
|
31
39
|
y_scale_type: y_scale_type,
|
|
32
40
|
y_scale_domain: [bar_y2_val, histogram_values_ds.max_val],
|
|
33
41
|
bar_y2_val: bar_y2_val,
|
|
42
|
+
css_class: 'rde-histogram-temporal',
|
|
34
43
|
}
|
|
35
44
|
end
|
|
36
45
|
|
|
@@ -1,6 +1,16 @@
|
|
|
1
|
-
#
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
2
3
|
class RailsDataExplorer
|
|
3
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a parallel coordinates chart for multivariate analysis of
|
|
8
|
+
# a mix of quantitative, temporal, and categorical data series.
|
|
9
|
+
#
|
|
10
|
+
# Collaborators:
|
|
11
|
+
# * DataSet
|
|
12
|
+
#
|
|
13
|
+
# TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
|
|
4
14
|
class ParallelCoordinates < Chart
|
|
5
15
|
|
|
6
16
|
def initialize(_data_set, options = {})
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
3
|
class RailsDataExplorer
|
|
4
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a parallel set chart for multivariate analysis of categorical
|
|
8
|
+
# data series.
|
|
9
|
+
#
|
|
10
|
+
# Collaborators:
|
|
11
|
+
# * DataSet
|
|
12
|
+
#
|
|
13
|
+
# http://www.jasondavies.com/parallel-sets/
|
|
5
14
|
class ParallelSet < Chart
|
|
6
15
|
|
|
7
16
|
def initialize(_data_set, options = {})
|
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
class RailsDataExplorer
|
|
2
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a pie chart for univariate analysis of a categorical data series.
|
|
8
|
+
#
|
|
9
|
+
# Collaborators:
|
|
10
|
+
# * DataSet
|
|
11
|
+
#
|
|
3
12
|
class PieChart < Chart
|
|
4
13
|
|
|
5
14
|
def initialize(_data_set, options = {})
|
|
@@ -11,9 +20,10 @@ class RailsDataExplorer
|
|
|
11
20
|
x_ds = @data_set.data_series.first
|
|
12
21
|
return false if x_ds.nil?
|
|
13
22
|
|
|
14
|
-
|
|
23
|
+
val_mod = { name: :limit_distinct_values }
|
|
24
|
+
total_count = x_ds.values(val_mod).length
|
|
15
25
|
# compute histogram
|
|
16
|
-
h = x_ds.values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
|
26
|
+
h = x_ds.values(val_mod).inject(Hash.new(0)) { |m,e| m[e] += 1; m }
|
|
17
27
|
{
|
|
18
28
|
values: h.map { |k,v|
|
|
19
29
|
{ key: k, value: (v / total_count.to_f) }
|
|
@@ -137,12 +147,6 @@ class RailsDataExplorer
|
|
|
137
147
|
)
|
|
138
148
|
end
|
|
139
149
|
|
|
140
|
-
# Render PieChart only if there is a fairly small number of
|
|
141
|
-
# distinct values.
|
|
142
|
-
def render?
|
|
143
|
-
!@data_set.data_series.first.has_many_uniq_vals?
|
|
144
|
-
end
|
|
145
|
-
|
|
146
150
|
end
|
|
147
151
|
end
|
|
148
152
|
end
|
|
@@ -1,5 +1,17 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
class RailsDataExplorer
|
|
2
4
|
class Chart
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a scatter plot for either
|
|
8
|
+
# * bivariate analysis of two quantitative data series or
|
|
9
|
+
# * multivariate analysis of two quantitative and one categorical
|
|
10
|
+
# data series.
|
|
11
|
+
#
|
|
12
|
+
# Collaborators:
|
|
13
|
+
# * DataSet
|
|
14
|
+
#
|
|
3
15
|
class Scatterplot < Chart
|
|
4
16
|
|
|
5
17
|
def initialize(_data_set, options = {})
|
|
@@ -81,7 +93,7 @@ class RailsDataExplorer
|
|
|
81
93
|
<script type="text/javascript">
|
|
82
94
|
(function() {
|
|
83
95
|
var spec = {
|
|
84
|
-
"width":
|
|
96
|
+
"width": 960,
|
|
85
97
|
"height": 200,
|
|
86
98
|
"data": [
|
|
87
99
|
{
|
|
@@ -1,6 +1,16 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
1
3
|
class RailsDataExplorer
|
|
2
4
|
class Chart
|
|
3
|
-
|
|
5
|
+
|
|
6
|
+
# Responsibilities:
|
|
7
|
+
# * Render a stacked bar chart for bivariate analysis of two categorical
|
|
8
|
+
# data series. Renders absolute frequencies of y-data series.
|
|
9
|
+
#
|
|
10
|
+
# Collaborators:
|
|
11
|
+
# * DataSet
|
|
12
|
+
#
|
|
13
|
+
class StackedBarChartCategorical < Chart
|
|
4
14
|
|
|
5
15
|
def initialize(_data_set, options = {})
|
|
6
16
|
@data_set = _data_set
|
|
@@ -8,9 +18,11 @@ class RailsDataExplorer
|
|
|
8
18
|
end
|
|
9
19
|
|
|
10
20
|
def compute_chart_attrs
|
|
21
|
+
val_mod = { name: :limit_distinct_values }
|
|
22
|
+
|
|
11
23
|
x_candidates = @data_set.data_series.find_all { |ds|
|
|
12
24
|
(ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:x, :any]).any?
|
|
13
|
-
}.sort { |a,b| b.
|
|
25
|
+
}.sort { |a,b| b.uniq_vals_count(val_mod) <=> a.uniq_vals_count(val_mod) }
|
|
14
26
|
y_candidates = @data_set.data_series.find_all { |ds|
|
|
15
27
|
(ds.chart_roles[Chart::StackedBarChartCategoricalPercent] & [:y, :any]).any?
|
|
16
28
|
}
|
|
@@ -21,31 +33,30 @@ class RailsDataExplorer
|
|
|
21
33
|
|
|
22
34
|
# initialize data_matrix
|
|
23
35
|
data_matrix = { _sum: { _sum: 0 } }
|
|
24
|
-
x_ds.uniq_vals.each { |x_val|
|
|
36
|
+
x_ds.uniq_vals(val_mod).each { |x_val|
|
|
25
37
|
data_matrix[x_val] = {}
|
|
26
38
|
data_matrix[x_val][:_sum] = 0
|
|
27
|
-
y_ds.uniq_vals.each { |y_val|
|
|
39
|
+
y_ds.uniq_vals(val_mod).each { |y_val|
|
|
28
40
|
data_matrix[x_val][y_val] = 0
|
|
29
41
|
data_matrix[:_sum][y_val] = 0
|
|
30
42
|
}
|
|
31
43
|
}
|
|
32
44
|
# populate data_matrix
|
|
33
|
-
x_ds.values.length.times { |idx|
|
|
34
|
-
x_val = x_ds.values[idx]
|
|
35
|
-
y_val = y_ds.values[idx]
|
|
45
|
+
x_ds.values(val_mod).length.times { |idx|
|
|
46
|
+
x_val = x_ds.values(val_mod)[idx]
|
|
47
|
+
y_val = y_ds.values(val_mod)[idx]
|
|
36
48
|
data_matrix[x_val][y_val] += 1
|
|
37
49
|
data_matrix[:_sum][y_val] += 1
|
|
38
50
|
data_matrix[x_val][:_sum] += 1
|
|
39
51
|
data_matrix[:_sum][:_sum] += 1
|
|
40
52
|
}
|
|
41
|
-
|
|
42
|
-
x_sorted_keys = x_ds.uniq_vals.sort(
|
|
53
|
+
x_sorted_keys = x_ds.uniq_vals(val_mod).sort(
|
|
43
54
|
&x_ds.label_sorter(
|
|
44
55
|
nil,
|
|
45
56
|
lambda { |a,b| data_matrix[b][:_sum] <=> data_matrix[a][:_sum] }
|
|
46
57
|
)
|
|
47
58
|
)
|
|
48
|
-
y_sorted_keys = y_ds.uniq_vals.sort(
|
|
59
|
+
y_sorted_keys = y_ds.uniq_vals(val_mod).sort(
|
|
49
60
|
&y_ds.label_sorter(
|
|
50
61
|
nil,
|
|
51
62
|
lambda { |a,b| data_matrix[:_sum][b] <=> data_matrix[:_sum][a] }
|
|
@@ -58,7 +69,7 @@ class RailsDataExplorer
|
|
|
58
69
|
x_sorted_keys.map { |x_val|
|
|
59
70
|
{
|
|
60
71
|
x: x_val,
|
|
61
|
-
y: (data_matrix
|
|
72
|
+
y: compute_y_value(data_matrix, x_val, y_val),
|
|
62
73
|
c: y_val
|
|
63
74
|
}
|
|
64
75
|
}
|
|
@@ -70,11 +81,22 @@ class RailsDataExplorer
|
|
|
70
81
|
values: values,
|
|
71
82
|
x_axis_label: x_ds.name,
|
|
72
83
|
x_axis_tick_format: 'function(d) { return d }',
|
|
73
|
-
y_axis_label:
|
|
84
|
+
y_axis_label: compute_y_axis_label(y_ds.name),
|
|
74
85
|
y_axis_tick_format: "d3.format('.1%')",
|
|
75
86
|
}
|
|
76
87
|
end
|
|
77
88
|
|
|
89
|
+
# Override this method to change how the y value is computed. E.g., to
|
|
90
|
+
# change from absolute values to percentages.
|
|
91
|
+
def compute_y_value(data_matrix, x_val, y_val)
|
|
92
|
+
data_matrix[x_val][y_val]
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# @param y_ds_name [String] name of the y data series
|
|
96
|
+
def compute_y_axis_label(y_ds_name)
|
|
97
|
+
"Frequency"
|
|
98
|
+
end
|
|
99
|
+
|
|
78
100
|
def render
|
|
79
101
|
return '' unless render?
|
|
80
102
|
ca = compute_chart_attrs
|
|
@@ -90,9 +112,9 @@ class RailsDataExplorer
|
|
|
90
112
|
<script type="text/javascript">
|
|
91
113
|
(function() {
|
|
92
114
|
var spec = {
|
|
93
|
-
"width":
|
|
115
|
+
"width": 960,
|
|
94
116
|
"height": 200,
|
|
95
|
-
"padding": {"top": 10, "left":
|
|
117
|
+
"padding": {"top": 10, "left": 70, "bottom": 50, "right": 100},
|
|
96
118
|
"data": [
|
|
97
119
|
{
|
|
98
120
|
"name": "table",
|
|
@@ -139,6 +161,7 @@ class RailsDataExplorer
|
|
|
139
161
|
"scale": "y",
|
|
140
162
|
"title": "#{ ca[:y_axis_label] }",
|
|
141
163
|
"format": #{ ca[:y_axis_tick_format] },
|
|
164
|
+
"titleOffset": 60,
|
|
142
165
|
}
|
|
143
166
|
],
|
|
144
167
|
"marks": [
|