rails-data-explorer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/.gitignore +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +52 -0
  6. data/Rakefile +18 -0
  7. data/lib/rails-data-explorer.rb +44 -0
  8. data/lib/rails-data-explorer/action_view_extension.rb +12 -0
  9. data/lib/rails-data-explorer/active_record_extension.rb +14 -0
  10. data/lib/rails-data-explorer/chart.rb +52 -0
  11. data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
  12. data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
  13. data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
  14. data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
  15. data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
  16. data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
  17. data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
  18. data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
  19. data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
  20. data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
  21. data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
  22. data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
  23. data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
  24. data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
  25. data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
  26. data/lib/rails-data-explorer/data_series.rb +115 -0
  27. data/lib/rails-data-explorer/data_set.rb +127 -0
  28. data/lib/rails-data-explorer/data_type.rb +34 -0
  29. data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
  30. data/lib/rails-data-explorer/data_type/geo.rb +1 -0
  31. data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
  32. data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
  33. data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
  34. data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
  35. data/lib/rails-data-explorer/engine.rb +24 -0
  36. data/lib/rails-data-explorer/exploration.rb +89 -0
  37. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
  38. data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
  39. data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
  40. data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
  41. data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
  42. data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
  43. data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
  44. data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
  45. data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
  46. data/rails-data-explorer.gemspec +30 -0
  47. data/vendor/assets/javascripts/d3.boxplot.js +302 -0
  48. data/vendor/assets/javascripts/d3.parcoords.js +585 -0
  49. data/vendor/assets/javascripts/d3.parsets.js +663 -0
  50. data/vendor/assets/javascripts/d3.v3.js +9294 -0
  51. data/vendor/assets/javascripts/nv.d3.js +14369 -0
  52. data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
  53. data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
  54. data/vendor/assets/stylesheets/bootstrap.css +1727 -0
  55. data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
  56. data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
  57. data/vendor/assets/stylesheets/d3.parsets.css +34 -0
  58. data/vendor/assets/stylesheets/nv.d3.css +769 -0
  59. data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
  60. data/vendor/assets/stylesheets/rde-default-style.css +42 -0
  61. metadata +250 -0
@@ -0,0 +1,22 @@
class RailsDataExplorer
  class Chart
    # Renders one "Descriptive Statistics" heading plus HTML table for every
    # data series in the data set, wrapped in a single container div.
    class DescriptiveStatisticsTable < Chart

      # @param _data_set [#data_series] the data set whose series are rendered
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # @return [String] the chart markup, or '' when `render?` is false.
      #   `render?`, `dom_id`, `content_tag` and `render_html_table` are not
      #   defined in this class; presumably they come from Chart (confirm).
      def render
        return '' unless render?
        wrapper_attrs = { :id => dom_id, :class => 'rde-chart rde-descriptive-statistics-table' }
        content_tag(:div, wrapper_attrs) do
          sections = @data_set.data_series.map do |series|
            heading = content_tag(:h3, "Descriptive Statistics", :class => 'rde-chart-title')
            heading + render_html_table(series.descriptive_statistics_table)
          end
          sections.join.html_safe
        end
      end

    end
  end
end
@@ -0,0 +1,73 @@
class RailsDataExplorer
  class Chart
    # Frequency bar chart for the first data series of a data set, drawn in
    # the browser with NVD3's discreteBarChart. Bars are ordered by
    # descending frequency.
    class HistogramCategorical < Chart

      # @param _data_set [#data_series] data set; only the first series is charted
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Counts how often each distinct value occurs in the first data series.
      #
      # @return [Hash] :values is an Array of { x: value, y: count } sorted by
      #   descending count; the remaining keys are axis labels and JavaScript
      #   tick-format expressions that `render` interpolates verbatim.
      def compute_chart_attrs
        x_ds = @data_set.data_series.first
        counts = x_ds.values.each_with_object(Hash.new(0)) { |value, tally| tally[value] += 1 }
        {
          values: counts.map { |k, v| { x: k, y: v } }.sort { |a, b| b[:y] <=> a[:y] },
          x_axis_label: x_ds.name,
          x_axis_tick_format: "",
          y_axis_label: 'Frequency',
          y_axis_tick_format: "d3.format('r')",
        }
      end

      # @return [String] HTML plus inline script for the chart, or '' when
      #   `render?` is false. `render?` and `dom_id` are not defined in this
      #   class; presumably they come from Chart (confirm).
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Labels and data are emitted as escaped JSON literals so a series
        # name containing a quote cannot break the generated script.
        # The tick formats are JavaScript expressions and stay unquoted.
        %(
          <div class="rde-chart rde-histogram">
            <h3 class="rde-chart-title">Histogram</h3>
            <div id="#{ dom_id }" style="height: 200px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = [
                  {
                    values: #{ js_literal(ca[:values]) },
                    key: #{ js_literal(ca[:x_axis_label].to_s) }
                  }
                ];

                nv.addGraph(function() {
                  var chart = nv.models.discreteBarChart()
                  ;

                  chart.xAxis
                    .axisLabel(#{ js_literal(ca[:x_axis_label].to_s) })
                    .tickFormat(#{ ca[:x_axis_tick_format] })
                  ;

                  chart.yAxis
                    .axisLabel(#{ js_literal(ca[:y_axis_label].to_s) })
                    .tickFormat(#{ ca[:y_axis_tick_format] })
                  ;

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .transition().duration(100)
                    .call(chart)
                  ;

                  nv.utils.windowResize(chart.update);

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

      private

      # Serializes value as JSON that is safe inside an inline <script>
      # element: "</" is rewritten to "<\/" so data cannot close the script.
      def js_literal(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1,73 @@
class RailsDataExplorer
  class Chart
    # Frequency bar chart for the first (quantitative) data series of a data
    # set, drawn in the browser with NVD3's historicalBarChart.
    class HistogramQuantitative < Chart

      # @param _data_set [#data_series] data set; only the first series is charted
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Counts how often each distinct value occurs in the first data series.
      # Values are not binned: every distinct value gets its own bar, in
      # first-seen order.
      #
      # @return [Hash] :values is an Array of { x: value, y: count }; the
      #   remaining keys are axis labels and JavaScript tick-format
      #   expressions that `render` interpolates verbatim.
      def compute_chart_attrs
        x_ds = @data_set.data_series.first
        counts = x_ds.values.each_with_object(Hash.new(0)) { |value, tally| tally[value] += 1 }
        {
          values: counts.map { |k, v| { x: k, y: v } },
          x_axis_label: x_ds.name,
          x_axis_tick_format: x_ds.axis_tick_format,
          y_axis_label: 'Frequency',
          y_axis_tick_format: "d3.format('r')",
        }
      end

      # @return [String] HTML plus inline script for the chart, or '' when
      #   `render?` is false. `render?` and `dom_id` are not defined in this
      #   class; presumably they come from Chart (confirm).
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Labels and data are emitted as escaped JSON literals so a series
        # name containing a quote cannot break the generated script.
        # The tick formats are JavaScript expressions and stay unquoted.
        %(
          <div class="rde-chart rde-histogram">
            <h3 class="rde-chart-title">Histogram</h3>
            <div id="#{ dom_id }" style="height: 200px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = [
                  {
                    values: #{ js_literal(ca[:values]) },
                    key: #{ js_literal(ca[:x_axis_label].to_s) }
                  }
                ];

                nv.addGraph(function() {
                  var chart = nv.models.historicalBarChart()
                  ;

                  chart.xAxis
                    .axisLabel(#{ js_literal(ca[:x_axis_label].to_s) })
                    .tickFormat(#{ ca[:x_axis_tick_format] })
                  ;

                  chart.yAxis
                    .axisLabel(#{ js_literal(ca[:y_axis_label].to_s) })
                    .tickFormat(#{ ca[:y_axis_tick_format] })
                  ;

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .transition().duration(100)
                    .call(chart)
                  ;

                  nv.utils.windowResize(chart.update);

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

      private

      # Serializes value as JSON that is safe inside an inline <script>
      # element: "</" is rewritten to "<\/" so data cannot close the script.
      def js_literal(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1,78 @@
class RailsDataExplorer
  class Chart
    # Per-day frequency bar chart for the first (temporal) data series of a
    # data set, drawn in the browser with NVD3's historicalBarChart.
    class HistogramTemporal < Chart

      # @param _data_set [#data_series] data set; only the first series is charted
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Counts values per day. Each value is rounded down with
      # `beginning_of_day` and keyed by `to_i * 1000` (presumably epoch
      # milliseconds for JavaScript; confirm against the tick format).
      # Nil values are skipped instead of raising NoMethodError.
      #
      # @return [Hash] :values is an Array of { x: day_key, y: count } in
      #   first-seen order; the remaining keys are axis labels and JavaScript
      #   tick-format expressions that `render` interpolates verbatim.
      def compute_chart_attrs
        x_ds = @data_set.data_series.first
        counts = x_ds.values.each_with_object(Hash.new(0)) { |value, tally|
          next if value.nil?
          tally[value.beginning_of_day.to_i * 1000] += 1
        }
        {
          values: counts.map { |k, v| { x: k, y: v } },
          x_axis_label: x_ds.name,
          x_axis_tick_format: x_ds.axis_tick_format,
          y_axis_label: 'Frequency',
          y_axis_tick_format: "d3.format('r')",
        }
      end

      # @return [String] HTML plus inline script for the chart, or '' when
      #   `render?` is false. `render?` and `dom_id` are not defined in this
      #   class; presumably they come from Chart (confirm).
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Labels and data are emitted as escaped JSON literals so a series
        # name containing a quote cannot break the generated script.
        # The tick formats are JavaScript expressions and stay unquoted.
        %(
          <div class="rde-chart rde-histogram">
            <h3 class="rde-chart-title">Histogram</h3>
            <div id="#{ dom_id }" style="height: 200px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = [
                  {
                    values: #{ js_literal(ca[:values]) },
                    key: #{ js_literal(ca[:x_axis_label].to_s) }
                  }
                ];

                nv.addGraph(function() {
                  var chart = nv.models.historicalBarChart()
                  ;

                  chart.xAxis
                    .axisLabel(#{ js_literal(ca[:x_axis_label].to_s) })
                    .tickFormat(#{ ca[:x_axis_tick_format] })
                  ;

                  chart.yAxis
                    .axisLabel(#{ js_literal(ca[:y_axis_label].to_s) })
                    .tickFormat(#{ ca[:y_axis_tick_format] })
                  ;

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .transition().duration(100)
                    .call(chart)
                  ;

                  nv.utils.windowResize(chart.update);

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

      private

      # Serializes value as JSON that is safe inside an inline <script>
      # element: "</" is rewritten to "<\/" so data cannot close the script.
      def js_literal(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1 @@
1
+ # http://dc-js.github.io/dc.js/
@@ -0,0 +1,89 @@
# TODO: add :color chart_role (test first if it makes sense, e.g., for 'pay')
class RailsDataExplorer
  class Chart
    # Parallel-coordinates chart over every data series that has a
    # :dimension or :any role for this chart, drawn with d3.parcoords.
    class ParallelCoordinates < Chart

      # @param _data_set [#data_series] data set whose series become dimensions
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # @return [String] HTML plus inline script for the chart, or '' when
      #   `render?` is false. `dom_id` is not defined in this class;
      #   presumably it comes from Chart (confirm).
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Dimension names and row data are emitted as escaped JSON so that
        # series names or values cannot terminate the script element.
        %(
          <div class="rde-chart rde-parallel-coordinates">
            <h3 class="rde-chart-title">Parallel coordinates</h3>
            <div id="#{ dom_id }" class="rde-chart-parallel-coordinates parcoords" style="height: 400px; width: 100%"></div>
            <script type="text/javascript">
              (function() {
                var parcoords = d3.parcoords()("##{ dom_id }")
                  .dimensions(#{ js_literal(ca[:dimensions]) })
                  .types(#{ js_literal(ca[:types]) })
                  .alpha(#{ ca[:alpha] })
                ;

                parcoords.data(#{ js_literal(ca[:values]) })
                  .render()
                  .createAxes() // has to come before other methods that rely on axes (e.g., brushable)
                  // .shadows() // they don't redraw after reordering, so I'm turning them off for now.
                  .reorderable()
                  .brushable()
                ;

              })();
            </script>
          </div>
        )
      end

      # Render ParallelCoordinates only when there is at least one data series
      # with DataType Quantitative. If it's all Categorical, then ParallelSet
      # is much better suited.
      def render?
        @data_set.data_series.any? { |ds|
          ds.data_type.is_a?(RailsDataExplorer::DataType::Quantitative)
        }
      end

      # Builds the dimension names, one row Hash per observation, the
      # per-dimension type names, and the line opacity.
      #
      # @return [Hash] with keys :dimensions (Array of names), :values (Array
      #   of { name => value } rows), :types ({ name => 'string' | 'date' |
      #   'number' }) and :alpha (Float between 0.1 and 1.0)
      # @raise [RuntimeError] when a dimension series has an unhandled data type
      def compute_chart_attrs
        # Array() guards against a series with no role entry for this chart
        # (nil & [...] would raise NoMethodError).
        dimension_data_series = @data_set.data_series.select { |ds|
          (Array(ds.chart_roles[Chart::ParallelCoordinates]) & [:dimension, :any]).any?
        }
        dimension_names = dimension_data_series.map(&:name)
        # The row count is taken from the first dimension; with no dimensions
        # at all there are no rows.
        first_series = dimension_data_series.first
        number_of_values = first_series ? first_series.values.length : 0
        dimension_values = number_of_values.times.map do |idx|
          dimension_data_series.each_with_object({}) { |ds, row|
            row[ds.name] = if ds.data_type.is_a?(RailsDataExplorer::DataType::Quantitative::Temporal)
              # NOTE(review): a nil timestamp becomes 0 here (nil.to_i);
              # confirm whether that is intended.
              ds.values[idx].to_i * 1000
            else
              ds.values[idx]
            end
          }
        end
        dimension_types = dimension_data_series.each_with_object({}) { |ds, types|
          types[ds.name] = case ds.data_type
          when RailsDataExplorer::DataType::Categorical
            'string'
          when RailsDataExplorer::DataType::Quantitative::Temporal
            'date'
          when RailsDataExplorer::DataType::Quantitative::Integer,
               RailsDataExplorer::DataType::Quantitative::Decimal
            'number'
          else
            raise "Unhandled data_type: #{ ds.data_type.inspect }"
          end
        }
        # Opacity shrinks as 1 / ln(n), floored at 0.1 once ln(n) reaches 10.
        # It is capped at 1.0 because 1 / ln(2) is about 1.44, which is not a
        # valid opacity.
        log_n = Math.log([number_of_values, 2].max)
        alpha = [1.0, 1 / [log_n, 10].min].min
        {
          :dimensions => dimension_names,
          :values => dimension_values,
          :types => dimension_types,
          :alpha => alpha # from 1.0 to 0.1
        }
      end

      private

      # Serializes value as JSON that is safe inside an inline <script>
      # element: "</" is rewritten to "<\/" so data cannot close the script.
      def js_literal(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1,65 @@
# http://www.jasondavies.com/parallel-sets/
# Suitable when all data series are categorical
class RailsDataExplorer
  class Chart
    # Parallel-set chart over the selected dimension data series, drawn with
    # d3.parsets.
    class ParallelSet < Chart

      # @param _data_set [#data_series] data set whose series become dimensions
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Builds the dimension names and one row Hash per observation.
      #
      # @return [Hash] with keys :dimensions (Array of names) and :values
      #   (Array of { name => value } rows)
      def compute_chart_attrs
        # NOTE(review): roles are looked up under Chart::ParallelCoordinates,
        # not Chart::ParallelSet. This looks like a copy-paste slip but is
        # kept as-is because the structure of chart_roles is not visible
        # here; confirm before changing.
        # Array() guards against a series with no role entry
        # (nil & [...] would raise NoMethodError).
        dimension_data_series = @data_set.data_series.select { |ds|
          (Array(ds.chart_roles[Chart::ParallelCoordinates]) & [:dimension, :any]).any?
        }
        # The row count is taken from the first dimension; with no dimensions
        # at all there are no rows.
        first_series = dimension_data_series.first
        number_of_values = first_series ? first_series.values.length : 0
        dimension_names = dimension_data_series.map(&:name)
        dimension_values = number_of_values.times.map do |idx|
          dimension_data_series.each_with_object({}) { |ds, row|
            row[ds.name] = if ds.data_type.is_a?(RailsDataExplorer::DataType::Quantitative::Temporal)
              ds.values[idx].to_i * 1000
            else
              ds.values[idx]
            end
          }
        end
        {
          :dimensions => dimension_names,
          :values => dimension_values
        }
      end

      # @return [String] HTML plus inline script for the chart, or '' when
      #   `render?` is false. `render?` and `dom_id` are not defined in this
      #   class; presumably they come from Chart (confirm).
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Dimension names and row data are emitted as escaped JSON so that
        # series names or values cannot terminate the script element.
        %(
          <div class="rde-chart rde-parallel-set">
            <h3 class="rde-chart-title">Parallel Set</h3>
            <div id="#{ dom_id }" class="rde-chart-parallel-set" style="height: 600px; width: 100%"></div>
            <script type="text/javascript">
              (function() {
                var parset = d3.parsets()
                  .dimensions(#{ js_literal(ca[:dimensions]) })
                ;

                var vis = d3.select("##{ dom_id }")
                  .append("svg")
                  .attr("width", parset.width())
                  .attr("height", parset.height())
                ;

                vis.datum(#{ js_literal(ca[:values]) })
                  .call(parset)
                ;

              })();
            </script>
          </div>
        )
      end

      private

      # Serializes value as JSON that is safe inside an inline <script>
      # element: "</" is rewritten to "<\/" so data cannot close the script.
      def js_literal(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1,67 @@
class RailsDataExplorer
  class Chart
    # Donut-style pie chart showing each distinct value's share of the first
    # data series, drawn in the browser with NVD3's pieChart.
    class PieChart < Chart

      # @param _data_set [#data_series] data set; only the first series is charted
      # @param options [Hash] chart options; copied into a fresh Hash
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Computes each distinct value's relative frequency (count / total).
      #
      # @return [Hash] :values is an Array of { x: value, y: fraction } sorted
      #   by descending fraction. The axis label and tick-format keys are
      #   also returned, although `render` in this class only reads :values.
      def compute_chart_attrs
        x_ds = @data_set.data_series.first
        total_count = x_ds.values.length
        counts = x_ds.values.each_with_object(Hash.new(0)) { |value, tally| tally[value] += 1 }
        {
          values: counts.map { |k, v|
            { x: k, y: (v / total_count.to_f) }
          }.sort { |a, b|
            b[:y] <=> a[:y]
          },
          x_axis_label: x_ds.name,
          x_axis_tick_format: "",
          y_axis_label: 'Frequency',
          y_axis_tick_format: "d3.format('r')",
        }
      end

      # @return [String] HTML plus inline script for the chart, or '' when
      #   `render?` is false. `render?` and `dom_id` are not defined in this
      #   class; presumably they come from Chart (confirm).
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Slice data is emitted as escaped JSON so that category values
        # cannot terminate the script element.
        %(
          <div class="rde-chart rde-pie-chart">
            <h3 class="rde-chart-title">Pie Chart</h3>
            <div id="#{ dom_id }" style="height: 400px; width: 400px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = #{ js_literal(ca[:values]) };

                nv.addGraph(function() {
                  var chart = nv.models.pieChart()
                  ;

                  chart.valueFormat(d3.format('.1%'))
                    .donut(true)
                  ;

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .transition().duration(100)
                    .call(chart)
                  ;

                  nv.utils.windowResize(chart.update);

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

      private

      # Serializes value as JSON that is safe inside an inline <script>
      # element: "</" is rewritten to "<\/" so data cannot close the script.
      def js_literal(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end