rails-data-explorer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/.gitignore +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +52 -0
  6. data/Rakefile +18 -0
  7. data/lib/rails-data-explorer.rb +44 -0
  8. data/lib/rails-data-explorer/action_view_extension.rb +12 -0
  9. data/lib/rails-data-explorer/active_record_extension.rb +14 -0
  10. data/lib/rails-data-explorer/chart.rb +52 -0
  11. data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
  12. data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
  13. data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
  14. data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
  15. data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
  16. data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
  17. data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
  18. data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
  19. data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
  20. data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
  21. data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
  22. data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
  23. data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
  24. data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
  25. data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
  26. data/lib/rails-data-explorer/data_series.rb +115 -0
  27. data/lib/rails-data-explorer/data_set.rb +127 -0
  28. data/lib/rails-data-explorer/data_type.rb +34 -0
  29. data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
  30. data/lib/rails-data-explorer/data_type/geo.rb +1 -0
  31. data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
  32. data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
  33. data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
  34. data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
  35. data/lib/rails-data-explorer/engine.rb +24 -0
  36. data/lib/rails-data-explorer/exploration.rb +89 -0
  37. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
  38. data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
  39. data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
  40. data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
  41. data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
  42. data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
  43. data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
  44. data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
  45. data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
  46. data/rails-data-explorer.gemspec +30 -0
  47. data/vendor/assets/javascripts/d3.boxplot.js +302 -0
  48. data/vendor/assets/javascripts/d3.parcoords.js +585 -0
  49. data/vendor/assets/javascripts/d3.parsets.js +663 -0
  50. data/vendor/assets/javascripts/d3.v3.js +9294 -0
  51. data/vendor/assets/javascripts/nv.d3.js +14369 -0
  52. data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
  53. data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
  54. data/vendor/assets/stylesheets/bootstrap.css +1727 -0
  55. data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
  56. data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
  57. data/vendor/assets/stylesheets/d3.parsets.css +34 -0
  58. data/vendor/assets/stylesheets/nv.d3.css +769 -0
  59. data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
  60. data/vendor/assets/stylesheets/rde-default-style.css +42 -0
  61. metadata +250 -0
@@ -0,0 +1,120 @@
1
class RailsDataExplorer
  class Chart
    # Scatterplot of two data series, optionally split into groups by a third.
    #
    # Data series are assigned to the :x, :y, :color and :size roles based on
    # the roles each series lists under Chart::Scatterplot in its #chart_roles
    # (a series listing :any qualifies for every role). Each series fills at
    # most one role; roles are filled in the order x, y, color, size, each one
    # taking the first remaining candidate.
    class Scatterplot < Chart

      # @param _data_set [Object] the data to plot; must respond to
      #   #data_series and #dimensions_count
      # @param options [Hash] copied and stored on the instance; not read by
      #   this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Computes the data and axis attributes consumed by #render.
      #
      # With two data series all points go into a single group with an empty
      # key. With three data series the points are split into one group per
      # unique value of the :color series (or the :size series if there is no
      # :color series).
      #
      # @return [Hash, nil] a Hash with the keys :values, :x_axis_label,
      #   :x_axis_tick_format, :y_axis_label and :y_axis_tick_format, or nil
      #   if the data set has more than three data series (not supported)
      # @raise [ArgumentError] if fewer than two data series are given, or if
      #   no series can fill the :x or the :y role
      # @raise [RuntimeError] if three data series are given and none of them
      #   can fill the :color or :size role
      def compute_chart_attrs
        dimensions_count = @data_set.dimensions_count
        if dimensions_count < 2
          raise(ArgumentError.new("At least two data series required for scatterplot, only #{ dimensions_count } given"))
        end
        # More than three data series are not supported.
        return nil if dimensions_count > 3

        x_ds = series_for_role(:x, [])
        y_ds = series_for_role(:y, [x_ds])
        color_ds = series_for_role(:color, [x_ds, y_ds])
        size_ds = series_for_role(:size, [x_ds, y_ds, color_ds])
        if x_ds.nil? || y_ds.nil?
          # Fail with a meaningful error instead of a NoMethodError on nil.
          raise(ArgumentError.new("Scatterplot requires one data series for the :x role and another one for the :y role"))
        end

        groups = if 2 == dimensions_count
          ungrouped_points(x_ds, y_ds, color_ds)
        else
          points_grouped_by(color_ds || size_ds, x_ds, y_ds)
        end
        {
          values: groups,
          x_axis_label: x_ds.name,
          x_axis_tick_format: x_ds.axis_tick_format,
          y_axis_label: y_ds.name,
          y_axis_tick_format: y_ds.axis_tick_format,
        }
      end

      # Renders the chart container and the script that draws the chart.
      #
      # @return [String] the HTML/JavaScript snippet, or an empty String if
      #   #render? is false or the chart attributes can't be computed for the
      #   number of data series given
      def render
        return '' unless render?
        chart_attrs = compute_chart_attrs
        return '' if chart_attrs.nil?
        %(
          <div class="rde-chart rde-scatterplot">
            <h3 class="rde-chart-title">Scatterplot</h3>
            <div id="#{ dom_id }" style="height: 400px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = #{ json_for_script(chart_attrs[:values]) };

                nv.addGraph(function() {
                  var chart = nv.models.scatterChart()
                                .showDistX(true)
                                .showDistY(true)
                                .useVoronoi(true)
                                .color(d3.scale.category10().range())
                                .transitionDuration(300)
                                ;

                  chart.xAxis.tickFormat(#{ chart_attrs[:x_axis_tick_format] })
                             .axisLabel(#{ json_for_script(chart_attrs[:x_axis_label].to_s) })
                             ;

                  chart.yAxis.tickFormat(#{ chart_attrs[:y_axis_tick_format] })
                             .axisLabel(#{ json_for_script(chart_attrs[:y_axis_label].to_s) })
                             ;

                  chart.tooltipContent(function(key) {
                    return key;
                  });

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .call(chart);

                  nv.utils.windowResize(chart.update);

                  chart.dispatch.on('stateChange', function(e) { ('New State:', JSON.stringify(e)); });

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

    private

      # Returns the first data series that lists the given role (or :any) for
      # Chart::Scatterplot and is not already used for another role.
      #
      # @param role [Symbol] one of :x, :y, :color, :size
      # @param already_used [Array] series (or nils) assigned to earlier roles
      # @return [Object, nil] the data series, or nil if none qualifies
      def series_for_role(role, already_used)
        candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::Scatterplot] & [role, :any]).any?
        }
        (candidates - already_used).first
      end

      # Builds a single group (with an empty key) that contains all points.
      #
      # @return [Array<Hash>] one-element Array of { key:, values: }
      def ungrouped_points(x_ds, y_ds, color_ds)
        points = x_ds.values.each_index.map { |idx|
          point = { x: x_ds.values[idx], y: y_ds.values[idx] }
          point[:color] = color_ds.values[idx] if color_ds
          point
        }
        [ { key: '', values: points } ]
      end

      # Builds one group of points per unique value of visual_attr_ds. Groups
      # are ordered by first appearance of their value.
      #
      # @return [Array<Hash>] Array of { key:, values: }
      # @raise [RuntimeError] if visual_attr_ds is nil
      def points_grouped_by(visual_attr_ds, x_ds, y_ds)
        raise "No visual_attr_ds given" if visual_attr_ds.nil?
        groups = visual_attr_ds.values.uniq.each_with_object({}) { |visual_attr, m|
          m[visual_attr] = []
        }
        x_ds.values.each_index { |idx|
          groups[visual_attr_ds.values[idx]] << { x: x_ds.values[idx], y: y_ds.values[idx] }
        }
        groups.map { |k, v| { key: k, values: v } }
      end

      # Serializes value as JSON for embedding into an inline <script> block.
      # Strings come out as quoted and escaped JavaScript string literals, so
      # quotes inside a series name can't break the generated script. Any "</"
      # is rewritten to "<\/" so the data can't close the script element.
      #
      # @param value [Object] anything that responds to #to_json
      # @return [String]
      def json_for_script(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1 @@
1
+ # http://benjiec.github.io/scatter-matrix/demo/demo.html#
@@ -0,0 +1,120 @@
1
class RailsDataExplorer
  class Chart
    # Stacked bar chart for two data series: one bar per unique x value, each
    # bar split by the share of every unique y value among the rows that have
    # that x value.
    #
    # NOTE(review): the x and y series are picked via the roles registered
    # under Chart::ContingencyTable, not under this class. That looks
    # intentional (both charts plot the same kind of data) - confirm.
    class StackedBarChartCategoricalPercent < Chart

      # @param _data_set [Object] the data to plot; must respond to
      #   #data_series and #dimensions_count
      # @param options [Hash] copied and stored on the instance; not read by
      #   this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Computes the data and axis attributes consumed by #render.
      #
      # Bars (x values) are ordered by descending row count, stack segments
      # (y values) by descending overall count. Every y value in the result
      # is the fraction (0.0..1.0) of rows with the given x value that have
      # the given y value.
      #
      # @return [Hash] with the keys :values, :x_axis_label,
      #   :x_axis_tick_format, :y_axis_label and :y_axis_tick_format
      # @raise [ArgumentError] unless exactly two data series are given, or
      #   if no series can fill the :x or the :y role
      def compute_chart_attrs
        # Validate up front, before any series is dereferenced.
        unless 2 == @data_set.dimensions_count
          raise(ArgumentError.new("Exactly two data series required for stacked bar chart."))
        end

        # Prefer the series with the most unique values for the x axis.
        x_ds = series_for_role(:x).sort { |a,b|
          b.uniq_vals.length <=> a.uniq_vals.length
        }.first
        y_ds = (series_for_role(:y) - [x_ds]).first
        if x_ds.nil? || y_ds.nil?
          raise(ArgumentError.new("Stacked bar chart requires one data series for the :x role and another one for the :y role"))
        end

        data_matrix = count_matrix(x_ds, y_ds)
        x_sorted_keys = x_ds.uniq_vals.sort { |a,b|
          data_matrix[b][:_sum] <=> data_matrix[a][:_sum]
        }
        y_sorted_keys = y_ds.uniq_vals.sort { |a,b|
          data_matrix[:_sum][b] <=> data_matrix[:_sum][a]
        }

        values = y_sorted_keys.map { |y_val|
          {
            key: y_val,
            values: x_sorted_keys.map { |x_val|
              {
                x: x_val,
                y: (data_matrix[x_val][y_val] / data_matrix[x_val][:_sum].to_f)
              }
            }
          }
        }
        {
          values: values,
          x_axis_label: x_ds.name,
          x_axis_tick_format: 'function(d) { return d }',
          y_axis_label: "#{ y_ds.name } distribution [%]",
          y_axis_tick_format: "d3.format('.1%')",
        }
      end

      # Renders the chart container and the script that draws the chart.
      #
      # @return [String] the HTML/JavaScript snippet, or an empty String if
      #   #render? is false
      def render
        return '' unless render?
        ca = compute_chart_attrs
        %(
          <div class="rde-chart rde-bar-chart">
            <h3 class="rde-chart-title">Stacked Bar Chart</h3>
            <div id="#{ dom_id }" style="height: 200px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = #{ json_for_script(ca[:values]) };

                nv.addGraph(function() {
                  var chart = nv.models.multiBarChart()
                                ;

                  chart.xAxis
                    .axisLabel(#{ json_for_script(ca[:x_axis_label].to_s) })
                    .tickFormat(#{ ca[:x_axis_tick_format] })
                    ;

                  chart.yAxis
                    .axisLabel(#{ json_for_script(ca[:y_axis_label].to_s) })
                    .tickFormat(#{ ca[:y_axis_tick_format] })
                    ;

                  chart.multibar.stacked(true);
                  chart.showControls(false);

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .transition().duration(100)
                    .call(chart)
                    ;

                  nv.utils.windowResize(chart.update);

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

    private

      # Returns all data series that list the given role (or :any) for
      # Chart::ContingencyTable.
      #
      # @param role [Symbol] :x or :y
      # @return [Array]
      def series_for_role(role)
        @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [role, :any]).any?
        }
      end

      # Counts how often each combination of x and y value occurs.
      #
      # The result is a nested Hash: matrix[x_val][y_val] holds the count for
      # one combination, matrix[x_val][:_sum] and matrix[:_sum][y_val] hold
      # the per-x and per-y totals, and matrix[:_sum][:_sum] the grand total.
      #
      # @return [Hash]
      def count_matrix(x_ds, y_ds)
        y_vals = y_ds.uniq_vals
        matrix = { :_sum => { :_sum => 0 } }
        y_vals.each { |y_val| matrix[:_sum][y_val] = 0 }
        x_ds.uniq_vals.each { |x_val|
          row = { :_sum => 0 }
          y_vals.each { |y_val| row[y_val] = 0 }
          matrix[x_val] = row
        }
        x_ds.values.zip(y_ds.values).each { |x_val, y_val|
          matrix[x_val][y_val] += 1
          matrix[:_sum][y_val] += 1
          matrix[x_val][:_sum] += 1
          matrix[:_sum][:_sum] += 1
        }
        matrix
      end

      # Serializes value as JSON for embedding into an inline <script> block.
      # Strings come out as quoted and escaped JavaScript string literals, so
      # quotes inside a series name can't break the generated script. Any "</"
      # is rewritten to "<\/" so the data can't close the script element.
      #
      # @param value [Object] anything that responds to #to_json
      # @return [String]
      def json_for_script(value)
        value.to_json.gsub('</') { '<\/' }
      end

    end
  end
end
@@ -0,0 +1,115 @@
1
class RailsDataExplorer
  # A named list of values together with its data type and the roles it can
  # play in each chart class.
  #
  # Derived values (#uniq_vals, #min_val, ...) are memoized, so +values+ is
  # expected not to be mutated after the series has been created.
  class DataSeries

    # TODO: Add concept of significant figures for rounding values when displaying them
    # http://en.wikipedia.org/wiki/Significant_figures

    attr_reader :data_type, :name, :values, :chart_roles
    delegate :available_chart_types, :to => :data_type, :prefix => false
    delegate :available_chart_roles, :to => :data_type, :prefix => false

    # @param _name [String] name of the series
    # @param _values [Array] the values of the series
    # @param options [Hash] all keys are optional:
    #   * :chart_roles [Array<Symbol>] restricts the roles this series may
    #     play, e.g. :x, :y, :color. Defaults to all roles the data type
    #     offers.
    #   * :data_type [Object] the data type to use. Inferred from the values
    #     if not given.
    # @raise [ArgumentError] if no data type is given and none can be
    #   inferred from the values
    def initialize(_name, _values, options={})
      options = { chart_roles: [], data_type: nil }.merge(options)
      @name = _name
      @values = _values
      @data_type = init_data_type(options[:data_type])
      @chart_roles = init_chart_roles(options[:chart_roles]) # after data_type!
    end

    # Returns descriptive_statistics as a flat Array
    def descriptive_statistics
      @data_type.descriptive_statistics(values)
    end

    # Returns descriptive_statistics as a renderable table structure
    def descriptive_statistics_table
      @data_type.descriptive_statistics_table(values)
    end

    # Returns a short String representation of the values: all values if
    # there are fewer than three of them or their inspect output is shorter
    # than 80 characters, otherwise just the first and the last value.
    #
    # @return [String]
    def values_summary
      if values.length < 3 || values.inspect.length < 80
        values.inspect
      else
        "[#{ values.first } ... #{ values.last }]"
      end
    end

    # Returns a multi-line, indented String representation of the series.
    #
    # @param indent [Integer] number of spaces used to indent the attributes
    # @param recursive [Integer] remaining recursion depth; unused here since
    #   a data series has nothing to recurse into
    # @return [String]
    def inspect(indent=1, recursive=1000)
      r = %(#<#{ self.class.to_s }\n)
      r << [
        "@name=#{ name.inspect }",
        "@data_type=#{ data_type.inspect }",
        "@chart_roles=#{ chart_roles.inspect }",
        "@values=<count: #{ values.count }, items: #{ values_summary }>",
      ].map { |e| "#{ ' ' * indent }#{ e }\n"}.join
      r << %(#{ ' ' * (indent-1) }>\n)
    end

    # Returns whatever the data type computes as axis tick format for the
    # values of this series.
    def axis_tick_format
      data_type.axis_tick_format(values)
    end

    # @return [Array] the distinct values in order of first appearance
    #   (memoized)
    def uniq_vals
      @uniq_vals ||= values.uniq
    end

    # @return [Integer] the number of distinct values (memoized)
    def uniq_vals_count
      @uniq_vals_count ||= uniq_vals.length
    end

    # @return [Object, nil] the smallest non-nil value, nil if there is none
    def min_val
      @min_val ||= values.compact.min
    end

    # @return [Object, nil] the largest non-nil value, nil if there is none
    def max_val
      @max_val ||= values.compact.max
    end

  private

    # Builds the mapping from chart class to the roles this series can play
    # in that chart.
    #
    # Without overrides every chart type of the data type is included with
    # all its roles. With overrides only the overlap between each chart
    # type's roles and the overrides is kept, and chart types without any
    # overlap are left out.
    #
    # @param chart_role_overrides [Array<Symbol>] e.g. :x, :y, :color
    # @return [Hash] frozen; keys are chart classes and values are Arrays of
    #   roles. Looking up a chart class that is not a key returns an empty,
    #   frozen Array.
    def init_chart_roles(chart_role_overrides)
      overrides_given = chart_role_overrides.any?
      # The default Array is shared between all missing keys. It is frozen so
      # it can't be modified by accident through one of them.
      available_chart_types.each_with_object(Hash.new([].freeze)) { |chart_type, m|
        roles = chart_type[:chart_roles]
        if overrides_given
          roles = roles & chart_role_overrides
          next if roles.empty?
        end
        chart_class = chart_type[:chart_class]
        m[chart_class] = (m[chart_class] + roles).uniq
      }.freeze
    end

    # Returns the given data type, or infers one from the first non-nil
    # value if none is given.
    #
    # @param data_type_override [Object, nil]
    # @return [Object] the data type
    # @raise [ArgumentError] if the data type can't be inferred
    def init_data_type(data_type_override)
      return data_type_override unless data_type_override.nil?
      # Leading nils carry no type information, skip them.
      sample = values.find { |v| !v.nil? }
      case sample
      when Integer
        # Fixnum and Bignum are covered by Integer. They must not be
        # referenced since they no longer exist in Ruby 3.2 and later.
        DataType::Quantitative::Integer.new
      when Float
        DataType::Quantitative::Decimal.new
      when String
        DataType::Categorical.new
      when DateTime, ActiveSupport::TimeWithZone
        DataType::Quantitative::Temporal.new
      else
        raise(ArgumentError.new("Can't infer data type for value: #{ sample.class.inspect }"))
      end
    end

  end
end
@@ -0,0 +1,127 @@
1
# Container for data series
class RailsDataExplorer
  class DataSet

    attr_reader :data_series

    # @param[Array<Numeric, String, Symbol, Nil, Hash, DataSeries>] values_or_data_series
    #     Array can contain the following:
    #     * Numeric, String, Symbol, Nil - for a single data series
    #     * Hash - for multiple data series with the following keys:
    #         * :name - name for the series as String
    #         * :values - scalar values as array
    #         * :chart_roles [Array<Symbol>, optional] - what to use this series for. possible values: :x, :y, :color
    #         * :data_type (optional) - :quantitative, :categorical, :temporal
    #     * DataSeries
    # @param[String] exploration_title used as fall back for data series name
    # @raise [ArgumentError] if the first element has an unsupported type, or
    #   if the resulting data series fail validation
    def initialize(values_or_data_series, exploration_title)
      @data_series = initialize_data_series(values_or_data_series, exploration_title)
      validate_data_series
    end

    # Converts the given values to an Array of DataSeries. The type of the
    # first element decides how the whole Array is interpreted.
    #
    # @param values_or_data_series [Array] see #initialize
    # @param exploration_title [String] name for the data series if scalar
    #   values are given
    # @return [Array<DataSeries>]
    # @raise [ArgumentError] if the first element has an unsupported type
    def initialize_data_series(values_or_data_series, exploration_title)
      case values_or_data_series.first
      when ActiveSupport::TimeWithZone, DateTime, Numeric, NilClass, String, Symbol
        # Array of scalar values, convert to single data series
        [DataSeries.new(exploration_title, values_or_data_series)]
      when Hash
        # Array of Hashes, convert each Hash to a data series
        values_or_data_series.map { |data_series_attrs|
          # Work on a copy so that the caller's Hash keeps :name and :values.
          remaining_attrs = data_series_attrs.dup
          DataSeries.new(
            remaining_attrs.delete(:name),
            remaining_attrs.delete(:values),
            remaining_attrs # pass remaining attrs as options
          )
        }
      when DataSeries
        # return as is
        values_or_data_series
      else
        raise(
          ArgumentError.new(
            "Invalid datum. Only Hash, Numeric, String, Symbol, and Nil are allowed. " + \
            "Found #{ values_or_data_series.first.class.to_s }."
          )
        )
      end
    end

    # Checks that there is at least one data series and that all data series
    # have the same number of values.
    #
    # @raise [ArgumentError] if one of the checks fails
    def validate_data_series
      # presence of at least one data_series. Checked first, otherwise an
      # empty data set would be reported as a length mismatch.
      if 0 == dimensions_count
        raise(ArgumentError.new("Please provide at least 1 data series."))
      end
      # all series have same size
      unless 1 == @data_series.map { |e| e.values.length }.uniq.length
        raise(ArgumentError.new("All data series must have same length."))
      end
      # TODO: all elements in a series are of same type
    end

    # @return [Integer] the number of data series
    def dimensions_count
      @data_series.length
    end

    # Returns the chart classes that can be used for this data set.
    #
    # For a single data series these are the chart classes that series
    # offers for one dimension. For multiple data series it is the
    # intersection of the chart classes that each series offers for the
    # number of dimensions and its own chart roles.
    #
    # @return [Array, nil] the chart classes, nil if there is no data series
    def available_chart_types
      case dimensions_count
      when 0
        # invalid, handled in validate_data_series
        nil
      when 1
        # charts for a single data series, use that series' available_chart_types
        @data_series.first.available_chart_types(dimensions_count: 1).map { |e| e[:chart_class] }
      else
        # TODO: define on each chart type which chart_roles are required.
        # Then use only charts for which all roles are filled.
        chart_classes_per_series = @data_series.map { |ds|
          constraints = { dimensions_count: dimensions_count, chart_roles: ds.chart_roles }
          ds.available_chart_types(constraints).map { |e| e[:chart_class] }
        }
        # find intersection of all available chart types. The current series
        # is the left operand, so the result is ordered like the last series.
        chart_classes_per_series.inject { |common, current| current & common }
      end
    end

    # Returns the descriptive statistics of the data series if there is
    # exactly one. Not implemented for multiple data series.
    #
    # @return [Object, nil] nil unless there is exactly one data series
    def descriptive_statistics
      # Statistics for two or more data series are not implemented yet.
      return nil unless 1 == dimensions_count
      @data_series.first.descriptive_statistics
    end

    # Returns a multi-line, indented String representation of the data set.
    #
    # @param indent [Integer] number of spaces used to indent the attributes
    # @param recursive [Integer] remaining recursion depth; data series and
    #   available chart types are only included if greater than zero
    # @return [String]
    def inspect(indent=1, recursive=1000)
      r = %(#<#{ self.class.to_s }\n)
      r << [
        "@dimensions_count=#{ dimensions_count }",
      ].map { |e| "#{ ' ' * indent }#{ e }\n"}.join
      if recursive > 0
        # data_series
        r << %(#{ ' ' * indent }@data_series=[\n)
        data_series.each do |e|
          r << "#{ ' ' * (indent + 1) }"
          r << e.inspect(indent + 2, recursive - 1)
        end
        r << "#{ ' ' * indent }]\n"
        # available_chart_types
        r << %(#{ ' ' * indent }@available_chart_types=[\n)
        available_chart_types.each do |e|
          r << "#{ ' ' * (indent + 1) }#{ e.inspect }\n"
        end
        r << "#{ ' ' * indent }]\n"
      end
      r << %(#{ ' ' * (indent-1) }>\n)
    end

  end
end
+ end