rails-data-explorer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/.gitignore +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +52 -0
  6. data/Rakefile +18 -0
  7. data/lib/rails-data-explorer.rb +44 -0
  8. data/lib/rails-data-explorer/action_view_extension.rb +12 -0
  9. data/lib/rails-data-explorer/active_record_extension.rb +14 -0
  10. data/lib/rails-data-explorer/chart.rb +52 -0
  11. data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
  12. data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
  13. data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
  14. data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
  15. data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
  16. data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
  17. data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
  18. data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
  19. data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
  20. data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
  21. data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
  22. data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
  23. data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
  24. data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
  25. data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
  26. data/lib/rails-data-explorer/data_series.rb +115 -0
  27. data/lib/rails-data-explorer/data_set.rb +127 -0
  28. data/lib/rails-data-explorer/data_type.rb +34 -0
  29. data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
  30. data/lib/rails-data-explorer/data_type/geo.rb +1 -0
  31. data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
  32. data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
  33. data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
  34. data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
  35. data/lib/rails-data-explorer/engine.rb +24 -0
  36. data/lib/rails-data-explorer/exploration.rb +89 -0
  37. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
  38. data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
  39. data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
  40. data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
  41. data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
  42. data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
  43. data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
  44. data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
  45. data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
  46. data/rails-data-explorer.gemspec +30 -0
  47. data/vendor/assets/javascripts/d3.boxplot.js +302 -0
  48. data/vendor/assets/javascripts/d3.parcoords.js +585 -0
  49. data/vendor/assets/javascripts/d3.parsets.js +663 -0
  50. data/vendor/assets/javascripts/d3.v3.js +9294 -0
  51. data/vendor/assets/javascripts/nv.d3.js +14369 -0
  52. data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
  53. data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
  54. data/vendor/assets/stylesheets/bootstrap.css +1727 -0
  55. data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
  56. data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
  57. data/vendor/assets/stylesheets/d3.parsets.css +34 -0
  58. data/vendor/assets/stylesheets/nv.d3.css +769 -0
  59. data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
  60. data/vendor/assets/stylesheets/rde-default-style.css +42 -0
  61. metadata +250 -0
@@ -0,0 +1,120 @@
class RailsDataExplorer
  class Chart
    # Renders a data set with two or three data series as an NVD3 scatter chart.
    # With three series, the third one (role :color or :size) is used to split
    # the points into separately keyed groups.
    class Scatterplot < Chart

      # @param[DataSet] _data_set the data set to plot
      # @param[Hash, optional] options stored as a copy in @options
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Computes the data and axis attributes consumed by #render.
      #
      # @return[Hash] with keys :values, :x_axis_label, :x_axis_tick_format,
      #   :y_axis_label and :y_axis_tick_format
      # @raise[ArgumentError] if the data set has fewer than two or more than
      #   three data series
      # @raise[RuntimeError] if three series are given but none of them can be
      #   used for color or size
      def compute_chart_attrs
        # Each series is used for at most one role; earlier roles win.
        x_ds = candidates_for(:x).first
        y_ds = (candidates_for(:y) - [x_ds]).first
        color_ds = (candidates_for(:color) - [x_ds, y_ds]).first
        size_ds = (candidates_for(:size) - [x_ds, y_ds, color_ds]).first

        values = case @data_set.dimensions_count
        when 0, 1
          raise(ArgumentError.new("At least two data series required for scatterplot, only #{ @data_set.dimensions_count } given"))
        when 2
          points = x_ds.values.length.times.map { |idx|
            point = { x: x_ds.values[idx], y: y_ds.values[idx] }
            point[:color] = color_ds.values[idx] if color_ds
            point
          }
          [ { key: '', values: points } ]
        when 3
          visual_attr_ds = color_ds || size_ds
          raise "No visual_attr_ds given" if visual_attr_ds.nil?
          # One group of points per distinct value of the third series,
          # in order of first appearance.
          groups = visual_attr_ds.values.uniq.inject({}) { |m, visual_attr|
            m[visual_attr] = []
            m
          }
          x_ds.values.length.times.each { |idx|
            groups[visual_attr_ds.values[idx]] << { x: x_ds.values[idx], y: y_ds.values[idx] }
          }
          groups.map { |k, v| { key: k, values: v } }
        else
          # Previously this branch returned nil, which made #render fail with
          # a NoMethodError on nil. Fail early with a descriptive error.
          raise(ArgumentError.new("At most three data series supported for scatterplot, #{ @data_set.dimensions_count } given"))
        end

        {
          values: values,
          x_axis_label: x_ds.name,
          x_axis_tick_format: x_ds.axis_tick_format,
          y_axis_label: y_ds.name,
          y_axis_tick_format: y_ds.axis_tick_format,
        }
      end

      # @return[String] HTML and JavaScript for the chart, or an empty string
      #   if render? is false
      def render
        return '' unless render?
        chart_attrs = compute_chart_attrs
        # Axis labels are JSON encoded so that quotes and backslashes in a
        # series name cannot terminate the JavaScript string literal.
        # The tick formats are JavaScript expressions and are inserted as is.
        %(
          <div class="rde-chart rde-scatterplot">
            <h3 class="rde-chart-title">Scatterplot</h3>
            <div id="#{ dom_id }" style="height: 400px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = #{ chart_attrs[:values].to_json };

                nv.addGraph(function() {
                  var chart = nv.models.scatterChart()
                                .showDistX(true)
                                .showDistY(true)
                                .useVoronoi(true)
                                .color(d3.scale.category10().range())
                                .transitionDuration(300)
                                ;

                  chart.xAxis.tickFormat(#{ chart_attrs[:x_axis_tick_format] })
                             .axisLabel(#{ chart_attrs[:x_axis_label].to_s.to_json })
                             ;

                  chart.yAxis.tickFormat(#{ chart_attrs[:y_axis_tick_format] })
                             .axisLabel(#{ chart_attrs[:y_axis_label].to_s.to_json })
                             ;

                  chart.tooltipContent(function(key) {
                    return key;
                  });

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .call(chart);

                  nv.utils.windowResize(chart.update);

                  chart.dispatch.on('stateChange', function(e) { ('New State:', JSON.stringify(e)); });

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

      private

      # @param[Symbol] role one of :x, :y, :color, :size
      # @return[Array<DataSeries>] all series whose scatterplot chart roles
      #   include the given role or :any, in data set order
      def candidates_for(role)
        @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::Scatterplot] & [role, :any]).any?
        }
      end

    end
  end
end
@@ -0,0 +1 @@
1
+ # http://benjiec.github.io/scatter-matrix/demo/demo.html#
@@ -0,0 +1,120 @@
class RailsDataExplorer
  class Chart
    # Renders two data series as an NVD3 stacked bar chart: one bar per
    # distinct x value, stacked by the relative frequency of each y value
    # within that x value.
    class StackedBarChartCategoricalPercent < Chart

      # @param[DataSet] _data_set the data set to plot
      # @param[Hash, optional] options stored as a copy in @options
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Computes the data and axis attributes consumed by #render.
      #
      # @return[Hash] with keys :values, :x_axis_label, :x_axis_tick_format,
      #   :y_axis_label and :y_axis_tick_format
      # @raise[ArgumentError] unless the data set has exactly two data series
      def compute_chart_attrs
        # Validate up front: with a single series there is no y series and
        # the code below would fail with a NoMethodError on nil before the
        # intended ArgumentError could be raised.
        unless 2 == @data_set.dimensions_count
          raise(ArgumentError.new("Exactly two data series required for stacked bar chart."))
        end

        # NOTE(review): roles are looked up under Chart::ContingencyTable, not
        # under this class - presumably both charts share their roles; confirm.
        # The series with the most distinct values is preferred for the x axis.
        x_candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [:x, :any]).any?
        }.sort { |a,b| b.uniq_vals.length <=> a.uniq_vals.length }
        y_candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [:y, :any]).any?
        }

        x_ds = x_candidates.first
        y_ds = (y_candidates - [x_ds]).first
        x_vals = x_ds.uniq_vals
        y_vals = y_ds.uniq_vals

        # data_matrix[x_val][y_val] holds the number of observations with that
        # combination; the :_sum keys hold row, column and grand totals.
        data_matrix = { :_sum => { :_sum => 0 } }
        y_vals.each { |y_val| data_matrix[:_sum][y_val] = 0 }
        x_vals.each { |x_val|
          data_matrix[x_val] = { :_sum => 0 }
          y_vals.each { |y_val| data_matrix[x_val][y_val] = 0 }
        }
        x_ds.values.length.times { |idx|
          x_val = x_ds.values[idx]
          y_val = y_ds.values[idx]
          data_matrix[x_val][y_val] += 1
          data_matrix[:_sum][y_val] += 1
          data_matrix[x_val][:_sum] += 1
          data_matrix[:_sum][:_sum] += 1
        }

        # Most frequent values first on both axes.
        x_sorted_keys = x_vals.sort { |a,b|
          data_matrix[b][:_sum] <=> data_matrix[a][:_sum]
        }
        y_sorted_keys = y_vals.sort { |a,b|
          data_matrix[:_sum][b] <=> data_matrix[:_sum][a]
        }

        # One chart series per y value; each bar segment is the share of that
        # y value among all observations with the given x value.
        values = y_sorted_keys.map { |y_val|
          {
            key: y_val,
            values: x_sorted_keys.map { |x_val|
              {
                x: x_val,
                y: (data_matrix[x_val][y_val] / data_matrix[x_val][:_sum].to_f)
              }
            }
          }
        }

        {
          values: values,
          x_axis_label: x_ds.name,
          x_axis_tick_format: 'function(d) { return d }',
          y_axis_label: "#{ y_ds.name } distribution [%]",
          y_axis_tick_format: "d3.format('.1%')",
        }
      end

      # @return[String] HTML and JavaScript for the chart, or an empty string
      #   if render? is false
      def render
        return '' unless render?
        ca = compute_chart_attrs
        # Axis labels are JSON encoded so that quotes and backslashes in a
        # series name cannot terminate the JavaScript string literal.
        # The tick formats are JavaScript expressions and are inserted as is.
        %(
          <div class="rde-chart rde-bar-chart">
            <h3 class="rde-chart-title">Stacked Bar Chart</h3>
            <div id="#{ dom_id }" style="height: 200px;">
              <svg></svg>
            </div>
            <script type="text/javascript">
              (function() {
                var data = #{ ca[:values].to_json };

                nv.addGraph(function() {
                  var chart = nv.models.multiBarChart()
                                ;

                  chart.xAxis
                    .axisLabel(#{ ca[:x_axis_label].to_s.to_json })
                    .tickFormat(#{ ca[:x_axis_tick_format] })
                    ;

                  chart.yAxis
                    .axisLabel(#{ ca[:y_axis_label].to_s.to_json })
                    .tickFormat(#{ ca[:y_axis_tick_format] })
                    ;

                  chart.multibar.stacked(true);
                  chart.showControls(false);

                  d3.select('##{ dom_id } svg')
                    .datum(data)
                    .transition().duration(100)
                    .call(chart)
                    ;

                  nv.utils.windowResize(chart.update);

                  return chart;
                });
              })();
            </script>
          </div>
        )
      end

    end
  end
end
@@ -0,0 +1,115 @@
class RailsDataExplorer
  # A named list of values together with its data type and the chart roles
  # it can be used for. The derived values (#uniq_vals, #min_val, ...) are
  # cached, so #values is expected not to be modified after construction.
  class DataSeries

    # TODO: Add concept of significant figures for rounding values when displaying them
    # http://en.wikipedia.org/wiki/Significant_figures

    attr_reader :data_type, :name, :values, :chart_roles

    # Forwards to the data type, passing all arguments through.
    def available_chart_types(*args, &block)
      data_type.available_chart_types(*args, &block)
    end

    # Forwards to the data type, passing all arguments through.
    def available_chart_roles(*args, &block)
      data_type.available_chart_roles(*args, &block)
    end

    # @param[String] _name
    # @param[Array] _values
    # @param[Hash, optional] options
    #   * :chart_roles [Array<Symbol>] restricts the roles this series is
    #     used for; all available roles are used if empty
    #   * :data_type - data type object; inferred from the values if nil
    # @raise[ArgumentError] if no data type is given and none can be inferred
    def initialize(_name, _values, options={})
      options = { chart_roles: [], data_type: nil }.merge(options)
      @name = _name
      @values = _values
      @data_type = init_data_type(options[:data_type])
      @chart_roles = init_chart_roles(options[:chart_roles]) # after data_type!
    end

    # Returns descriptive_statistics as a flat Array
    def descriptive_statistics
      @data_type.descriptive_statistics(values)
    end

    # Returns descriptive_statistics as a renderable table structure
    def descriptive_statistics_table
      @data_type.descriptive_statistics_table(values)
    end

    # @return[String] all values if there are fewer than three or their
    #   inspect output is shorter than 80 characters, otherwise only the
    #   first and last value
    def values_summary
      if values.length < 3 || values.inspect.length < 80
        values.inspect
      else
        "[#{ values.first } ... #{ values.last }]"
      end
    end

    # @param[Integer, optional] indent number of spaces to indent attributes with
    # @param[Integer, optional] recursive accepted for symmetry with
    #   DataSet#inspect; a data series has nothing to recurse into
    # @return[String] multi line description of this data series
    def inspect(indent=1, recursive=1000)
      r = %(#<#{ self.class.to_s }\n)
      r << [
        "@name=#{ name.inspect }",
        "@data_type=#{ data_type.inspect }",
        "@chart_roles=#{ chart_roles.inspect }",
        "@values=<count: #{ values.count }, items: #{ values_summary }>",
      ].map { |e| "#{ ' ' * indent }#{ e }\n"}.join
      r << %(#{ ' ' * (indent-1) }>\n)
    end

    def axis_tick_format
      data_type.axis_tick_format(values)
    end

    # @return[Array] distinct values in order of first appearance (cached)
    def uniq_vals
      @uniq_vals ||= values.uniq
    end

    # @return[Integer] number of distinct values (cached)
    def uniq_vals_count
      @uniq_vals_count ||= uniq_vals.length
    end

    # @return smallest non-nil value, nil if there is none
    def min_val
      @min_val ||= values.compact.min
    end

    # @return largest non-nil value, nil if there is none
    def max_val
      @max_val ||= values.compact.max
    end

  private

    # @param[Array<Symbol>] chart_role_overrides, :x, :y, :color
    # @return[Hash] keys are chart_classes, and values are arrays with roles.
    #   The hash is frozen and returns an empty frozen array for chart
    #   classes it has no entry for.
    def init_chart_roles(chart_role_overrides)
      restrict_roles = chart_role_overrides.any?
      # The default array is shared between all missing keys, so it is frozen
      # to make sure nobody can add roles to it by accident.
      r = available_chart_types.inject(Hash.new([].freeze)) { |m,chart_type|
        roles = chart_type[:chart_roles]
        if restrict_roles
          roles = roles & chart_role_overrides
          # chart types without any of the requested roles get no entry
          next m if roles.empty?
        end
        m[chart_type[:chart_class]] = (m[chart_type[:chart_class]] + roles).uniq
        m
      }
      r.freeze
    end

    # @param data_type_override returned as is unless nil
    # @return data type object for this series
    # @raise[ArgumentError] if the type can't be inferred from the values
    def init_data_type(data_type_override)
      return data_type_override unless data_type_override.nil?
      # Infer from the first non-nil value so that leading nils (missing
      # values) don't prevent inference.
      sample = values.find { |e| !e.nil? }
      case sample
      when Integer
        # Integer also covers the former Fixnum and Bignum classes, which no
        # longer exist as constants on Ruby 3.2 and later.
        DataType::Quantitative::Integer.new
      when Float
        DataType::Quantitative::Decimal.new
      when String
        DataType::Categorical.new
      when DateTime, ActiveSupport::TimeWithZone
        DataType::Quantitative::Temporal.new
      else
        raise(ArgumentError.new("Can't infer data type for value: #{ sample.class.inspect }"))
      end
    end

  end
end
@@ -0,0 +1,127 @@
# Container for data series
class RailsDataExplorer
  class DataSet

    attr_reader :data_series

    # @param[Array<Numeric, String, Symbol, Nil, Hash, DataSeries>] values_or_data_series
    #     Array can contain the following:
    #     * Numeric, String, Symbol, Nil - for a single data series
    #     * Hash - for multiple data series with the following keys:
    #         * :name - name for the series as String
    #         * :values - scalar values as array
    #         * :chart_roles [Array<Symbol>, optional] - what to use this series for. possible values: :x, :y, :color
    #         * :data_type (optional) - :quantitative, :categorical, :temporal
    #     * DataSeries
    # @param[String] exploration_title used as fall back for data series name
    # @raise[ArgumentError] if the input is of an unsupported kind or the
    #     resulting data series fail validation
    def initialize(values_or_data_series, exploration_title)
      @data_series = initialize_data_series(values_or_data_series, exploration_title)
      validate_data_series
    end

    # Converts the constructor input to an array of DataSeries. The kind of
    # input is decided by looking at the first element only.
    #
    # @return[Array<DataSeries>]
    # @raise[ArgumentError] if the first element is of an unsupported class
    def initialize_data_series(values_or_data_series, exploration_title)
      case values_or_data_series.first
      when ActiveSupport::TimeWithZone, DateTime, Numeric, NilClass, String, Symbol
        # Array of scalar values, convert to single data series
        [DataSeries.new(exploration_title, values_or_data_series)]
      when Hash
        # Array of Hashes, convert each key/val pair to a data series
        values_or_data_series.map { |data_series_attrs|
          # Work on a copy so that the caller's hashes keep their :name and
          # :values entries and can be used again.
          attrs = data_series_attrs.dup
          DataSeries.new(
            attrs.delete(:name),
            attrs.delete(:values),
            attrs # pass remaining attrs as options
          )
        }
      when DataSeries
        # return as is
        values_or_data_series
      else
        raise(
          ArgumentError.new(
            "Invalid datum. Only Hash, Numeric, String, Symbol, and Nil are allowed. " + \
            "Found #{ values_or_data_series.first.class.to_s }."
          )
        )
      end
    end

    # @raise[ArgumentError] if there is no data series or the data series
    #     differ in length
    def validate_data_series
      # presence of at least one data_series. Checked first: for an empty set
      # the length check below would fail with a misleading message.
      if 0 == dimensions_count
        raise(ArgumentError.new("Please provide at least 1 data series."))
      end
      # all series have same size
      unless 1 == @data_series.map { |e| e.values.length }.uniq.length
        raise(ArgumentError.new("All data series must have same length."))
      end
      # TODO: all elements in a series are of same type
    end

    # @return[Integer] number of data series
    def dimensions_count
      @data_series.length
    end

    # @return[Array, nil] chart classes available for this data set: those of
    #     the only series, or the ones common to all series. nil for an
    #     empty data set.
    def available_chart_types
      case dimensions_count
      when 0
        # invalid, handled in validate_data_series
      when 1
        # charts for a single data series, use that series' available_chart_types
        @data_series.first.available_chart_types(dimensions_count: 1).map { |e| e[:chart_class] }
      else
        # TODO: define on each chart type which chart_roles are required.
        # Then use only charts for which all roles are filled.
        # find intersection of all available chart types
        @data_series.map { |ds|
          constraints = { dimensions_count: dimensions_count, chart_roles: ds.chart_roles }
          ds.available_chart_types(constraints).map { |e| e[:chart_class] }
        }.inject { |m,chart_classes| chart_classes & m }
      end
    end

    # @return descriptive statistics of the only data series; nil if there
    #     are no or several data series (not implemented)
    def descriptive_statistics
      case dimensions_count
      when 0
        # invalid, handled in validate_data_series
      when 1
        # charts for a single data series, use that series' descriptive_statistics
        @data_series.first.descriptive_statistics
      when 2
        # charts for two data series
      else
        # charts for multiple data series
      end
    end

    # @param[Integer, optional] indent number of spaces to indent attributes with
    # @param[Integer, optional] recursive data series and available chart
    #     types are included only if greater than zero
    # @return[String] multi line description of this data set
    def inspect(indent=1, recursive=1000)
      r = %(#<#{ self.class.to_s }\n)
      r << [
        "@dimensions_count=#{ dimensions_count }",
      ].map { |e| "#{ ' ' * indent }#{ e }\n"}.join
      if recursive > 0
        # data_series
        r << %(#{ ' ' * indent }@data_series=[\n)
        data_series.each do |e|
          r << "#{ ' ' * (indent + 1) }"
          r << e.inspect(indent + 2, recursive - 1)
        end
        r << "#{ ' ' * indent }]\n"
        # available_chart_types
        r << %(#{ ' ' * indent }@available_chart_types=[\n)
        available_chart_types.each do |e|
          r << "#{ ' ' * (indent + 1) }#{ e.inspect }\n"
        end
        r << "#{ ' ' * indent }]\n"
      end
      r << %(#{ ' ' * (indent-1) }>\n)
    end

  end
end