rails-data-explorer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/.gitignore +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +52 -0
  6. data/Rakefile +18 -0
  7. data/lib/rails-data-explorer.rb +44 -0
  8. data/lib/rails-data-explorer/action_view_extension.rb +12 -0
  9. data/lib/rails-data-explorer/active_record_extension.rb +14 -0
  10. data/lib/rails-data-explorer/chart.rb +52 -0
  11. data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
  12. data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
  13. data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
  14. data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
  15. data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
  16. data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
  17. data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
  18. data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
  19. data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
  20. data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
  21. data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
  22. data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
  23. data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
  24. data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
  25. data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
  26. data/lib/rails-data-explorer/data_series.rb +115 -0
  27. data/lib/rails-data-explorer/data_set.rb +127 -0
  28. data/lib/rails-data-explorer/data_type.rb +34 -0
  29. data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
  30. data/lib/rails-data-explorer/data_type/geo.rb +1 -0
  31. data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
  32. data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
  33. data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
  34. data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
  35. data/lib/rails-data-explorer/engine.rb +24 -0
  36. data/lib/rails-data-explorer/exploration.rb +89 -0
  37. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
  38. data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
  39. data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
  40. data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
  41. data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
  42. data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
  43. data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
  44. data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
  45. data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
  46. data/rails-data-explorer.gemspec +30 -0
  47. data/vendor/assets/javascripts/d3.boxplot.js +302 -0
  48. data/vendor/assets/javascripts/d3.parcoords.js +585 -0
  49. data/vendor/assets/javascripts/d3.parsets.js +663 -0
  50. data/vendor/assets/javascripts/d3.v3.js +9294 -0
  51. data/vendor/assets/javascripts/nv.d3.js +14369 -0
  52. data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
  53. data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
  54. data/vendor/assets/stylesheets/bootstrap.css +1727 -0
  55. data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
  56. data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
  57. data/vendor/assets/stylesheets/d3.parsets.css +34 -0
  58. data/vendor/assets/stylesheets/nv.d3.css +769 -0
  59. data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
  60. data/vendor/assets/stylesheets/rde-default-style.css +42 -0
  61. metadata +250 -0
@@ -0,0 +1,34 @@
1
class RailsDataExplorer
  # Base class for data types. Sub classes are expected to implement
  # #all_available_chart_types, which this class filters by constraints.
  class DataType

    # Returns the entries of #all_available_chart_types that satisfy the given
    # constraints. The given constraints Hash is not modified.
    #
    # @param[Hash, optional] constraints
    #   * :dimensions_count - how many data_series are there? Chart types are
    #     kept if the count lies within their :dimensions_count_min and
    #     :dimensions_count_max (a missing bound is treated as unbounded).
    #   * :chart_roles - { ChartClass => [:x, :y], ... }. Chart types whose
    #     class is listed are kept only if at least one of the listed roles is
    #     in their :chart_roles. Chart types whose class is not listed are kept.
    # @return[Array<Hash>] the matching chart type descriptors
    # @raise[RuntimeError] if constraints contains keys other than the above
    def available_chart_types(constraints={})
      # Work on a copy: #delete below would otherwise empty the caller's Hash.
      remaining = constraints.dup
      r = all_available_chart_types
      if (dimensions_count = remaining.delete(:dimensions_count))
        r = r.find_all { |chart_type|
          min = chart_type[:dimensions_count_min]
          max = chart_type[:dimensions_count_max]
          (min.nil? || min <= dimensions_count) &&
          (max.nil? || max >= dimensions_count)
        }
      end
      if (roles_by_chart_class = remaining.delete(:chart_roles))
        r = r.find_all { |chart_type|
          requested_roles = roles_by_chart_class[chart_type[:chart_class]]
          requested_roles.nil? ||
          (requested_roles & chart_type[:chart_roles]).any?
        }
      end
      # check for any unhandled options
      if remaining.any?
        raise "Unhandled constraints: #{ remaining.inspect }"
      end
      r
    end

  end
end
@@ -0,0 +1,117 @@
1
class RailsDataExplorer
  class DataType
    # Data type for categorical values. Statistics are frequency counts and
    # percentages per distinct value.
    class Categorical < DataType

      # Lists the chart type descriptors for this data type. Each descriptor
      # is a Hash with :chart_class, :chart_roles and the optional bounds
      # :dimensions_count_min / :dimensions_count_max (see
      # DataType#available_chart_types for how they are filtered).
      #
      # @return[Array<Hash>] a frozen Array of descriptors
      def all_available_chart_types
        [
          {
            :chart_class => Chart::HistogramCategorical,
            :chart_roles => [:x],
            :dimensions_count_min => 1,
            :dimensions_count_max => 1,
          },
          {
            :chart_class => Chart::PieChart,
            :chart_roles => [:any],
            :dimensions_count_min => 1,
            :dimensions_count_max => 1,
          },
          {
            :chart_class => Chart::BoxPlotGroup,
            :chart_roles => [:y],
            :dimensions_count_min => 2,
            :dimensions_count_max => 2,
          },
          {
            :chart_class => Chart::Scatterplot,
            :chart_roles => [:color],
            :dimensions_count_min => 3,
          },
          {
            :chart_class => Chart::ParallelCoordinates,
            :chart_roles => [:dimension],
            :dimensions_count_min => 3,
          },
          {
            :chart_class => Chart::StackedBarChartCategoricalPercent,
            :chart_roles => [:x, :y],
            :dimensions_count_min => 2,
            :dimensions_count_max => 2,
          },
          {
            :chart_class => Chart::ContingencyTable,
            :chart_roles => [:any],
            :dimensions_count_min => 2,
            :dimensions_count_max => 2,
          },
          {
            :chart_class => Chart::DescriptiveStatisticsTable,
            :chart_roles => [:any],
            :dimensions_count_min => 1,
            :dimensions_count_max => 1,
          },
          {
            :chart_class => Chart::ParallelSet,
            :chart_roles => [:dimension],
            :dimensions_count_min => 3,
          },
        ].freeze
      end

      # Computes count and percentage for every distinct value.
      #
      # @param[Array] values
      # @return[Array<Hash>] one "<value>_count" and one "<value>_percent"
      #   entry per distinct value, sorted by :value in descending order,
      #   preceded by 'Total_percent' and 'Total_count'. Each entry has the
      #   keys :label, :value and :ruby_formatter (a Proc).
      #
      # NOTE(review): the formatter Procs call number_with_delimiter and
      # number_to_percentage, which are not defined in this file; presumably
      # Rails' number helpers are mixed in elsewhere - confirm.
      def descriptive_statistics(values)
        frequencies = values.inject(Hash.new(0)) { |m,e| m[e] += 1; m }
        total_count = values.length
        ruby_formatters = {
          :integer => Proc.new { |v| number_with_delimiter(v.round) },
          :percent => Proc.new { |v| number_to_percentage(v, :precision => 3, :significant => true, :strip_insignificant_zeros => true, :delimiter => ',') },
        }
        r = frequencies.inject([]) { |m, (k,v)|
          m << { :label => "#{ k.to_s }_count", :value => v, :ruby_formatter => ruby_formatters[:integer] }
          m << { :label => "#{ k.to_s }_percent", :value => (v / total_count.to_f) * 100, :ruby_formatter => ruby_formatters[:percent] }
          m
        }.sort { |a,b| b[:value] <=> a[:value] }
        # Inserted in this order so that 'Total_percent' ends up first.
        r.insert(0, { :label => 'Total_count', :value => total_count, :ruby_formatter => ruby_formatters[:integer] })
        r.insert(0, { :label => 'Total_percent', :value => 100, :ruby_formatter => ruby_formatters[:percent] })
        r
      end

      # Returns an OpenStruct that describes a statistics table with three
      # rows: a header row with one column per label ('Total' plus each
      # distinct value), a row with the counts and a row with the percentages.
      #
      # @param[Array] values
      # @return[OpenStruct] with #rows, each row having #css_class, #tag, #cells
      def descriptive_statistics_table(values)
        desc_stats = descriptive_statistics(values)
        # Strip only the trailing suffix. Removing "_count"/"_percent" anywhere
        # in the label would mangle values that contain these strings
        # themselves (e.g. "in_count_range") so that their stats could not be
        # found again below.
        labels = desc_stats.map { |e| e[:label].sub(/_(?:count|percent)\z/, '') }.uniq
        table = OpenStruct.new(
          :rows => []
        )
        table.rows << OpenStruct.new(
          :css_class => 'rde-column_header',
          :tag => :tr,
          :cells => labels.map { |label|
            OpenStruct.new(:value => label, :ruby_formatter => Proc.new { |e| e }, :tag => :th, :css_class => 'rde-cell-label')
          }
        )
        table.rows << descriptive_statistics_value_row(desc_stats, labels, 'count')
        table.rows << descriptive_statistics_value_row(desc_stats, labels, 'percent')
        table
      end

      # Returns the source of a javascript function that passes tick values
      # through unchanged.
      #
      # @param[Array] values not used
      # @return[String]
      def axis_tick_format(values)
        %(function(d) { return d })
      end

    private

      # Builds one data row of the statistics table: for every label the cell
      # holding the stat labeled "<label>_<suffix>".
      #
      # @param[Array<Hash>] desc_stats as returned by #descriptive_statistics
      # @param[Array<String>] labels column labels
      # @param[String] suffix 'count' or 'percent'
      # @return[OpenStruct]
      def descriptive_statistics_value_row(desc_stats, labels, suffix)
        OpenStruct.new(
          :css_class => 'rde-data_row',
          :tag => :tr,
          :cells => labels.map { |label|
            stat = desc_stats.detect { |e| "#{ label }_#{ suffix }" == e[:label] }
            OpenStruct.new(:value => stat[:value], :ruby_formatter => stat[:ruby_formatter], :tag => :td, :css_class => 'rde-cell-value')
          }
        )
      end

    end
  end
end
@@ -0,0 +1 @@
1
+ # For displaying data on maps.
@@ -0,0 +1,109 @@
1
class RailsDataExplorer
  class DataType
    # Data type for numeric values.
    #
    # This is an abstract class. Use sub_classes
    class Quantitative < DataType

      # Lists the chart type descriptors for this data type. Each descriptor
      # is a Hash with :chart_class, :chart_roles and the optional bounds
      # :dimensions_count_min / :dimensions_count_max.
      #
      # @return[Array<Hash>] a frozen Array of descriptors
      def all_available_chart_types
        [
          { chart_class: Chart::BoxPlot, chart_roles: [:y], dimensions_count_min: 1, dimensions_count_max: 1 },
          { chart_class: Chart::HistogramQuantitative, chart_roles: [:x], dimensions_count_min: 1, dimensions_count_max: 1 },
          { chart_class: Chart::BoxPlotGroup, chart_roles: [:x], dimensions_count_min: 2, dimensions_count_max: 2 },
          { chart_class: Chart::Scatterplot, chart_roles: [:x, :y, :size], dimensions_count_min: 2 },
          { chart_class: Chart::DescriptiveStatisticsTable, chart_roles: [:any], dimensions_count_min: 1, dimensions_count_max: 1 },
          { chart_class: Chart::ParallelCoordinates, chart_roles: [:dimension], dimensions_count_min: 3 },
        ].freeze
      end

      # Computes descriptive statistics for values, using
      # ::DescriptiveStatistics::Stats for most of them.
      #
      # @param[Array] values
      # @return[Array<Hash>] entries with the keys :label, :value,
      #   :ruby_formatter (a Proc) and :table_row (1 or 2, the table row the
      #   entry is displayed in, see #descriptive_statistics_table).
      #
      # NOTE(review): the formatter Procs call number_with_delimiter and
      # number_with_precision, which are not defined in this file; presumably
      # Rails' number helpers are mixed in elsewhere - confirm.
      def descriptive_statistics(values)
        stats = ::DescriptiveStatistics::Stats.new(values)
        integer = Proc.new { |v| number_with_delimiter(v.round) }
        decimal = Proc.new { |v| number_with_precision(v, :precision => 3, :significant => true, :strip_insignificant_zeros => true, :delimiter => ',') }
        pass_through = Proc.new { |v| v }

        # Turns [label, value, formatter] triples into entries for table_row.
        to_entries = lambda { |table_row, triples|
          triples.map { |(label, value, formatter)|
            { :label => label, :value => value, :ruby_formatter => formatter, :table_row => table_row }
          }
        }

        first_row = to_entries.call(1, [
          ['Min', stats.min, decimal],
          ['1%ile', stats.value_from_percentile(1), decimal],
          ['10%ile', stats.value_from_percentile(10), decimal],
          ['25%ile', stats.value_from_percentile(25), decimal],
          ['Median', stats.median, decimal],
          ['75%ile', stats.value_from_percentile(75), decimal],
          ['90%ile', stats.value_from_percentile(90), decimal],
          ['99%ile', stats.value_from_percentile(99), decimal],
          ['Max', stats.max, decimal],
          # Empty filler entry so that both rows have the same number of cells.
          ['', '', pass_through],
        ])
        second_row = to_entries.call(2, [
          ['Range', stats.range, decimal],
          ['Mean', stats.mean, decimal],
          ['Mode', stats.mode, decimal],
          ['Count', values.length, integer],
          ['Sum', values.inject(0) { |sum, e| sum + e }, decimal],
          ['Variance', stats.variance, decimal],
          ['Std. dev.', stats.standard_deviation, decimal],
          ['Rel. std. dev.', stats.relative_standard_deviation, decimal],
          ['Skewness', stats.skewness, decimal],
          ['Kurtosis', stats.kurtosis, decimal],
        ])
        first_row + second_row
      end

      # Returns an OpenStruct that describes a statistics table. For each of
      # the two :table_row groups it contains a header row with the labels,
      # followed by a data row with the values.
      #
      # @param[Array] values
      # @return[OpenStruct] with #rows, each row having #css_class, #tag, #cells
      def descriptive_statistics_table(values)
        desc_stats = descriptive_statistics(values)
        rows = [1, 2].inject([]) { |m, table_row|
          row_stats = desc_stats.find_all { |e| table_row == e[:table_row] }
          header_cells = row_stats.map { |stat|
            OpenStruct.new(:value => stat[:label], :ruby_formatter => Proc.new { |e| e }, :tag => :th, :css_class => 'rde-cell-label')
          }
          value_cells = row_stats.map { |stat|
            OpenStruct.new(:value => stat[:value], :ruby_formatter => stat[:ruby_formatter], :tag => :td, :css_class => 'rde-cell-value')
          }
          m << OpenStruct.new(:css_class => 'rde-column_header', :tag => :tr, :cells => header_cells)
          m << OpenStruct.new(:css_class => 'rde-data_row', :tag => :tr, :cells => value_cells)
          m
        }
        OpenStruct.new(:rows => rows)
      end

      # Abstract, has to be implemented by sub classes.
      #
      # @param[Array] values
      # @raise[RuntimeError] always
      def axis_tick_format(values)
        raise "Implement me in sub_class"
      end

    end
  end
end
@@ -0,0 +1,13 @@
1
class RailsDataExplorer
  class DataType
    class Quantitative
      # Quantitative data type for decimal numbers.
      class Decimal < Quantitative

        # Returns the javascript expression used to format axis ticks.
        #
        # @param[Array] values not used
        # @return[String]
        def axis_tick_format(values)
          %(d3.format('.02f'))
        end

      end
    end
  end
end
@@ -0,0 +1,13 @@
1
class RailsDataExplorer
  class DataType
    class Quantitative
      # Quantitative data type for integer numbers.
      # Note that this class is not related to Ruby's core ::Integer.
      class Integer < Quantitative

        # Returns the javascript expression used to format axis ticks.
        #
        # @param[Array] values not used
        # @return[String]
        def axis_tick_format(values)
          %(d3.format('r'))
        end

      end
    end
  end
end
@@ -0,0 +1,62 @@
1
class RailsDataExplorer
  class DataType
    class Quantitative
      # Quantitative data type for points in time.
      class Temporal < Quantitative

        # Lists the chart type descriptors for this data type. Each
        # descriptor is a Hash with :chart_class, :chart_roles and the
        # optional bounds :dimensions_count_min / :dimensions_count_max.
        #
        # @return[Array<Hash>] a frozen Array of descriptors
        def all_available_chart_types
          [
            {
              :chart_class => Chart::HistogramTemporal,
              :chart_roles => [:x],
              :dimensions_count_min => 1,
              :dimensions_count_max => 1
            },
            {
              :chart_class => Chart::DescriptiveStatisticsTable,
              :chart_roles => [:any],
              :dimensions_count_min => 1,
              :dimensions_count_max => 1
            },
            {
              :chart_class => Chart::ParallelCoordinates,
              :chart_roles => [:dimension],
              :dimensions_count_min => 3,
            },
          ].freeze
        end

        # Computes min, max and count of values.
        #
        # @param[Array] values objects that respond to #strftime
        # @return[Array<Hash>] entries with the keys :label, :value and
        #   :ruby_formatter (a Proc).
        #
        # NOTE(review): the 'Count' formatter calls number_with_delimiter,
        # which is not defined in this file; presumably Rails' number helpers
        # are mixed in elsewhere - confirm.
        def descriptive_statistics(values)
          # Min and max are nil for empty values, these are rendered as ''.
          ruby_formatter = Proc.new { |v| v.nil? ? '' : v.strftime('%a, %b %e, %Y, %l:%M:%S %p %Z') }
          [
            { :label => 'Min', :value => values.min, :ruby_formatter => ruby_formatter },
            { :label => 'Max', :value => values.max, :ruby_formatter => ruby_formatter },
            { :label => 'Count', :value => values.length, :ruby_formatter => Proc.new { |e| number_with_delimiter(e) } },
          ]
        end

        # Returns an OpenStruct that describes a statistics table. The table
        # has one row per statistic, each with a header cell for the label and
        # a data cell for the value.
        #
        # @param[Array] values
        # @return[OpenStruct] with #rows, each row having #css_class, #tag, #cells
        def descriptive_statistics_table(values)
          desc_stats = descriptive_statistics(values)
          table = OpenStruct.new(
            :rows => desc_stats.map { |stat|
              OpenStruct.new(
                :css_class => 'rde-row-values',
                :tag => :tr,
                :cells => [
                  # Header cells get an identity formatter so that every cell
                  # responds to #ruby_formatter with a callable, as in the
                  # tables of the other data types.
                  OpenStruct.new(:value => stat[:label], :ruby_formatter => Proc.new { |e| e }, :tag => :th, :css_class => 'rde-row_header'),
                  OpenStruct.new(:value => stat[:value], :ruby_formatter => stat[:ruby_formatter], :tag => :td, :css_class => 'rde-cell-value'),
                ]
              )
            }
          )
          table
        end

        # Returns the source of a javascript function that converts a tick
        # value to a Date and formats it with d3.time.format('%x').
        #
        # @param[Array] values not used
        # @return[String]
        def axis_tick_format(values)
          %(function(d) { return d3.time.format('%x')(new Date(d)) })
        end

      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'rails'
2
+
3
class RailsDataExplorer
  # Rails engine of the gem.
  #
  # It's an engine so that we can add javascript and image assets
  # to the asset pipeline.
  class Engine < ::Rails::Engine

    # The initializers below are commented out, they do not run.

    # initializer "rails-data-explorer.active_record_extension" do |app|
    #   require 'rails-data-explorer/active_record_extension'
    #   class ::ActiveRecord::Base
    #     extend RailsDataExplorer::ActiveRecordExtension::ClassMethods
    #   end
    # end

    # initializer "rails-data-explorer.action_view_extension" do |app|
    #   require 'rails-data-explorer/action_view_extension'
    #   class ::ActionView::Base
    #     include RailsDataExplorer::ActionViewExtension
    #   end
    # end

  end
end
@@ -0,0 +1,89 @@
1
+ class RailsDataExplorer
2
+ class Exploration
3
+
4
+ attr_accessor :output_buffer # required for content_tag
5
+ include ActionView::Helpers::TagHelper
6
+
7
+ attr_accessor :charts, :data_set, :title
8
+
9
# Sets up a new exploration: stores the title, builds the data set and
# then the charts to display for it.
#
# @param[String] _title will be printed at top of visualization
# @param[Array, DataSet] data_set_or_array the data to explore, one of:
#     * Array<Scalar> - for single data series, uni-variate options are applied.
#     * Array<Hash> - for multiple data series, bi/multi-variate options are applied.
#     * DataSet - For finer grained control.
# @param[Array<Chart, String, Symbol>, optional] chart_specs
#     The list of charts to include. Defaults to all applicable charts for the
#     given data_set_or_array.
#     Charts can be provided as Array of Strings, Symbols, or Chart classes
#     (can be mixed).
def initialize(_title, data_set_or_array, chart_specs=nil)
  # Order matters: the data set is built using @title, and the charts are
  # built using @data_set.
  @title = _title
  @data_set = initialize_data_set(data_set_or_array)
  @charts = initialize_charts(chart_specs)
end
25
+
26
# Renders the exploration as a panel: a heading with the title, followed by
# a body that contains the output of every chart's #render.
#
# @return[String] the html, marked as html_safe
def render
  panel = content_tag(:div, :class => 'rde-exploration panel panel-default', :id => dom_id) do
    heading = content_tag(:div, :class => 'panel-heading') do
      content_tag(:h2, @title, :class => 'rde-exploration-title panel-title')
    end
    body = content_tag(:div, :class => 'panel-body') do
      @charts.map { |chart| chart.render }.join.html_safe
    end
    heading + body
  end
  panel.html_safe
end
36
+
37
# Returns the id of this exploration's outermost html element. It is derived
# from the object_id of this instance.
#
# @return[String]
def dom_id
  ['rde-exploration', object_id].join('-')
end
40
+
41
# Returns a multi line, indented description of this exploration.
#
# @param[Integer, optional] indent number of spaces to indent attributes with
# @param[Integer, optional] recursive the data set is included only if this
#     is greater than zero. It is passed on, decremented by one.
# @return[String]
def inspect(indent=1, recursive=1000)
  padding = ' ' * indent
  out = "#<#{ self.class.to_s }\n"
  out << "#{ padding }@title=#{ @title.inspect }\n"
  if recursive > 0
    out << "#{ padding }@data_set="
    out << data_set.inspect(indent + 1, recursive - 1)
  end
  # The closing bracket is indented one level less than the attributes.
  out << "#{ ' ' * (indent - 1) }>\n"
end
52
+
53
+ private
54
+
55
# Builds the charts for this exploration.
#
# @param[Array<Chart, Class, String, Symbol>, nil] chart_specs
#     If nil or empty, one chart is instantiated for each of the data set's
#     available chart types. Otherwise each spec is converted to a chart:
#     * Chart instance - used as is.
#     * Class - instantiated with the data set.
#     * String, Symbol - name of a class in the Chart namespace, either
#       camel cased ('BoxPlot') or underscored (:box_plot). The class is
#       instantiated with the data set.
# @return[Array] the charts
# @raise[ArgumentError] if a spec is of any other type or names an unknown chart
def initialize_charts(chart_specs)
  if chart_specs.nil? || chart_specs.empty?
    return @data_set.available_chart_types.map { |e| e.new(@data_set) }
  end
  chart_specs.map { |chart_spec|
    case chart_spec
    when Chart
      chart_spec
    when Class
      chart_spec.new(@data_set)
    when String, Symbol
      chart_class_for(chart_spec).new(@data_set)
    else
      raise ArgumentError, "Invalid chart spec: #{ chart_spec.inspect }"
    end
  }
end

# Looks up the chart class with the given name in the Chart namespace.
#
# @param[String, Symbol] chart_spec e.g. 'BoxPlot' or :box_plot
# @return[Class]
# @raise[ArgumentError] if there is no such chart class
def chart_class_for(chart_spec)
  class_name = chart_spec.to_s.split('_').map { |part|
    part[0, 1].upcase + part[1..-1].to_s
  }.join
  # Don't inherit, so that top level constants (e.g. String) are not found.
  chart_class = begin
    Chart.const_get(class_name, false)
  rescue NameError
    nil
  end
  unless chart_class.is_a?(Class)
    raise ArgumentError, "Unknown chart type: #{ chart_spec.inspect }"
  end
  chart_class
end
70
+
71
# Returns the DataSet for this exploration.
#
# @param[Array, DataSet] data_set_or_array an Array is wrapped in a new
#     DataSet (together with @title), a DataSet is used as is.
# @return[DataSet]
# @raise[ArgumentError] if data_set_or_array is neither an Array nor a DataSet
def initialize_data_set(data_set_or_array)
  case data_set_or_array
  when Array
    DataSet.new(data_set_or_array, @title)
  when DataSet
    # use as is
    data_set_or_array
  else
    raise(
      ArgumentError,
      "data_set_or_array must be an Array or a DataSet, " \
      "is #{ data_set_or_array.class.to_s }"
    )
  end
end
87
+
88
+ end
89
+ end