rails-data-explorer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/.gitignore +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.md +52 -0
  6. data/Rakefile +18 -0
  7. data/lib/rails-data-explorer.rb +44 -0
  8. data/lib/rails-data-explorer/action_view_extension.rb +12 -0
  9. data/lib/rails-data-explorer/active_record_extension.rb +14 -0
  10. data/lib/rails-data-explorer/chart.rb +52 -0
  11. data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
  12. data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
  13. data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
  14. data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
  15. data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
  16. data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
  17. data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
  18. data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
  19. data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
  20. data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
  21. data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
  22. data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
  23. data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
  24. data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
  25. data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
  26. data/lib/rails-data-explorer/data_series.rb +115 -0
  27. data/lib/rails-data-explorer/data_set.rb +127 -0
  28. data/lib/rails-data-explorer/data_type.rb +34 -0
  29. data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
  30. data/lib/rails-data-explorer/data_type/geo.rb +1 -0
  31. data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
  32. data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
  33. data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
  34. data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
  35. data/lib/rails-data-explorer/engine.rb +24 -0
  36. data/lib/rails-data-explorer/exploration.rb +89 -0
  37. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
  38. data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
  39. data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
  40. data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
  41. data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
  42. data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
  43. data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
  44. data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
  45. data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
  46. data/rails-data-explorer.gemspec +30 -0
  47. data/vendor/assets/javascripts/d3.boxplot.js +302 -0
  48. data/vendor/assets/javascripts/d3.parcoords.js +585 -0
  49. data/vendor/assets/javascripts/d3.parsets.js +663 -0
  50. data/vendor/assets/javascripts/d3.v3.js +9294 -0
  51. data/vendor/assets/javascripts/nv.d3.js +14369 -0
  52. data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
  53. data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
  54. data/vendor/assets/stylesheets/bootstrap.css +1727 -0
  55. data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
  56. data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
  57. data/vendor/assets/stylesheets/d3.parsets.css +34 -0
  58. data/vendor/assets/stylesheets/nv.d3.css +769 -0
  59. data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
  60. data/vendor/assets/stylesheets/rde-default-style.css +42 -0
  61. metadata +250 -0
@@ -0,0 +1,34 @@
1
class RailsDataExplorer
  # Base class for data types. It filters the chart type descriptors returned
  # by #all_available_chart_types, which this class calls but does not define
  # itself (the concrete data types provide it).
  class DataType

    # Returns the chart type descriptors that satisfy the given constraints.
    #
    # A descriptor is a Hash that is read here via the keys :chart_class,
    # :chart_roles, :dimensions_count_min and :dimensions_count_max.
    #
    # @param[Hash, optional] constraints
    #   * :dimensions_count - how many data_series are there? Descriptors
    #     whose min/max bounds exclude this number are dropped. A nil bound
    #     is treated as "no bound".
    #   * :chart_roles - { ChartClass => [:x, :y], ... }. A descriptor is kept
    #     if its chart class has no entry, or if the entry shares at least one
    #     role with the descriptor's :chart_roles.
    #   The given Hash is never modified.
    # @return[Array<Hash>] the matching chart type descriptors
    # @raise[RuntimeError] if constraints contains keys other than the above
    def available_chart_types(constraints={})
      # Work on a copy: the keys are consumed below to detect unhandled
      # options, and deleting them from the caller's Hash would make a reused
      # (or frozen) constraints Hash misbehave on the next call.
      remaining = constraints.dup
      r = all_available_chart_types
      if (dimensions_count = remaining.delete(:dimensions_count))
        r = r.find_all { |chart_type|
          min = chart_type[:dimensions_count_min]
          max = chart_type[:dimensions_count_max]
          (min.nil? || min <= dimensions_count) && (max.nil? || max >= dimensions_count)
        }
      end
      if (roles_by_chart_class = remaining.delete(:chart_roles))
        # roles_by_chart_class = { ChartClass => [:x, :y], ... }
        r = r.find_all { |chart_type|
          allowed_roles = roles_by_chart_class[chart_type[:chart_class]]
          allowed_roles.nil? || (allowed_roles & chart_type[:chart_roles]).any?
        }
      end
      # check for any unhandled options
      if remaining.any?
        raise "Unhandled constraints: #{ remaining.inspect }"
      end
      r
    end

  end
end
@@ -0,0 +1,117 @@
1
class RailsDataExplorer
  class DataType
    # Data type for categorical values: statistics are frequency counts and
    # percentages per distinct value.
    class Categorical < DataType

      # Lists the chart type descriptors declared for categorical data.
      #
      # @return[Array<Hash>] frozen list of descriptors with the keys
      #   :chart_class, :chart_roles, :dimensions_count_min and, where
      #   declared, :dimensions_count_max.
      def all_available_chart_types
        [
          {
            :chart_class => Chart::HistogramCategorical,
            :chart_roles => [:x],
            :dimensions_count_min => 1,
            :dimensions_count_max => 1,
          },
          {
            :chart_class => Chart::PieChart,
            :chart_roles => [:any],
            :dimensions_count_min => 1,
            :dimensions_count_max => 1,
          },
          {
            chart_class: Chart::BoxPlotGroup,
            chart_roles: [:y],
            dimensions_count_min: 2,
            dimensions_count_max: 2,
          },
          {
            chart_class: Chart::Scatterplot,
            chart_roles: [:color],
            dimensions_count_min: 3,
          },
          {
            chart_class: Chart::ParallelCoordinates,
            chart_roles: [:dimension],
            dimensions_count_min: 3,
          },
          {
            chart_class: Chart::StackedBarChartCategoricalPercent,
            chart_roles: [:x, :y],
            dimensions_count_min: 2,
            dimensions_count_max: 2,
          },
          {
            chart_class: Chart::ContingencyTable,
            chart_roles: [:any],
            dimensions_count_min: 2,
            dimensions_count_max: 2,
          },
          {
            chart_class: Chart::DescriptiveStatisticsTable,
            chart_roles: [:any],
            dimensions_count_min: 1,
            dimensions_count_max: 1,
          },
          {
            chart_class: Chart::ParallelSet,
            chart_roles: [:dimension],
            dimensions_count_min: 3,
          },
        ].freeze
      end

      # Computes frequency statistics for the given values.
      #
      # @param[Array] values the raw categorical values
      # @return[Array<Hash>] entries with the keys :label, :value and
      #   :ruby_formatter. The list starts with 'Total_percent' and
      #   'Total_count', followed by "<category>_count" and
      #   "<category>_percent" entries for every distinct value, sorted by
      #   descending :value (counts and percentages are sorted together).
      def descriptive_statistics(values)
        frequencies = values.each_with_object(Hash.new(0)) { |e, m| m[e] += 1 }
        total_count = values.length
        # NOTE(review): the formatters call number_with_delimiter and
        # number_to_percentage, which are not defined in this class; they
        # presumably have to be evaluated where those helpers are available.
        ruby_formatters = {
          :integer => Proc.new { |v| number_with_delimiter(v.round) },
          :percent => Proc.new { |v| number_to_percentage(v, :precision => 3, :significant => true, :strip_insignificant_zeros => true, :delimiter => ',') },
        }
        r = frequencies.flat_map { |k, v|
          [
            { :label => "#{ k.to_s }_count", :value => v, :ruby_formatter => ruby_formatters[:integer] },
            { :label => "#{ k.to_s }_percent", :value => (v / total_count.to_f) * 100, :ruby_formatter => ruby_formatters[:percent] },
          ]
        }.sort { |a,b| b[:value] <=> a[:value] }
        # Totals go first; Total_percent ends up ahead of Total_count.
        r.insert(0, { :label => 'Total_count', :value => total_count, :ruby_formatter => ruby_formatters[:integer] })
        r.insert(0, { :label => 'Total_percent', :value => 100, :ruby_formatter => ruby_formatters[:percent] })
        r
      end

      # Returns an OpenStruct that describes a statistics table.
      #
      # The table has three rows: a header row with one cell per category
      # (plus 'Total'), a row with the counts and a row with the percentages.
      #
      # @param[Array] values the raw categorical values
      # @return[OpenStruct] with a :rows Array of row OpenStructs
      #   (:css_class, :tag, :cells)
      def descriptive_statistics_table(values)
        desc_stats = descriptive_statistics(values)
        # Strip only the trailing suffix. Removing "_count"/"_percent"
        # anywhere in the label would mangle categories that contain these
        # words themselves (e.g. "page_count") and break the lookups below.
        labels = desc_stats.map { |e| e[:label].sub(/_(?:count|percent)\z/, '') }.uniq
        table = OpenStruct.new(
          :rows => []
        )
        table.rows << OpenStruct.new(
          :css_class => 'rde-column_header',
          :tag => :tr,
          :cells => labels.map { |label|
            OpenStruct.new(:value => label, :ruby_formatter => Proc.new { |e| e }, :tag => :th, :css_class => 'rde-cell-label')
          }
        )
        %w[count percent].each do |suffix|
          table.rows << statistics_row(desc_stats, labels, suffix)
        end
        table
      end

      # Returns the javascript snippet used to format axis ticks: categorical
      # values are passed through unchanged.
      #
      # @param[Array] values not used
      # @return[String] javascript source of a function
      def axis_tick_format(values)
        %(function(d) { return d })
      end

      private

      # Builds one data row holding, for every label, the statistic whose
      # label is "<label>_<suffix>".
      def statistics_row(desc_stats, labels, suffix)
        OpenStruct.new(
          :css_class => 'rde-data_row',
          :tag => :tr,
          :cells => labels.map { |label|
            stat = desc_stats.detect { |e| "#{ label }_#{ suffix }" == e[:label] }
            OpenStruct.new(:value => stat[:value], :ruby_formatter => stat[:ruby_formatter], :tag => :td, :css_class => 'rde-cell-value')
          }
        )
      end

    end
  end
end
@@ -0,0 +1 @@
1
+ # For displaying data on maps.
@@ -0,0 +1,109 @@
1
class RailsDataExplorer
  class DataType
    # Data type for numeric values.
    #
    # This is an abstract class. Use sub_classes
    class Quantitative < DataType

      # Lists the chart type descriptors declared for quantitative data.
      #
      # @return[Array<Hash>] frozen list of descriptors with the keys
      #   :chart_class, :chart_roles, :dimensions_count_min and, where
      #   declared, :dimensions_count_max.
      def all_available_chart_types
        [
          { chart_class: Chart::BoxPlot, chart_roles: [:y], dimensions_count_min: 1, dimensions_count_max: 1 },
          { chart_class: Chart::HistogramQuantitative, chart_roles: [:x], dimensions_count_min: 1, dimensions_count_max: 1 },
          { chart_class: Chart::BoxPlotGroup, chart_roles: [:x], dimensions_count_min: 2, dimensions_count_max: 2 },
          { chart_class: Chart::Scatterplot, chart_roles: [:x, :y, :size], dimensions_count_min: 2 },
          { chart_class: Chart::DescriptiveStatisticsTable, chart_roles: [:any], dimensions_count_min: 1, dimensions_count_max: 1 },
          { chart_class: Chart::ParallelCoordinates, chart_roles: [:dimension], dimensions_count_min: 3 },
        ].freeze
      end

      # Computes descriptive statistics for the given values via
      # ::DescriptiveStatistics::Stats.
      #
      # @param[Array] values the raw values
      # @return[Array<Hash>] entries with the keys :label, :value,
      #   :ruby_formatter and :table_row (1 or 2), in display order
      def descriptive_statistics(values)
        stats = ::DescriptiveStatistics::Stats.new(values)
        as_integer = Proc.new { |v| number_with_delimiter(v.round) }
        as_decimal = Proc.new { |v| number_with_precision(v, :precision => 3, :significant => true, :strip_insignificant_zeros => true, :delimiter => ',') }
        as_is = Proc.new { |v| v }

        # Each entry is [label, value, formatter]; the Hash key is the
        # table row the entries are displayed in.
        entries_by_table_row = {
          1 => [
            ['Min', stats.min, as_decimal],
            ['1%ile', stats.value_from_percentile(1), as_decimal],
            ['10%ile', stats.value_from_percentile(10), as_decimal],
            ['25%ile', stats.value_from_percentile(25), as_decimal],
            ['Median', stats.median, as_decimal],
            ['75%ile', stats.value_from_percentile(75), as_decimal],
            ['90%ile', stats.value_from_percentile(90), as_decimal],
            ['99%ile', stats.value_from_percentile(99), as_decimal],
            ['Max', stats.max, as_decimal],
            # empty filler cell so that both table rows have ten entries
            ['', '', as_is],
          ],
          2 => [
            ['Range', stats.range, as_decimal],
            ['Mean', stats.mean, as_decimal],
            ['Mode', stats.mode, as_decimal],
            ['Count', values.length, as_integer],
            ['Sum', values.inject(0) { |sum, e| sum + e }, as_decimal],
            ['Variance', stats.variance, as_decimal],
            ['Std. dev.', stats.standard_deviation, as_decimal],
            ['Rel. std. dev.', stats.relative_standard_deviation, as_decimal],
            ['Skewness', stats.skewness, as_decimal],
            ['Kurtosis', stats.kurtosis, as_decimal],
          ],
        }
        entries_by_table_row.flat_map { |table_row, entries|
          entries.map { |entry|
            { :label => entry[0], :value => entry[1], :ruby_formatter => entry[2], :table_row => table_row }
          }
        }
      end

      # Returns an OpenStruct that describes a statistics table.
      #
      # For each of the table rows 1 and 2 a header row (labels) and a data
      # row (values) is appended.
      #
      # @param[Array] values the raw values
      # @return[OpenStruct] with a :rows Array of row OpenStructs
      #   (:css_class, :tag, :cells)
      def descriptive_statistics_table(values)
        desc_stats = descriptive_statistics(values)
        table = OpenStruct.new(:rows => [])
        [1, 2].each do |table_row|
          row_stats = desc_stats.select { |e| table_row == e[:table_row] }
          header_cells = row_stats.map { |stat|
            OpenStruct.new(:value => stat[:label], :ruby_formatter => Proc.new { |e| e }, :tag => :th, :css_class => 'rde-cell-label')
          }
          value_cells = row_stats.map { |stat|
            OpenStruct.new(:value => stat[:value], :ruby_formatter => stat[:ruby_formatter], :tag => :td, :css_class => 'rde-cell-value')
          }
          table.rows << OpenStruct.new(:css_class => 'rde-column_header', :tag => :tr, :cells => header_cells)
          table.rows << OpenStruct.new(:css_class => 'rde-data_row', :tag => :tr, :cells => value_cells)
        end
        table
      end

      # Abstract: sub classes return the javascript snippet used to format
      # axis ticks.
      #
      # @param[Array] values not used here
      # @raise[RuntimeError] always
      def axis_tick_format(values)
        raise "Implement me in sub_class"
      end

    end
  end
end
@@ -0,0 +1,13 @@
1
class RailsDataExplorer
  class DataType
    class Quantitative
      # Quantitative data type for decimal numbers.
      class Decimal < Quantitative

        # Returns the javascript expression used to format axis ticks.
        #
        # @param[Array] values not used
        # @return[String] a d3.format call with the '.02f' specifier
        def axis_tick_format(values)
          %(d3.format('.02f'))
        end

      end
    end
  end
end
@@ -0,0 +1,13 @@
1
class RailsDataExplorer
  class DataType
    class Quantitative
      # Quantitative data type for whole numbers.
      class Integer < Quantitative

        # Returns the javascript expression used to format axis ticks.
        #
        # @param[Array] values not used
        # @return[String] a d3.format call with the 'r' specifier
        def axis_tick_format(values)
          %(d3.format('r'))
        end

      end
    end
  end
end
@@ -0,0 +1,62 @@
1
class RailsDataExplorer
  class DataType
    class Quantitative
      # Quantitative data type for points in time.
      class Temporal < Quantitative

        # Lists the chart type descriptors declared for temporal data.
        #
        # @return[Array<Hash>] frozen list of descriptors with the keys
        #   :chart_class, :chart_roles, :dimensions_count_min and, where
        #   declared, :dimensions_count_max.
        def all_available_chart_types
          [
            {
              :chart_class => Chart::HistogramTemporal,
              :chart_roles => [:x],
              :dimensions_count_min => 1,
              :dimensions_count_max => 1
            },
            {
              chart_class: Chart::DescriptiveStatisticsTable,
              chart_roles: [:any],
              dimensions_count_min: 1,
              dimensions_count_max: 1
            },
            {
              chart_class: Chart::ParallelCoordinates,
              chart_roles: [:dimension],
              dimensions_count_min: 3,
            },
          ].freeze
        end

        # Computes min, max and count of the given values.
        #
        # @param[Array] values the raw values; min/max are formatted with
        #   strftime, so they are presumably Time-like objects
        # @return[Array<Hash>] entries with the keys :label, :value and
        #   :ruby_formatter
        def descriptive_statistics(values)
          # nil (e.g. min/max of an empty list) is rendered as empty string
          ruby_formatter = Proc.new { |v| v.nil? ? '' : v.strftime('%a, %b %e, %Y, %l:%M:%S %p %Z') }
          [
            { :label => 'Min', :value => values.min, :ruby_formatter => ruby_formatter },
            { :label => 'Max', :value => values.max, :ruby_formatter => ruby_formatter },
            # NOTE(review): number_with_delimiter is not defined in this
            # class; the formatter presumably has to be evaluated where that
            # helper is available.
            { :label => 'Count', :value => values.length, :ruby_formatter => Proc.new { |e| number_with_delimiter(e) } },
          ]
        end

        # Returns an OpenStruct that describes a statistics table.
        #
        # The table has one row per statistic: a row header cell with the
        # label followed by a cell with the value.
        #
        # @param[Array] values the raw values
        # @return[OpenStruct] with a :rows Array of row OpenStructs
        #   (:css_class, :tag, :cells)
        def descriptive_statistics_table(values)
          desc_stats = descriptive_statistics(values)
          table = OpenStruct.new(
            :rows => desc_stats.map { |stat|
              OpenStruct.new(
                :css_class => 'rde-row-values',
                :tag => :tr,
                :cells => [
                  # Label cells get an identity formatter, like the label
                  # cells of the other data types' statistics tables, so that
                  # every cell can be rendered the same way.
                  OpenStruct.new(:value => stat[:label], :ruby_formatter => Proc.new { |e| e }, :tag => :th, :css_class => 'rde-row_header'),
                  OpenStruct.new(:value => stat[:value], :ruby_formatter => stat[:ruby_formatter], :tag => :td, :css_class => 'rde-cell-value'),
                ]
              )
            }
          )
          table
        end

        # Returns the javascript snippet used to format axis ticks: the tick
        # value is turned into a Date and formatted with d3.time.format('%x').
        #
        # @param[Array] values not used
        # @return[String] javascript source of a function
        def axis_tick_format(values)
          %(function(d) { return d3.time.format('%x')(new Date(d)) })
        end

      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'rails'
2
+
3
+ class RailsDataExplorer
4
+ class Engine < ::Rails::Engine
5
+ # Rails engine for the rails-data-explorer gem. The class body contains no active code.
6
+ # It's an engine so that we can add the gem's javascript and image assets
7
+ # to the asset pipeline.
8
+ # NOTE: both initializers below are commented out, i.e. they are currently disabled.
9
+ # initializer "rails-data-explorer.active_record_extension" do |app|
10
+ # require 'rails-data-explorer/active_record_extension'
11
+ # class ::ActiveRecord::Base
12
+ # extend RailsDataExplorer::ActiveRecordExtension::ClassMethods
13
+ # end
14
+ # end
15
+
16
+ # initializer "rails-data-explorer.action_view_extension" do |app|
17
+ # require 'rails-data-explorer/action_view_extension'
18
+ # class ::ActionView::Base
19
+ # include RailsDataExplorer::ActionViewExtension
20
+ # end
21
+ # end
22
+
23
+ end
24
+ end
@@ -0,0 +1,89 @@
1
class RailsDataExplorer
  # An exploration bundles a title, a data set and the charts that visualize
  # the data set, and renders them as one html fragment.
  class Exploration

    attr_accessor :output_buffer # required for content_tag
    include ActionView::Helpers::TagHelper

    attr_accessor :charts, :data_set, :title

    # Initializes a new visualization.
    # @param[String] _title will be printed at top of visualization
    # @param[Array] data_set_or_array can be a number of things:
    #   * Array<Scalar> - for single data series, uni-variate options are applied.
    #   * Array<Hash> - for multiple data series, bi/multi-variate options are applied.
    #   * DataSet - For finer grained control.
    # @param[Array<Chart, String, Symbol>, optional] chart_specs
    #   The list of charts to include. Defaults to all applicable charts for the
    #   given data_set_or_array.
    #   Charts can be provided as Array of Strings, Symbols, or Chart classes
    #   (can be mixed).
    # @raise[ArgumentError] if data_set_or_array or one of the chart_specs has
    #   an unsupported type
    def initialize(_title, data_set_or_array, chart_specs=nil)
      @title = _title
      @data_set = initialize_data_set(data_set_or_array)
      @charts = initialize_charts(chart_specs)
    end

    # Renders the exploration: an outer div containing a heading div with the
    # title and a body div with the concatenated output of every chart's
    # #render.
    # @return[String] html safe markup
    def render
      content_tag(:div, :class => 'rde-exploration panel panel-default', :id => dom_id) do
        content_tag(:div, :class => 'panel-heading') do
          content_tag(:h2, @title, :class => 'rde-exploration-title panel-title')
        end +
        content_tag(:div, :class => 'panel-body') do
          @charts.map { |e| e.render }.join.html_safe
        end
      end.html_safe
    end

    # @return[String] the dom id of the outer div, derived from object_id
    def dom_id
      "rde-exploration-#{ object_id }"
    end

    # Returns a multi-line debug representation.
    # @param[Integer, optional] indent number of spaces to indent attributes with
    # @param[Integer, optional] recursive how many levels of nested objects
    #   to include; the data set is omitted once this reaches 0
    # @return[String]
    def inspect(indent=1, recursive=1000)
      r = %(#<#{ self.class.to_s }\n)
      r << [
        "@title=#{ @title.inspect }",
      ].map { |e| "#{ ' ' * indent }#{ e }\n"}.join
      if recursive > 0
        r << %(#{ ' ' * indent }@data_set=)
        r << data_set.inspect(indent + 1, recursive - 1)
      end
      r << %(#{ ' ' * (indent-1) }>\n)
    end

  private

    # Builds the list of charts: one per given chart spec, or, if no specs
    # are given, one for every chart type available for the data set.
    def initialize_charts(chart_specs)
      if chart_specs.present?
        chart_specs.map { |chart_spec| initialize_chart(chart_spec) }
      else
        @data_set.available_chart_types.map { |e|
          e.send(:new, @data_set)
        }
      end
    end

    # Turns a single chart spec into a chart.
    #
    # NOTE(review): the resolution rules follow the constructor's
    # documentation and the way default charts are instantiated
    # (chart_class.new(data_set)); confirm them against the Chart classes.
    # @raise[ArgumentError] if chart_spec has an unsupported type
    # @raise[NameError] if a String/Symbol names no constant under Chart
    def initialize_chart(chart_spec)
      case chart_spec
      when Chart
        # already a chart instance, use as is
        chart_spec
      when Class
        chart_spec.new(@data_set)
      when String, Symbol
        # e.g. :box_plot or 'BoxPlot' => Chart::BoxPlot
        Chart.const_get(chart_spec.to_s.camelize, false).new(@data_set)
      else
        raise ArgumentError, "chart_spec must be a Chart, Class, String or Symbol, " \
                             "is #{ chart_spec.class.to_s }"
      end
    end

    # Coerces the constructor argument into a DataSet.
    # @raise[ArgumentError] if the argument is neither an Array nor a DataSet
    def initialize_data_set(data_set_or_array)
      case data_set_or_array
      when Array
        DataSet.new(data_set_or_array, @title)
      when DataSet
        # use as is
        data_set_or_array
      else
        raise ArgumentError, "data_set_or_array must be an Array or a DataSet, " \
                             "is #{ data_set_or_array.class.to_s }"
      end
    end

  end
end