rails-data-explorer 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +52 -0
- data/Rakefile +18 -0
- data/lib/rails-data-explorer.rb +44 -0
- data/lib/rails-data-explorer/action_view_extension.rb +12 -0
- data/lib/rails-data-explorer/active_record_extension.rb +14 -0
- data/lib/rails-data-explorer/chart.rb +52 -0
- data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
- data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
- data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
- data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
- data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
- data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
- data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
- data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
- data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
- data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
- data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
- data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
- data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
- data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
- data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
- data/lib/rails-data-explorer/data_series.rb +115 -0
- data/lib/rails-data-explorer/data_set.rb +127 -0
- data/lib/rails-data-explorer/data_type.rb +34 -0
- data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
- data/lib/rails-data-explorer/data_type/geo.rb +1 -0
- data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
- data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
- data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
- data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
- data/lib/rails-data-explorer/engine.rb +24 -0
- data/lib/rails-data-explorer/exploration.rb +89 -0
- data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
- data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
- data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
- data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
- data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
- data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
- data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
- data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
- data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
- data/rails-data-explorer.gemspec +30 -0
- data/vendor/assets/javascripts/d3.boxplot.js +302 -0
- data/vendor/assets/javascripts/d3.parcoords.js +585 -0
- data/vendor/assets/javascripts/d3.parsets.js +663 -0
- data/vendor/assets/javascripts/d3.v3.js +9294 -0
- data/vendor/assets/javascripts/nv.d3.js +14369 -0
- data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
- data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
- data/vendor/assets/stylesheets/bootstrap.css +1727 -0
- data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
- data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
- data/vendor/assets/stylesheets/d3.parsets.css +34 -0
- data/vendor/assets/stylesheets/nv.d3.css +769 -0
- data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
- data/vendor/assets/stylesheets/rde-default-style.css +42 -0
- metadata +250 -0
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2014 Jo Hund
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
rails-data-explorer
|
2
|
+
===================
|
3
|
+
|
4
|
+
rails-data-explorer is a Rails Engine plugin that makes it easy to explore the
|
5
|
+
data in your app using charts and statistics.
|
6
|
+
|
7
|
+
Be sure to read the full [documentation](http://rails-data-explorer.clearcove.ca)
|
8
|
+
to find out more!
|
9
|
+
|
10
|
+
|
11
|
+
### Installation
|
12
|
+
|
13
|
+
`gem install rails-data-explorer`
|
14
|
+
|
15
|
+
or with bundler in your Gemfile:
|
16
|
+
|
17
|
+
`gem 'rails-data-explorer'`
|
18
|
+
|
19
|
+
|
20
|
+
### Concepts
|
21
|
+
|
22
|
+
* Exploration - top level container
|
23
|
+
* DataSet - like a spreadsheet with one or more columns of data
|
24
|
+
* DataSeries - like a column in a spreadsheet, with multiple rows of data
|
25
|
+
* DataType - Each DataSeries contains data of a certain type.
|
26
|
+
* Categorical
|
27
|
+
* Quantitative
|
28
|
+
* Integer
|
29
|
+
* Decimal
|
30
|
+
* Temporal
|
31
|
+
* Geo
|
32
|
+
* Chart - renders the data series of a DataSet as a visualization or table (e.g., BoxPlot, ContingencyTable)
|
33
|
+
|
34
|
+
|
35
|
+
### Resources
|
36
|
+
|
37
|
+
* [Documentation](http://rails-data-explorer.clearcove.ca)
|
38
|
+
* [Live demo](http://rails-data-explorer-demo.herokuapp.com)
|
39
|
+
* [Changelog](https://github.com/jhund/rails-data-explorer/blob/master/CHANGELOG.md)
|
40
|
+
* [Source code (github)](https://github.com/jhund/rails-data-explorer)
|
41
|
+
* [Issues](https://github.com/jhund/rails-data-explorer/issues)
|
42
|
+
* [Rubygems.org](http://rubygems.org/gems/rails-data-explorer)
|
43
|
+
|
44
|
+
### License
|
45
|
+
|
46
|
+
[MIT licensed](https://github.com/jhund/rails-data-explorer/blob/master/MIT-LICENSE).
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
### Copyright
|
51
|
+
|
52
|
+
Copyright (c) 2014 Jo Hund. See [(MIT) LICENSE](https://github.com/jhund/rails-data-explorer/blob/master/MIT-LICENSE) for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: utf-8

# Rake tasks for the gem: Bundler's gem tasks (via
# Bundler::GemHelper.install_tasks) plus a test task that runs the specs.
begin
  require 'bundler'
rescue LoadError
  # Without Bundler the next statement would fail with a confusing NameError,
  # so stop right here with an actionable message instead of just printing it.
  abort 'You must `gem install bundler` and `bundle install` to run rake tasks'
end
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# Defines the test task; it loads every file matching spec/**/*_spec.rb with
# both lib/ and spec/ on the load path.
Rake::TestTask.new do |t|
  t.libs.push "lib"
  t.libs.push "spec"
  t.pattern = "spec/**/*_spec.rb"
  t.verbose = true
end

# Running plain `rake` runs the tests.
task :default => ['test']
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'color'
|
2
|
+
require 'descriptive-statistics'
|
3
|
+
require 'distribution'
|
4
|
+
require 'interpolate'
|
5
|
+
|
6
|
+
require 'rails-data-explorer/engine'
|
7
|
+
|
8
|
+
require 'rails-data-explorer/chart'
|
9
|
+
require 'rails-data-explorer/data_series'
|
10
|
+
require 'rails-data-explorer/data_set'
|
11
|
+
require 'rails-data-explorer/data_type'
|
12
|
+
require 'rails-data-explorer/exploration'
|
13
|
+
require 'rails-data-explorer/statistics/rng_category'
|
14
|
+
require 'rails-data-explorer/statistics/rng_gaussian'
|
15
|
+
require 'rails-data-explorer/statistics/rng_power_law'
|
16
|
+
require 'rails-data-explorer/utils/color_scale'
|
17
|
+
require 'rails-data-explorer/utils/value_formatter'
|
18
|
+
|
19
|
+
require 'rails-data-explorer/chart/box_plot'
|
20
|
+
require 'rails-data-explorer/chart/box_plot_group'
|
21
|
+
require 'rails-data-explorer/chart/contingency_table'
|
22
|
+
require 'rails-data-explorer/chart/descriptive_statistics_table'
|
23
|
+
require 'rails-data-explorer/chart/histogram_categorical'
|
24
|
+
require 'rails-data-explorer/chart/histogram_quantitative'
|
25
|
+
require 'rails-data-explorer/chart/histogram_temporal'
|
26
|
+
require 'rails-data-explorer/chart/parallel_coordinates'
|
27
|
+
require 'rails-data-explorer/chart/parallel_set'
|
28
|
+
require 'rails-data-explorer/chart/pie_chart'
|
29
|
+
require 'rails-data-explorer/chart/scatterplot'
|
30
|
+
require 'rails-data-explorer/chart/stacked_bar_chart_categorical_percent'
|
31
|
+
require 'rails-data-explorer/data_type/categorical'
|
32
|
+
require 'rails-data-explorer/data_type/quantitative'
|
33
|
+
require 'rails-data-explorer/data_type/quantitative/decimal'
|
34
|
+
require 'rails-data-explorer/data_type/quantitative/integer'
|
35
|
+
require 'rails-data-explorer/data_type/quantitative/temporal'
|
36
|
+
|
37
|
+
class RailsDataExplorer

  class << self

    # Shortcut for building an Exploration: every argument is forwarded
    # unchanged to RailsDataExplorer::Exploration.new, and the resulting
    # Exploration (not a RailsDataExplorer instance) is returned.
    def new(*args)
      Exploration.new(*args)
    end

  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
class RailsDataExplorer
  # Base class for charts. Provides the DOM id, the default render? decision
  # and an HTML table renderer shared by sub classes.
  class Chart

    include ActionView::Helpers::NumberHelper
    attr_accessor :output_buffer # required for content_tag
    include ActionView::Helpers::TagHelper

    # Returns the DOM id used for this chart's container element. It is
    # derived from the Ruby object_id of the chart instance.
    def dom_id
      "rde-chart-#{ object_id }"
    end

    # Tells whether this chart will be rendered. The decision sometimes can't
    # be made before render time, so sub classes may override this, e.g., to
    # skip ParallelCoordinates when all data series are categorical.
    def render?
      true
    end

  protected

    # Renders an HTML table.
    # @param[OpenStruct, Struct] table_struct responds to #rows; each row
    #   responds to #tag, #css_class and #cells.
    # @return [String] the html_safe markup of the table
    def render_html_table(table_struct)
      content_tag(:table, :class => 'table rde-table') do
        row_tags = table_struct.rows.map do |row|
          content_tag(row.tag, :class => row.css_class) do
            row.cells.map { |cell| render_table_cell(cell) }.join.html_safe
          end
        end
        row_tags.join.html_safe
      end
    end

    # Renders a single table cell.
    # When the cell has a ruby_formatter, the formatter is evaluated in the
    # context of this chart with the cell's value; otherwise the raw value is
    # used as the cell content.
    # @param[OpenStruct, Struct] cell responds to #tag, #value, #css_class,
    #   #title, #style and #ruby_formatter.
    def render_table_cell(cell)
      formatter = cell.ruby_formatter
      cell_content = formatter ? instance_exec(cell.value, &formatter) : cell.value
      content_tag(
        cell.tag,
        cell_content,
        :class => cell.css_class,
        :title => cell.title,
        :style => cell.style
      )
    end

  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# http://johan.github.io/d3/ex/box.html
|
2
|
+
# http://bl.ocks.org/mbostock/4061502
|
3
|
+
class RailsDataExplorer
  class Chart
    # Renders a box plot (using d3.box) for the first data series of a data set.
    class BoxPlot < Chart

      # @param _data_set [Object] responds to #data_series
      # @param options [Hash] stored in @options; not read by this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Collects the attributes required to render the box plot.
      # @return [Hash, false] the chart attributes, or false if the data set
      #   contains no data series (nothing to plot).
      def compute_chart_attrs
        x_ds = @data_set.data_series.first
        # Guard against an empty data set instead of failing on nil below.
        return false if x_ds.nil?

        {
          values: [x_ds.values],
          min: x_ds.min_val,
          max: x_ds.max_val,
          base_width: 120,
          base_height: 1334,
          axis_tick_format: x_ds.axis_tick_format,
        }
      end

      # @return [String] HTML and JavaScript for the chart, or an empty string
      #   if the chart should not or cannot be rendered.
      def render
        return '' unless render?
        ca = compute_chart_attrs
        return '' unless ca
        # NOTE(review): the svg height is set from base_width, presumably
        # because the plot is rotated by 90 degrees below -- confirm.
        %(
          <div id="#{ dom_id }" class="rde-chart rde-box-plot">
            <svg class="box" style="height: #{ ca[:base_width] }px;"></svg>

            <script type="text/javascript">
              (function() {
                var base_width = #{ ca[:base_width] },
                    base_height = #{ ca[:base_height] },
                    margin = { top: 10, right: 50, bottom: 95, left: 50 },
                    width = base_width - margin.left - margin.right,
                    height = base_height - margin.top - margin.bottom;

                var min = #{ ca[:min] },
                    max = #{ ca[:max] };

                var chart = d3.box()
                    .whiskers(iqr(1.5))
                    .width(width)
                    .height(height)
                    .tickFormat(#{ ca[:axis_tick_format] });

                var data = #{ ca[:values].to_json };

                chart.domain([min, max]);

                var svg = d3.select("##{ dom_id }").selectAll("svg")
                    .data(data)
                    .append("g")
                    .attr("transform", "rotate(90) translate(" + (width + margin.left) + " -" + (height + margin.bottom) + ")")
                    .call(chart);

                // Function to compute the interquartile range.
                function iqr(k) {
                  return function(d, i) {
                    var q1 = d.quartiles[0],
                        q3 = d.quartiles[2],
                        iqr = (q3 - q1) * k,
                        i = -1,
                        j = d.length;
                    while (d[++i] < q1 - iqr);
                    while (d[--j] > q3 + iqr);
                    return [i, j];
                  };
                }
              })();
            </script>
          </div>
        )
      end

    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# http://bl.ocks.org/jensgrubert/7789216
|
2
|
+
# http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
|
3
|
+
# http://mbostock.github.io/protovis/ex/box-and-whisker.html
|
4
|
+
# http://bl.ocks.org/mbostock/4061502
|
5
|
+
# http://johan.github.io/d3/ex/box.html
|
6
|
+
# http://johan.github.io/d3/ex/box.html
|
7
|
+
# http://bl.ocks.org/mbostock/4061502
|
8
|
+
class RailsDataExplorer
  class Chart
    # Renders a group of box plots (using d3.box): the values of one data
    # series (x) are split into groups by the values of a second data
    # series (y), and one box plot is drawn per group.
    class BoxPlotGroup < Chart

      # @param _data_set [Object] responds to #data_series
      # @param options [Hash] stored in @options; not read by this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Picks the x and y data series and groups the x values by y value.
      # A series qualifies for a role when its chart_roles entry for this
      # chart class contains that role or :any.
      # @return [Hash, false] the chart attributes, or false if no suitable
      #   pair of distinct x and y data series exists.
      def compute_chart_attrs
        x_candidates = @data_set.data_series.select { |ds|
          (ds.chart_roles[Chart::BoxPlotGroup] & [:x, :any]).any?
        }
        y_candidates = @data_set.data_series.select { |ds|
          (ds.chart_roles[Chart::BoxPlotGroup] & [:y, :any]).any?
        }

        x_ds = x_candidates.first
        # The same series must not be used for both roles.
        y_ds = (y_candidates - [x_ds]).first

        return false if x_ds.nil? || y_ds.nil?

        # Seed one bucket per unique y value so that groups keep the order of
        # y_ds.uniq_vals, then distribute the x values by position.
        values_by_group = y_ds.uniq_vals.each_with_object({}) { |y_val, groups|
          groups[y_val] = []
        }
        y_ds.values.each_with_index { |y_val, idx|
          values_by_group[y_val] << x_ds.values[idx]
        }

        {
          values: values_by_group.values,
          min: x_ds.min_val, # global min across all groups
          max: x_ds.max_val, # global max across all groups
          base_width: 120,
          base_height: 1334,
          axis_tick_format: x_ds.axis_tick_format,
        }
      end

      # @return [String] HTML and JavaScript for the chart, or an empty string
      #   if the chart should not or cannot be rendered.
      def render
        return '' unless render?
        ca = compute_chart_attrs
        return '' unless ca
        # The script below binds the groups to the svg elements that already
        # exist (there is no enter selection), so emit exactly one svg per
        # group. A fixed number of svg elements would drop or leave empty
        # plots whenever the number of groups differs.
        svg_elements = ca[:values].map {
          %(<svg class="box" style="height: #{ ca[:base_width] }px;"></svg>)
        }.join("\n")
        %(
          <div id="#{ dom_id }" class="rde-chart rde-box-plot">
            #{ svg_elements }

            <script type="text/javascript">
              (function() {
                var base_width = #{ ca[:base_width] },
                    base_height = #{ ca[:base_height] },
                    margin = { top: 10, right: 50, bottom: 95, left: 50 },
                    width = base_width - margin.left - margin.right,
                    height = base_height - margin.top - margin.bottom;

                var min = #{ ca[:min] },
                    max = #{ ca[:max] };

                var chart = d3.box()
                    .whiskers(iqr(1.5))
                    .width(width)
                    .height(height)
                    .tickFormat(#{ ca[:axis_tick_format] });

                var data = #{ ca[:values].to_json };

                chart.domain([min, max]);

                var svg = d3.select("##{ dom_id }").selectAll("svg")
                    .data(data)
                    .append("g")
                    .attr("transform", "rotate(90) translate(" + (width + margin.left) + " -" + (height + margin.bottom) + ")")
                    .call(chart);

                // Function to compute the interquartile range.
                function iqr(k) {
                  return function(d, i) {
                    var q1 = d.quartiles[0],
                        q3 = d.quartiles[2],
                        iqr = (q3 - q1) * k,
                        i = -1,
                        j = d.length;
                    while (d[++i] < q1 - iqr);
                    while (d[--j] > q3 + iqr);
                    return [i, j];
                  };
                }
              })();
            </script>
          </div>
        )
      end

    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# See this project for code to compute chi_square and contingency_coefficient
|
2
|
+
# https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
|
3
|
+
#
|
4
|
+
# Resources for Chi Squared Test
|
5
|
+
# * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
|
6
|
+
# * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
|
7
|
+
# * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
|
8
|
+
class RailsDataExplorer
  class Chart
    # Renders a contingency table of observed counts for two data series and
    # runs Pearson's chi squared test of independence on them.
    class ContingencyTable < Chart

      # @param _data_set [Object] responds to #data_series and #dimensions_count
      # @param options [Hash] stored in @options; not read by this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Builds the table structure consumed by render_html_table and, as a
      # side effect, computes @observed_vals, @expected_vals, @conclusion, etc.
      # Columns (x values) and rows (y values) are sorted by descending totals.
      # @return [OpenStruct, false] a struct with :rows, or false if no
      #   suitable pair of distinct x and y data series exists.
      # @raise [ArgumentError] unless the data set has exactly two dimensions
      def compute_chart_attrs
        # Validate first so that no statistics are computed for input that is
        # going to be rejected anyway.
        unless 2 == @data_set.dimensions_count
          raise ArgumentError, "Exactly two data series required for contingency table."
        end

        # A series qualifies for a role when its chart_roles entry for this
        # chart class contains that role or :any.
        x_candidates = @data_set.data_series.select { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [:x, :any]).any?
        }
        y_candidates = @data_set.data_series.select { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [:y, :any]).any?
        }

        x_ds = x_candidates.first
        # The same series must not be used for both roles.
        y_ds = (y_candidates - [x_ds]).first

        return false if x_ds.nil? || y_ds.nil?

        # Compute @observed_vals, @expected_vals, etc.
        compute_contingency_and_chi_squared!(x_ds, y_ds)

        x_sorted_keys = x_ds.uniq_vals.sort { |a,b|
          @observed_vals[b][:_sum] <=> @observed_vals[a][:_sum]
        }
        y_sorted_keys = y_ds.uniq_vals.sort { |a,b|
          @observed_vals[:_sum][b] <=> @observed_vals[:_sum][a]
        }

        OpenStruct.new(
          :rows => [header_row(x_sorted_keys)] +
                   y_sorted_keys.map { |y_val| data_row(y_val, x_sorted_keys) } +
                   [footer_row(x_sorted_keys)]
        )
      end

      # @return [String] HTML for the table followed by a paragraph with the
      #   test's conclusion, or an empty string if nothing can be rendered.
      def render
        return '' unless render?
        ca = compute_chart_attrs
        return '' unless ca
        content_tag(:div, :class => 'rde-chart rde-contingency-table', :id => dom_id) do
          content_tag(:h3, "Contingency Table", :class => 'rde-chart-title') +
          render_html_table(ca)
        end +
        content_tag(:p, @conclusion)
      end

      def render?
        # http://en.wikipedia.org/wiki/Pearson's_chi-squared_test#Assumptions
        true
      end

    private

      # Top header row: an empty corner cell, one cell per x value, 'Totals'.
      # @param x_sorted_keys [Array] x values in display order
      def header_row(x_sorted_keys)
        OpenStruct.new(
          :css_class => 'rde-column_header',
          :tag => :tr,
          :cells => [OpenStruct.new(:tag => :th, :value => '')] +
                    x_sorted_keys.map { |x_val|
                      OpenStruct.new(:tag => :th, :value => x_val)
                    } +
                    [OpenStruct.new(:tag => :th, :value => 'Totals')]
        )
      end

      # Data row for one y value: row header, one observed count per x value
      # (expected value as tooltip, colored by deviation) and the row total.
      # @param y_val [Object] the y value this row represents
      # @param x_sorted_keys [Array] x values in display order
      def data_row(y_val, x_sorted_keys)
        OpenStruct.new(
          :css_class => 'rde-data_row',
          :tag => :tr,
          :cells => [
            OpenStruct.new(:tag => :th, :value => y_val, :css_class => 'rde-row_header')
          ] +
          x_sorted_keys.map { |x_val|
            OpenStruct.new(
              :tag => :td,
              :value => @observed_vals[x_val][y_val],
              :css_class => 'rde-numerical',
              :title => "Expected value: #{ number_with_precision(@expected_vals[x_val][y_val]) }",
              :style => "color: #{ @delta_attrs[x_val][y_val][:color] };"
            )
          } +
          [OpenStruct.new(:tag => :th, :value => @observed_vals[:_sum][y_val])]
        )
      end

      # Footer row: 'Totals' header, one column total per x value and the
      # grand total.
      # @param x_sorted_keys [Array] x values in display order
      def footer_row(x_sorted_keys)
        OpenStruct.new(
          :css_class => 'rde-column_header',
          :tag => :tr,
          :cells => [
            OpenStruct.new(:tag => :th, :value => 'Totals', :css_class => 'rde-row_header')
          ] +
          x_sorted_keys.map { |x_val|
            OpenStruct.new(:tag => :th, :value => @observed_vals[x_val][:_sum])
          } +
          [OpenStruct.new(:tag => :th, :value => @observed_vals[:_sum][:_sum])]
        )
      end

      # Computes @observed_vals, @expected_vals, @chi_squared, etc.
      # Totals are stored under the :_sum key of the respective hash.
      # @param[DataSeries] x_ds
      # @param[DataSeries] y_ds
      def compute_contingency_and_chi_squared!(x_ds, y_ds)
        x_vals = x_ds.uniq_vals
        y_vals = y_ds.uniq_vals

        # Compute the observed values table. Zero every cell and total once,
        # then count the value pairs position by position.
        @observed_vals = { :_sum => { :_sum => 0 } }
        y_vals.each { |y_val| @observed_vals[:_sum][y_val] = 0 }
        x_vals.each { |x_val|
          @observed_vals[x_val] = { :_sum => 0 }
          y_vals.each { |y_val| @observed_vals[x_val][y_val] = 0 }
        }
        x_ds.values.each_with_index { |x_val, idx|
          y_val = y_ds.values[idx]
          @observed_vals[x_val][y_val] += 1
          @observed_vals[:_sum][y_val] += 1
          @observed_vals[x_val][:_sum] += 1
          @observed_vals[:_sum][:_sum] += 1
        }

        # Compute degrees of freedom
        @degrees_of_freedom = (x_ds.uniq_vals_count - 1) * (y_ds.uniq_vals_count - 1)

        # Compute the expected values table, chi squared and the per cell
        # deltas in a single pass over all cells.
        total = @observed_vals[:_sum][:_sum].to_f
        @expected_vals = {}
        @delta_attrs = {}
        @chi_squared = 0
        color_scale = RailsDataExplorer::Utils::ColorScale.new
        x_vals.each { |x_val|
          @expected_vals[x_val] = {}
          @delta_attrs[x_val] = {}
          y_vals.each { |y_val|
            # expected = column total * row total / grand total
            expected = (
              @observed_vals[:_sum][y_val] * @observed_vals[x_val][:_sum]
            ) / total
            delta = @observed_vals[x_val][y_val] - expected
            delta_factor = delta / expected
            @expected_vals[x_val][y_val] = expected
            @chi_squared += (delta ** 2) / expected
            @delta_attrs[x_val][y_val] = {
              :expected => expected,
              :color => color_scale.compute(delta_factor),
              :delta => delta,
              :delta_factor => delta_factor,
            }
          }
        }

        # Compute probability of observing a sample statistic as extreme as the
        # observed test statistic.
        @p_value = 1 - Distribution::ChiSquare.cdf(@chi_squared, @degrees_of_freedom)
        # Set significance_level
        @significance_level = 0.05

        # Compute conclusion. The series names are escaped because the whole
        # string is marked html_safe and would otherwise be emitted verbatim.
        relation = @p_value <= @significance_level ? 'dependent' : 'independent'
        @conclusion = [
          %(<a href="http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test#Test_of_independence">Pearson chi squared test of independence</a> suggests that ),
          "#{ ERB::Util.html_escape(x_ds.name) } and #{ ERB::Util.html_escape(y_ds.name) } ",
          "are #{ relation } variables (p_value: #{ number_with_precision(@p_value) })",
        ].join.html_safe
      end

    end
  end
end
|