rails-data-explorer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +52 -0
- data/Rakefile +18 -0
- data/lib/rails-data-explorer.rb +44 -0
- data/lib/rails-data-explorer/action_view_extension.rb +12 -0
- data/lib/rails-data-explorer/active_record_extension.rb +14 -0
- data/lib/rails-data-explorer/chart.rb +52 -0
- data/lib/rails-data-explorer/chart/box_plot.rb +79 -0
- data/lib/rails-data-explorer/chart/box_plot_group.rb +109 -0
- data/lib/rails-data-explorer/chart/contingency_table.rb +189 -0
- data/lib/rails-data-explorer/chart/descriptive_statistics_table.rb +22 -0
- data/lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb +0 -0
- data/lib/rails-data-explorer/chart/histogram_categorical.rb +73 -0
- data/lib/rails-data-explorer/chart/histogram_quantitative.rb +73 -0
- data/lib/rails-data-explorer/chart/histogram_temporal.rb +78 -0
- data/lib/rails-data-explorer/chart/multi_dimensional_charts.rb +1 -0
- data/lib/rails-data-explorer/chart/parallel_coordinates.rb +89 -0
- data/lib/rails-data-explorer/chart/parallel_set.rb +65 -0
- data/lib/rails-data-explorer/chart/pie_chart.rb +67 -0
- data/lib/rails-data-explorer/chart/scatterplot.rb +120 -0
- data/lib/rails-data-explorer/chart/scatterplot_matrix.rb +1 -0
- data/lib/rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb +120 -0
- data/lib/rails-data-explorer/data_series.rb +115 -0
- data/lib/rails-data-explorer/data_set.rb +127 -0
- data/lib/rails-data-explorer/data_type.rb +34 -0
- data/lib/rails-data-explorer/data_type/categorical.rb +117 -0
- data/lib/rails-data-explorer/data_type/geo.rb +1 -0
- data/lib/rails-data-explorer/data_type/quantitative.rb +109 -0
- data/lib/rails-data-explorer/data_type/quantitative/decimal.rb +13 -0
- data/lib/rails-data-explorer/data_type/quantitative/integer.rb +13 -0
- data/lib/rails-data-explorer/data_type/quantitative/temporal.rb +62 -0
- data/lib/rails-data-explorer/engine.rb +24 -0
- data/lib/rails-data-explorer/exploration.rb +89 -0
- data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +75 -0
- data/lib/rails-data-explorer/statistics/rng_category.rb +37 -0
- data/lib/rails-data-explorer/statistics/rng_gaussian.rb +24 -0
- data/lib/rails-data-explorer/statistics/rng_power_law.rb +21 -0
- data/lib/rails-data-explorer/utils/color_scale.rb +33 -0
- data/lib/rails-data-explorer/utils/data_binner.rb +8 -0
- data/lib/rails-data-explorer/utils/data_encoder.rb +2 -0
- data/lib/rails-data-explorer/utils/data_quantizer.rb +2 -0
- data/lib/rails-data-explorer/utils/value_formatter.rb +41 -0
- data/rails-data-explorer.gemspec +30 -0
- data/vendor/assets/javascripts/d3.boxplot.js +302 -0
- data/vendor/assets/javascripts/d3.parcoords.js +585 -0
- data/vendor/assets/javascripts/d3.parsets.js +663 -0
- data/vendor/assets/javascripts/d3.v3.js +9294 -0
- data/vendor/assets/javascripts/nv.d3.js +14369 -0
- data/vendor/assets/javascripts/rails-data-explorer.js +19 -0
- data/vendor/assets/stylesheets/bootstrap-theme.css +346 -0
- data/vendor/assets/stylesheets/bootstrap.css +1727 -0
- data/vendor/assets/stylesheets/d3.boxplot.css +20 -0
- data/vendor/assets/stylesheets/d3.parcoords.css +34 -0
- data/vendor/assets/stylesheets/d3.parsets.css +34 -0
- data/vendor/assets/stylesheets/nv.d3.css +769 -0
- data/vendor/assets/stylesheets/rails-data-explorer.css +21 -0
- data/vendor/assets/stylesheets/rde-default-style.css +42 -0
- metadata +250 -0
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2014 Jo Hund
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
rails-data-explorer
|
2
|
+
===================
|
3
|
+
|
4
|
+
rails-data-explorer is a Rails Engine plugin that makes it easy to explore the
|
5
|
+
data in your app using charts and statistics.
|
6
|
+
|
7
|
+
Make sure to go to the thorough [documentation](http://rails-data-explorer.clearcove.ca)
|
8
|
+
to find out more!
|
9
|
+
|
10
|
+
|
11
|
+
### Installation
|
12
|
+
|
13
|
+
`gem install rails-data-explorer`
|
14
|
+
|
15
|
+
or with bundler in your Gemfile:
|
16
|
+
|
17
|
+
`gem 'rails-data-explorer'`
|
18
|
+
|
19
|
+
|
20
|
+
### Concepts
|
21
|
+
|
22
|
+
* Exploration - top level container
|
23
|
+
* DataSet - like a spreadsheet with one or more columns of data
|
24
|
+
* DataSeries - like a column in a spreadsheet, with multiple rows of data
|
25
|
+
* DataType - Each DataSeries contains data of a certain type.
|
26
|
+
* Categorical
|
27
|
+
* Quantitative
|
28
|
+
* Integer
|
29
|
+
* Decimal
|
30
|
+
* Temporal
|
31
|
+
* Geo
|
32
|
+
* Chart - A visualization or table rendered from the DataSeries in a DataSet.
|
33
|
+
|
34
|
+
|
35
|
+
### Resources
|
36
|
+
|
37
|
+
* [Documentation](http://rails-data-explorer.clearcove.ca)
|
38
|
+
* [Live demo](http://rails-data-explorer-demo.herokuapp.com)
|
39
|
+
* [Changelog](https://github.com/jhund/rails-data-explorer/blob/master/CHANGELOG.md)
|
40
|
+
* [Source code (github)](https://github.com/jhund/rails-data-explorer)
|
41
|
+
* [Issues](https://github.com/jhund/rails-data-explorer/issues)
|
42
|
+
* [Rubygems.org](http://rubygems.org/gems/rails-data-explorer)
|
43
|
+
|
44
|
+
### License
|
45
|
+
|
46
|
+
[MIT licensed](https://github.com/jhund/rails-data-explorer/blob/master/MIT-LICENSE).
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
### Copyright
|
51
|
+
|
52
|
+
Copyright (c) 2014 Jo Hund. See [(MIT) LICENSE](https://github.com/jhund/rails-data-explorer/blob/master/MIT-LICENSE) for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: utf-8
# Rake tasks for the gem: Bundler's gem tasks plus a `test` task that is also
# the default task.
begin
  require 'bundler'
rescue LoadError
  # Stop right here: continuing would only fail a line later with a NameError
  # on the undefined `Bundler` constant, hiding this hint.
  abort 'You must `gem install bundler` and `bundle install` to run rake tasks'
end
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# Runs every file matching spec/**/*_spec.rb with lib/ and spec/ on the load path.
Rake::TestTask.new do |t|
  t.libs.push "lib"
  t.libs.push "spec"
  t.pattern = "spec/**/*_spec.rb"
  t.verbose = true
end

task :default => ['test']
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'color'
|
2
|
+
require 'descriptive-statistics'
|
3
|
+
require 'distribution'
|
4
|
+
require 'interpolate'
|
5
|
+
|
6
|
+
require 'rails-data-explorer/engine'
|
7
|
+
|
8
|
+
require 'rails-data-explorer/chart'
|
9
|
+
require 'rails-data-explorer/data_series'
|
10
|
+
require 'rails-data-explorer/data_set'
|
11
|
+
require 'rails-data-explorer/data_type'
|
12
|
+
require 'rails-data-explorer/exploration'
|
13
|
+
require 'rails-data-explorer/statistics/rng_category'
|
14
|
+
require 'rails-data-explorer/statistics/rng_gaussian'
|
15
|
+
require 'rails-data-explorer/statistics/rng_power_law'
|
16
|
+
require 'rails-data-explorer/utils/color_scale'
|
17
|
+
require 'rails-data-explorer/utils/value_formatter'
|
18
|
+
|
19
|
+
require 'rails-data-explorer/chart/box_plot'
|
20
|
+
require 'rails-data-explorer/chart/box_plot_group'
|
21
|
+
require 'rails-data-explorer/chart/contingency_table'
|
22
|
+
require 'rails-data-explorer/chart/descriptive_statistics_table'
|
23
|
+
require 'rails-data-explorer/chart/histogram_categorical'
|
24
|
+
require 'rails-data-explorer/chart/histogram_quantitative'
|
25
|
+
require 'rails-data-explorer/chart/histogram_temporal'
|
26
|
+
require 'rails-data-explorer/chart/parallel_coordinates'
|
27
|
+
require 'rails-data-explorer/chart/parallel_set'
|
28
|
+
require 'rails-data-explorer/chart/pie_chart'
|
29
|
+
require 'rails-data-explorer/chart/scatterplot'
|
30
|
+
require 'rails-data-explorer/chart/stacked_bar_chart_categorical_percent'
|
31
|
+
require 'rails-data-explorer/data_type/categorical'
|
32
|
+
require 'rails-data-explorer/data_type/quantitative'
|
33
|
+
require 'rails-data-explorer/data_type/quantitative/decimal'
|
34
|
+
require 'rails-data-explorer/data_type/quantitative/integer'
|
35
|
+
require 'rails-data-explorer/data_type/quantitative/temporal'
|
36
|
+
|
37
|
+
class RailsDataExplorer

  # Convenience method to instantiate a new Exploration, so that callers can
  # write `RailsDataExplorer.new(...)`.
  # All positional arguments, and a block if one is given, are handed to
  # RailsDataExplorer::Exploration.new unchanged.
  # @return[RailsDataExplorer::Exploration]
  def self.new(*args, &block)
    RailsDataExplorer::Exploration.new(*args, &block)
  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
class RailsDataExplorer
  # Base class for the charts in this library. Provides a DOM id, a default
  # `render?` and a helper that turns a table structure into an HTML table.
  class Chart

    include ActionView::Helpers::NumberHelper
    attr_accessor :output_buffer # required for content_tag
    include ActionView::Helpers::TagHelper

    # DOM id for this chart instance, derived from the Ruby object_id.
    def dom_id
      "rde-chart-#{ object_id }"
    end

    # Tells whether this chart will be rendered. The decision sometimes can't
    # be made before render time, so sub classes may override this, e.g., to
    # skip ParallelCoordinates when all data series are categorical.
    def render?
      true
    end

  protected

    # Renders an HTML table
    # @param[OpenStruct, Struct] table_struct responds to `rows`; each row
    #   responds to `tag`, `css_class` and `cells`.
    # @return[String] the html_safe markup of the table
    def render_html_table(table_struct)
      content_tag(:table, :class => 'table rde-table') do
        rows_markup = table_struct.rows.map { |row|
          content_tag(row.tag, :class => row.css_class) do
            row.cells.map { |cell| render_html_table_cell(cell) }.join.html_safe
          end
        }
        rows_markup.join.html_safe
      end
    end

    # Renders a single table cell.
    # @param[OpenStruct, Struct] cell responds to `tag`, `value`, `css_class`,
    #   `title`, `style` and `ruby_formatter`. When `ruby_formatter` is set it
    #   is executed in the context of this chart with the cell's value, and
    #   its result becomes the cell content.
    def render_html_table_cell(cell)
      formatter = cell.ruby_formatter
      cell_content = formatter ? instance_exec(cell.value, &formatter) : cell.value
      cell_attrs = { :class => cell.css_class, :title => cell.title, :style => cell.style }
      content_tag(cell.tag, cell_content, cell_attrs)
    end

  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# http://johan.github.io/d3/ex/box.html
|
2
|
+
# http://bl.ocks.org/mbostock/4061502
|
3
|
+
class RailsDataExplorer
  class Chart
    # Renders a d3.box() box plot for the first data series of a data set.
    class BoxPlot < Chart

      # @param[DataSet] _data_set its first data series is plotted
      # @param[Hash, optional] options kept in @options; not read by this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Collects the values that #render interpolates into the markup.
      # @return[Hash, false] the chart attributes, or false when the data set
      #   has no data series to plot.
      def compute_chart_attrs
        x_ds = @data_set.data_series.first
        # Nothing to plot: signal this the same way BoxPlotGroup does.
        return false if x_ds.nil?

        {
          values: [x_ds.values],
          min: x_ds.min_val,
          max: x_ds.max_val,
          base_width: 120,
          base_height: 1334,
          axis_tick_format: x_ds.axis_tick_format,
        }
      end

      # @return[String] markup and inline JavaScript of the chart, or an empty
      #   string when the chart is not to be rendered or has no data series.
      def render
        return '' unless render?
        ca = compute_chart_attrs
        return '' unless ca
        # NOTE: the <svg> height is set from base_width; the <g> element is
        # rotated by 90 degrees in the transform below.
        %(
          <div id="#{ dom_id }" class="rde-chart rde-box-plot">
            <svg class="box" style="height: #{ ca[:base_width] }px;"></svg>

            <script type="text/javascript">
              (function() {
                var base_width = #{ ca[:base_width] },
                    base_height = #{ ca[:base_height] },
                    margin = { top: 10, right: 50, bottom: 95, left: 50 },
                    width = base_width - margin.left - margin.right,
                    height = base_height - margin.top - margin.bottom;

                var min = #{ ca[:min] },
                    max = #{ ca[:max] };

                var chart = d3.box()
                    .whiskers(iqr(1.5))
                    .width(width)
                    .height(height)
                    .tickFormat(#{ ca[:axis_tick_format] });

                var data = #{ ca[:values].to_json };

                chart.domain([min, max]);

                var svg = d3.select("##{ dom_id }").selectAll("svg")
                    .data(data)
                    .append("g")
                    .attr("transform", "rotate(90) translate(" + (width + margin.left) + " -" + (height + margin.bottom) + ")")
                    .call(chart);

                // Function to compute the interquartile range.
                function iqr(k) {
                  return function(d, i) {
                    var q1 = d.quartiles[0],
                        q3 = d.quartiles[2],
                        iqr = (q3 - q1) * k,
                        i = -1,
                        j = d.length;
                    while (d[++i] < q1 - iqr);
                    while (d[--j] > q3 + iqr);
                    return [i, j];
                  };
                }
              })();
            </script>
          </div>
        )
      end

    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# http://bl.ocks.org/jensgrubert/7789216
# http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
# http://mbostock.github.io/protovis/ex/box-and-whisker.html
# http://bl.ocks.org/mbostock/4061502
# http://johan.github.io/d3/ex/box.html
class RailsDataExplorer
  class Chart
    # Renders one d3.box() box plot per group: the values of the x data series
    # are grouped by the value the y data series has at the same index.
    class BoxPlotGroup < Chart

      # @param[DataSet] _data_set provides the x and y data series
      # @param[Hash, optional] options kept in @options; not read by this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Picks the x and y data series by their chart roles for this chart and
      # groups the x values by y value.
      # @return[Hash, false] the chart attributes, or false when no x or no y
      #   data series is available.
      def compute_chart_attrs
        x_candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::BoxPlotGroup] & [:x, :any]).any?
        }
        y_candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::BoxPlotGroup] & [:y, :any]).any?
        }

        x_ds = x_candidates.first
        # A series can't be used for both axes.
        y_ds = (y_candidates - [x_ds]).first

        return false if x_ds.nil? || y_ds.nil?

        # One (initially empty) bucket per distinct y value, in uniq_vals order.
        values_hash = y_ds.uniq_vals.each_with_object({}) { |y_val, m|
          m[y_val] = []
        }
        y_ds.values.each_with_index { |y_val, idx|
          values_hash[y_val] << x_ds.values[idx]
        }

        {
          values: values_hash.values,
          min: x_ds.min_val, # global min, shared by all groups
          max: x_ds.max_val, # global max, shared by all groups
          base_width: 120,
          base_height: 1334,
          axis_tick_format: x_ds.axis_tick_format,
        }
      end

      # @return[String] markup and inline JavaScript of the chart, or an empty
      #   string when the chart is not to be rendered or a data series is
      #   missing.
      def render
        return '' unless render?
        ca = compute_chart_attrs
        return '' unless ca
        # The script binds the groups to the <svg> elements that already exist
        # (`selectAll("svg").data(data)` without `enter()`), so exactly one
        # <svg> per group has to be emitted for every group to be drawn.
        svg_tags = ca[:values].map {
          %(<svg class="box" style="height: #{ ca[:base_width] }px;"></svg>)
        }.join("\n            ")
        %(
          <div id="#{ dom_id }" class="rde-chart rde-box-plot">
            #{ svg_tags }

            <script type="text/javascript">
              (function() {
                var base_width = #{ ca[:base_width] },
                    base_height = #{ ca[:base_height] },
                    margin = { top: 10, right: 50, bottom: 95, left: 50 },
                    width = base_width - margin.left - margin.right,
                    height = base_height - margin.top - margin.bottom;

                var min = #{ ca[:min] },
                    max = #{ ca[:max] };

                var chart = d3.box()
                    .whiskers(iqr(1.5))
                    .width(width)
                    .height(height)
                    .tickFormat(#{ ca[:axis_tick_format] });

                var data = #{ ca[:values].to_json };

                chart.domain([min, max]);

                var svg = d3.select("##{ dom_id }").selectAll("svg")
                    .data(data)
                    .append("g")
                    .attr("transform", "rotate(90) translate(" + (width + margin.left) + " -" + (height + margin.bottom) + ")")
                    .call(chart);

                // Function to compute the interquartile range.
                function iqr(k) {
                  return function(d, i) {
                    var q1 = d.quartiles[0],
                        q3 = d.quartiles[2],
                        iqr = (q3 - q1) * k,
                        i = -1,
                        j = d.length;
                    while (d[++i] < q1 - iqr);
                    while (d[--j] > q3 + iqr);
                    return [i, j];
                  };
                }
              })();
            </script>
          </div>
        )
      end

    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# See this project for code to compute chi_square and contingency_coefficient
|
2
|
+
# https://github.com/bioruby/bioruby/blob/master/lib/bio/util/contingency_table.rb
|
3
|
+
#
|
4
|
+
# Resources for Chi Squared Test
|
5
|
+
# * http://www.quora.com/What-is-the-most-intuitive-explanation-for-the-chi-square-test
|
6
|
+
# * http://people.revoledu.com/kardi/tutorial/Questionnaire/Chi-Square%20IndependentTest.html
|
7
|
+
# * http://stattrek.com/chi-square-test/independence.aspx?Tutorial=AP
|
8
|
+
class RailsDataExplorer
  class Chart
    # Renders a contingency table (observed counts for every combination of
    # the values of two data series, with row and column totals) followed by
    # the conclusion of Pearson's chi squared test of independence.
    class ContingencyTable < Chart

      # @param[DataSet] _data_set provides the two data series
      # @param[Hash, optional] options kept in @options; not read by this class
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Picks the x and y data series by their chart roles for this chart,
      # computes the statistics and builds the table structure that is handed
      # to #render_html_table.
      # Columns (x values) and rows (y values) are ordered by descending total.
      # @return[OpenStruct] responds to `rows`: a header row, one row per y
      #   value and a footer row with the column totals.
      # @raise[ArgumentError] unless the data set has exactly two dimensions
      #   and both an x and a y data series can be found.
      def compute_chart_attrs
        x_candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [:x, :any]).any?
        }
        y_candidates = @data_set.data_series.find_all { |ds|
          (ds.chart_roles[Chart::ContingencyTable] & [:y, :any]).any?
        }

        x_ds = x_candidates.first
        # A series can't be used for both axes.
        y_ds = (y_candidates - [x_ds]).first

        # Validate before computing anything: the statistics below call
        # methods on both series, so a missing series would otherwise surface
        # as a NoMethodError on nil rather than as this error.
        if @data_set.dimensions_count != 2 || x_ds.nil? || y_ds.nil?
          raise ArgumentError, "Exactly two data series required for contingency table."
        end

        # Compute @observed_vals, @expected_vals, etc.
        compute_contingency_and_chi_squared!(x_ds, y_ds)

        x_sorted_keys = x_ds.uniq_vals.sort { |a,b|
          @observed_vals[b][:_sum] <=> @observed_vals[a][:_sum]
        }
        y_sorted_keys = y_ds.uniq_vals.sort { |a,b|
          @observed_vals[:_sum][b] <=> @observed_vals[:_sum][a]
        }

        OpenStruct.new(
          :rows => [header_row(x_sorted_keys)] +
                   y_sorted_keys.map { |y_val| data_row(x_sorted_keys, y_val) } +
                   [footer_row(x_sorted_keys)]
        )
      end

      # @return[String] the table wrapped in a div, followed by a paragraph
      #   with the test conclusion; an empty string if the chart is not to be
      #   rendered.
      def render
        return '' unless render?
        ca = compute_chart_attrs
        content_tag(:div, :class => 'rde-chart rde-contingency-table', :id => dom_id) do
          content_tag(:h3, "Contingency Table", :class => 'rde-chart-title') +
          render_html_table(ca)
        end +
        content_tag(:p, @conclusion)
      end

      def render?
        # http://en.wikipedia.org/wiki/Pearson's_chi-squared_test#Assumptions
        true
      end

    private

      # Builds the top header row: an empty corner cell, one cell per x value
      # and a 'Totals' cell.
      def header_row(x_keys)
        OpenStruct.new(
          :css_class => 'rde-column_header',
          :tag => :tr,
          :cells => [OpenStruct.new(:tag => :th, :value => '')] +
                    x_keys.map { |x_val| OpenStruct.new(:tag => :th, :value => x_val) } +
                    [OpenStruct.new(:tag => :th, :value => 'Totals')]
        )
      end

      # Builds the data row for y_val: row header, one observed count per x
      # value (expected value as title, delta color as style) and the row total.
      def data_row(x_keys, y_val)
        count_cells = x_keys.map { |x_val|
          OpenStruct.new(
            :tag => :td,
            :value => @observed_vals[x_val][y_val],
            :css_class => 'rde-numerical',
            :title => "Expected value: #{ number_with_precision(@expected_vals[x_val][y_val]) }",
            :style => "color: #{ @delta_attrs[x_val][y_val][:color] };"
          )
        }
        OpenStruct.new(
          :css_class => 'rde-data_row',
          :tag => :tr,
          :cells => [OpenStruct.new(:tag => :th, :value => y_val, :css_class => 'rde-row_header')] +
                    count_cells +
                    [OpenStruct.new(:tag => :th, :value => @observed_vals[:_sum][y_val])]
        )
      end

      # Builds the footer row: 'Totals' header, one column total per x value
      # and the grand total.
      def footer_row(x_keys)
        OpenStruct.new(
          :css_class => 'rde-column_header',
          :tag => :tr,
          :cells => [OpenStruct.new(:tag => :th, :value => 'Totals', :css_class => 'rde-row_header')] +
                    x_keys.map { |x_val|
                      OpenStruct.new(:tag => :th, :value => @observed_vals[x_val][:_sum])
                    } +
                    [OpenStruct.new(:tag => :th, :value => @observed_vals[:_sum][:_sum])]
        )
      end

      # Computes @observed_vals, @expected_vals, @chi_squared, etc.
      # @param[DataSeries] x_ds
      # @param[DataSeries] y_ds
      def compute_contingency_and_chi_squared!(x_ds, y_ds)
        x_vals = x_ds.uniq_vals
        y_vals = y_ds.uniq_vals

        @observed_vals = compute_observed_vals(x_ds, y_ds, x_vals, y_vals)
        @degrees_of_freedom = (x_ds.uniq_vals_count - 1) * (y_ds.uniq_vals_count - 1)
        @expected_vals = compute_expected_vals(x_vals, y_vals)
        @chi_squared = compute_chi_squared(x_vals, y_vals)
        @delta_attrs = compute_delta_attrs(x_vals, y_vals)
        # Compute probability of observing a sample statistic as extreme as the
        # observed test statistic.
        @p_value = 1 - Distribution::ChiSquare.cdf(@chi_squared, @degrees_of_freedom)
        @significance_level = 0.05
        @conclusion = compute_conclusion(x_ds, y_ds)
      end

      # Counts how often each (x, y) combination occurs at the same index of
      # the two series. The key :_sum holds the totals: per column in the
      # :_sum row, per row under each x value, and the grand total at
      # [:_sum][:_sum].
      def compute_observed_vals(x_ds, y_ds, x_vals, y_vals)
        observed = { :_sum => { :_sum => 0 } }
        # Column totals only need to be zeroed once, not once per x value.
        y_vals.each { |y_val| observed[:_sum][y_val] = 0 }
        x_vals.each { |x_val|
          observed[x_val] = { :_sum => 0 }
          y_vals.each { |y_val| observed[x_val][y_val] = 0 }
        }
        x_ds.values.each_with_index { |x_val, idx|
          y_val = y_ds.values[idx]
          observed[x_val][y_val] += 1
          observed[:_sum][y_val] += 1
          observed[x_val][:_sum] += 1
          observed[:_sum][:_sum] += 1
        }
        observed
      end

      # Computes the expected count of every cell from @observed_vals:
      # column total * row total / grand total.
      def compute_expected_vals(x_vals, y_vals)
        grand_total = @observed_vals[:_sum][:_sum].to_f
        x_vals.each_with_object({}) { |x_val, expected|
          expected[x_val] = y_vals.each_with_object({}) { |y_val, row|
            row[y_val] = (
              @observed_vals[:_sum][y_val] * @observed_vals[x_val][:_sum]
            ) / grand_total
          }
        }
      end

      # Sums (observed - expected)^2 / expected over all cells.
      def compute_chi_squared(x_vals, y_vals)
        chi_squared = 0
        x_vals.each { |x_val|
          y_vals.each { |y_val|
            expected = @expected_vals[x_val][y_val]
            chi_squared += ((@observed_vals[x_val][y_val] - expected) ** 2) / expected
          }
        }
        chi_squared
      end

      # Computes, per cell, the difference between observed and expected
      # count, that difference relative to the expected count, and the color
      # the color scale returns for the relative difference.
      def compute_delta_attrs(x_vals, y_vals)
        color_scale = RailsDataExplorer::Utils::ColorScale.new
        x_vals.each_with_object({}) { |x_val, attrs|
          attrs[x_val] = y_vals.each_with_object({}) { |y_val, row|
            expected = @expected_vals[x_val][y_val]
            delta = @observed_vals[x_val][y_val] - expected
            delta_factor = delta / expected.to_f
            row[y_val] = {
              :expected => expected,
              :color => color_scale.compute(delta_factor),
              :delta => delta,
              :delta_factor => delta_factor,
            }
          }
        }
      end

      # Phrases the test outcome: the series are called dependent if @p_value
      # is at or below @significance_level, independent otherwise.
      # NOTE(review): the series names are interpolated unescaped into a
      # string that is marked html_safe - confirm that names never carry
      # untrusted input.
      def compute_conclusion(x_ds, y_ds)
        relation = @p_value <= @significance_level ? 'dependent' : 'independent'
        conclusion = %(<a href="http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test#Test_of_independence">Pearson chi squared test of independence</a> suggests that )
        conclusion << "#{ x_ds.name } and #{ y_ds.name } are #{ relation } variables (p_value: #{ number_with_precision(@p_value) })"
        conclusion.html_safe
      end
    end
  end
end
|