rails-data-explorer 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -3
- data/README.md +2 -0
- data/lib/rails-data-explorer-no-rails.rb +36 -32
- data/lib/rails-data-explorer.rb +38 -35
- data/lib/rails_data_explorer.rb +29 -10
- data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
- data/lib/rails_data_explorer/active_record_extension.rb +19 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
- data/lib/rails_data_explorer/chart/anova.rb +1 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
- data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
- data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
- data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
- data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
- data/lib/rails_data_explorer/data_series.rb +241 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
- data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
- data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
- data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
- data/rails-data-explorer.gemspec +5 -6
- data/spec/rails_data_explorer/chart_spec.rb +11 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
- data/spec/rails_data_explorer/data_set_spec.rb +31 -0
- data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
- data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
- data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
- data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
- data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
- data/spec/rails_data_explorer/data_type_spec.rb +7 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
- data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
- data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
- data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
- data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
- data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
- metadata +91 -82
- data/lib/rails-data-explorer/active_record_extension.rb +0 -14
- data/lib/rails-data-explorer/constants.rb +0 -5
- data/lib/rails-data-explorer/data_series.rb +0 -156
- data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
- data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -1,75 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
|
3
|
-
From http://en.wikipedia.org/wiki/Pearson's_chi-squared_test
|
4
|
-
|
5
|
-
Pearson's chi-squared test is used to assess whether paired observations on two
|
6
|
-
variables, expressed in a contingency table, are independent of each other.
|
7
|
-
|
8
|
-
An "observation" consists of the values of two outcomes and the null hypothesis
|
9
|
-
is that the occurrence of these outcomes is statistically independent. Each
|
10
|
-
observation is allocated to one cell of a two-dimensional array of cells (called
|
11
|
-
a contingency table) according to the values of the two outcomes.
|
12
|
-
|
13
|
-
Assumptions
|
14
|
-
-----------
|
15
|
-
|
16
|
-
The chi-squared test, when used with the standard approximation that a chi-
|
17
|
-
squared distribution is applicable, has the following assumptions:
|
18
|
-
|
19
|
-
* Simple random sample – The sample data is a random sampling from a fixed
|
20
|
-
distribution or population where every collection of members of the population
|
21
|
-
of the given sample size has an equal probability of selection. Variants of
|
22
|
-
the test have been developed for complex samples, such as where the data is
|
23
|
-
weighted. Other forms can be used such as purposive sampling.
|
24
|
-
* Sample size (whole table) – A sample with a sufficiently large size is assumed.
|
25
|
-
If a chi-squared test is conducted on a sample with a smaller size, then the
|
26
|
-
chi-squared test will yield an inaccurate inference. The researcher, by using
|
27
|
-
a chi-squared test on small samples, might end up committing a Type II error.
|
28
|
-
* Expected cell count – Adequate expected cell counts. Some require 5 or more,
|
29
|
-
and others require 10 or more. A common rule is 5 or more in all cells of a
|
30
|
-
2-by-2 table, and 5 or more in 80% of cells in larger tables, but no cells
|
31
|
-
with zero expected count. When this assumption is not met, Yates's Correction
|
32
|
-
is applied.
|
33
|
-
* Independence – The observations are always assumed to be independent of each
|
34
|
-
other. This means chi-squared cannot be used to test correlated data
|
35
|
-
(like matched pairs or panel data). In those cases you might want to turn to
|
36
|
-
McNemar's test.
|
37
|
-
|
38
|
-
Problems
|
39
|
-
--------
|
40
|
-
|
41
|
-
The approximation to the chi-squared distribution breaks down if expected
|
42
|
-
frequencies are too low. It will normally be acceptable so long as no more than
|
43
|
-
20% of the events have expected frequencies below 5. Where there is only 1
|
44
|
-
degree of freedom, the approximation is not reliable if expected frequencies are
|
45
|
-
below 10. In this case, a better approximation can be obtained by reducing the
|
46
|
-
absolute value of each difference between observed and expected frequencies by
|
47
|
-
0.5 before squaring; this is called Yates's correction for continuity.
|
48
|
-
|
49
|
-
In cases where the expected value, E, is found to be small (indicating a small
|
50
|
-
underlying population probability, and/or a small number of observations), the
|
51
|
-
normal approximation of the multinomial distribution can fail, and in such cases
|
52
|
-
it is found to be more appropriate to use the G-test, a likelihood ratio-based
|
53
|
-
test statistic. Where the total sample size is small, it is necessary to use an
|
54
|
-
appropriate exact test, typically either the binomial test or (for contingency
|
55
|
-
tables) Fisher's exact test. This test uses the conditional distribution of the
|
56
|
-
test statistic given the marginal totals; however, it does not assume that the
|
57
|
-
data were generated from an experiment in which the marginal totals are fixed
|
58
|
-
and is valid whether or not that is the case.
|
59
|
-
|
60
|
-
=end
|
61
|
-
|
62
|
-
class RailsDataExplorer
|
63
|
-
module Statistics
|
64
|
-
class PearsonsChiSquaredIndependenceTest
|
65
|
-
|
66
|
-
#
|
67
|
-
def initialize(data_matrix, min_probability = 0.05)
|
68
|
-
end
|
69
|
-
|
70
|
-
def compute
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require_relative '../../helper_no_rails'
|
2
|
-
|
3
|
-
class RailsDataExplorer
|
4
|
-
class DataType
|
5
|
-
describe Categorical do
|
6
|
-
|
7
|
-
let(:dt) { Categorical.new }
|
8
|
-
let(:values) { ['a', 'a', 'b', 'c'] }
|
9
|
-
|
10
|
-
describe "#descriptive_statistics" do
|
11
|
-
|
12
|
-
let(:desc_stats) {
|
13
|
-
dt.descriptive_statistics(values)
|
14
|
-
}
|
15
|
-
|
16
|
-
it "computes count for each uniq val" do
|
17
|
-
desc_stats.detect{ |e| 'a_count' == e[:label] }[:value].must_equal 2
|
18
|
-
end
|
19
|
-
|
20
|
-
it "computes percent for each uniq val" do
|
21
|
-
desc_stats.detect{ |e| 'a_percent' == e[:label] }[:value].must_equal 50.0
|
22
|
-
end
|
23
|
-
|
24
|
-
it "computes total count" do
|
25
|
-
desc_stats.detect{ |e| 'Total_count' == e[:label] }[:value].must_equal 4
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
describe "#available_chart_types" do
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|