rails-data-explorer 0.2.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -3
  3. data/README.md +2 -0
  4. data/lib/rails-data-explorer-no-rails.rb +36 -32
  5. data/lib/rails-data-explorer.rb +38 -35
  6. data/lib/rails_data_explorer.rb +29 -10
  7. data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
  8. data/lib/rails_data_explorer/active_record_extension.rb +19 -0
  9. data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
  10. data/lib/rails_data_explorer/chart/anova.rb +1 -0
  11. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
  12. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
  13. data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
  14. data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
  15. data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
  16. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
  17. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
  18. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
  19. data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
  20. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
  21. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
  22. data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
  23. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
  24. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
  25. data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
  26. data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
  27. data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
  28. data/lib/rails_data_explorer/data_series.rb +241 -0
  29. data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
  30. data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
  31. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
  32. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
  33. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
  34. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
  35. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
  36. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
  37. data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
  38. data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
  39. data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
  40. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
  41. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
  42. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
  43. data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
  44. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
  45. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
  46. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
  47. data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
  48. data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
  49. data/rails-data-explorer.gemspec +5 -6
  50. data/spec/rails_data_explorer/chart_spec.rb +11 -0
  51. data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
  52. data/spec/rails_data_explorer/data_set_spec.rb +31 -0
  53. data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
  54. data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
  55. data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
  56. data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
  57. data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
  58. data/spec/rails_data_explorer/data_type_spec.rb +7 -0
  59. data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
  60. data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
  61. data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
  62. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
  63. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
  64. data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
  65. data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
  66. metadata +91 -82
  67. data/lib/rails-data-explorer/active_record_extension.rb +0 -14
  68. data/lib/rails-data-explorer/constants.rb +0 -5
  69. data/lib/rails-data-explorer/data_series.rb +0 -156
  70. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
  71. data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -1,75 +0,0 @@
1
- =begin
2
-
3
- From http://en.wikipedia.org/wiki/Pearson's_chi-squared_test
4
-
5
- Pearson's chi-squared test is used to assess whether paired observations on two
6
- variables, expressed in a contingency table, are independent of each other.
7
-
8
- An "observation" consists of the values of two outcomes and the null hypothesis
9
- is that the occurrence of these outcomes is statistically independent. Each
10
- observation is allocated to one cell of a two-dimensional array of cells (called
11
- a contingency table) according to the values of the two outcomes.
12
-
13
- Assumptions
14
- -----------
15
-
16
- The chi-squared test, when used with the standard approximation that a chi-
17
- squared distribution is applicable, has the following assumptions:
18
-
19
- * Simple random sample – The sample data is a random sampling from a fixed
20
- distribution or population where every collection of members of the population
21
- of the given sample size has an equal probability of selection. Variants of
22
- the test have been developed for complex samples, such as where the data is
23
- weighted. Other forms can be used such as purposive sampling.
24
- * Sample size (whole table) – A sample with a sufficiently large size is assumed.
25
- If a chi squared test is conducted on a sample with a smaller size, then the
26
- chi squared test will yield an inaccurate inference. The researcher, by using
27
- chi squared test on small samples, might end up committing a Type II error.
28
- * Expected cell count – Adequate expected cell counts. Some require 5 or more,
29
- and others require 10 or more. A common rule is 5 or more in all cells of a
30
- 2-by-2 table, and 5 or more in 80% of cells in larger tables, but no cells
31
- with zero expected count. When this assumption is not met, Yates's Correction
32
- is applied.
33
- * Independence – The observations are always assumed to be independent of each
34
- other. This means chi-squared cannot be used to test correlated data
35
- (like matched pairs or panel data). In those cases you might want to turn to
36
- McNemar's test.
37
-
38
- Problems
39
- --------
40
-
41
- The approximation to the chi-squared distribution breaks down if expected
42
- frequencies are too low. It will normally be acceptable so long as no more than
43
- 20% of the events have expected frequencies below 5. Where there is only 1
44
- degree of freedom, the approximation is not reliable if expected frequencies are
45
- below 10. In this case, a better approximation can be obtained by reducing the
46
- absolute value of each difference between observed and expected frequencies by
47
- 0.5 before squaring; this is called Yates's correction for continuity.
48
-
49
- In cases where the expected value, E, is found to be small (indicating a small
50
- underlying population probability, and/or a small number of observations), the
51
- normal approximation of the multinomial distribution can fail, and in such cases
52
- it is found to be more appropriate to use the G-test, a likelihood ratio-based
53
- test statistic. Where the total sample size is small, it is necessary to use an
54
- appropriate exact test, typically either the binomial test or (for contingency
55
- tables) Fisher's exact test. This test uses the conditional distribution of the
56
- test statistic given the marginal totals; however, it does not assume that the
57
- data were generated from an experiment in which the marginal totals are fixed
58
- and is valid whether or not that is the case.
59
-
60
- =end
61
-
62
- class RailsDataExplorer
63
- module Statistics
64
- class PearsonsChiSquaredIndependenceTest
65
-
66
- #
67
- def initialize(data_matrix, min_probability = 0.05)
68
- end
69
-
70
- def compute
71
- end
72
-
73
- end
74
- end
75
- end
@@ -1,34 +0,0 @@
1
- require_relative '../../helper_no_rails'
2
-
3
- class RailsDataExplorer
4
- class DataType
5
- describe Categorical do
6
-
7
- let(:dt) { Categorical.new }
8
- let(:values) { ['a', 'a', 'b', 'c'] }
9
-
10
- describe "#descriptive_statistics" do
11
-
12
- let(:desc_stats) {
13
- dt.descriptive_statistics(values)
14
- }
15
-
16
- it "computes count for each uniq val" do
17
- desc_stats.detect{ |e| 'a_count' == e[:label] }[:value].must_equal 2
18
- end
19
-
20
- it "computes percent for each uniq val" do
21
- desc_stats.detect{ |e| 'a_percent' == e[:label] }[:value].must_equal 50.0
22
- end
23
-
24
- it "computes total count" do
25
- desc_stats.detect{ |e| 'Total_count' == e[:label] }[:value].must_equal 4
26
- end
27
- end
28
-
29
- describe "#available_chart_types" do
30
- end
31
-
32
- end
33
- end
34
- end