rails-data-explorer 0.2.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -3
  3. data/README.md +2 -0
  4. data/lib/rails-data-explorer-no-rails.rb +36 -32
  5. data/lib/rails-data-explorer.rb +38 -35
  6. data/lib/rails_data_explorer.rb +29 -10
  7. data/lib/{rails-data-explorer → rails_data_explorer}/action_view_extension.rb +39 -17
  8. data/lib/rails_data_explorer/active_record_extension.rb +19 -0
  9. data/lib/{rails-data-explorer → rails_data_explorer}/chart.rb +10 -0
  10. data/lib/rails_data_explorer/chart/anova.rb +1 -0
  11. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot.rb +12 -3
  12. data/lib/{rails-data-explorer → rails_data_explorer}/chart/box_plot_group.rb +49 -22
  13. data/lib/{rails-data-explorer → rails_data_explorer}/chart/contingency_table.rb +19 -8
  14. data/lib/{rails-data-explorer → rails_data_explorer}/chart/descriptive_statistics_table.rb +9 -0
  15. data/lib/rails_data_explorer/chart/descriptive_statistics_table_group.rb +1 -0
  16. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_categorical.rb +12 -8
  17. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_quantitative.rb +12 -2
  18. data/lib/{rails-data-explorer → rails_data_explorer}/chart/histogram_temporal.rb +11 -2
  19. data/lib/{rails-data-explorer → rails_data_explorer}/chart/multi_dimensional_charts.rb +2 -0
  20. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_coordinates.rb +11 -1
  21. data/lib/{rails-data-explorer → rails_data_explorer}/chart/parallel_set.rb +11 -2
  22. data/lib/{rails-data-explorer → rails_data_explorer}/chart/pie_chart.rb +12 -8
  23. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot.rb +13 -1
  24. data/lib/{rails-data-explorer → rails_data_explorer}/chart/scatterplot_matrix.rb +2 -0
  25. data/lib/{rails-data-explorer/chart/stacked_bar_chart_categorical_percent.rb → rails_data_explorer/chart/stacked_bar_chart_categorical.rb} +37 -14
  26. data/lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb +28 -0
  27. data/lib/rails_data_explorer/chart/stacked_histogram_temporal.rb +199 -0
  28. data/lib/rails_data_explorer/data_series.rb +241 -0
  29. data/lib/{rails-data-explorer → rails_data_explorer}/data_set.rb +13 -4
  30. data/lib/{rails-data-explorer → rails_data_explorer}/data_type.rb +13 -0
  31. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/categorical.rb +79 -18
  32. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/geo.rb +2 -0
  33. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative.rb +14 -4
  34. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/decimal.rb +9 -0
  35. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/integer.rb +9 -0
  36. data/lib/{rails-data-explorer → rails_data_explorer}/data_type/quantitative/temporal.rb +9 -0
  37. data/lib/{rails-data-explorer → rails_data_explorer}/engine.rb +12 -0
  38. data/lib/{rails-data-explorer → rails_data_explorer}/exploration.rb +11 -0
  39. data/lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb +72 -0
  40. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_category.rb +13 -0
  41. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_gaussian.rb +12 -1
  42. data/lib/{rails-data-explorer → rails_data_explorer}/statistics/rng_power_law.rb +11 -0
  43. data/lib/{rails-data-explorer → rails_data_explorer}/utils/color_scale.rb +6 -0
  44. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_binner.rb +13 -8
  45. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_encoder.rb +2 -0
  46. data/lib/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer.rb +8 -3
  47. data/lib/{rails-data-explorer → rails_data_explorer}/utils/rde_table.rb +14 -11
  48. data/lib/{rails-data-explorer → rails_data_explorer}/utils/value_formatter.rb +9 -4
  49. data/rails-data-explorer.gemspec +5 -6
  50. data/spec/rails_data_explorer/chart_spec.rb +11 -0
  51. data/spec/{rails-data-explorer → rails_data_explorer}/data_series_spec.rb +0 -0
  52. data/spec/rails_data_explorer/data_set_spec.rb +31 -0
  53. data/spec/rails_data_explorer/data_type/categorical_spec.rb +126 -0
  54. data/{lib/rails-data-explorer/chart/descriptive_statistics_table_group.rb → spec/rails_data_explorer/data_type/quantitative/decimal_spec.rb} +0 -0
  55. data/spec/rails_data_explorer/data_type/quantitative/integer_spec.rb +0 -0
  56. data/spec/rails_data_explorer/data_type/quantitative/temporal_spec.rb +34 -0
  57. data/spec/rails_data_explorer/data_type/quantitative_spec.rb +118 -0
  58. data/spec/rails_data_explorer/data_type_spec.rb +7 -0
  59. data/spec/{rails-data-explorer → rails_data_explorer}/exploration_spec.rb +5 -5
  60. data/spec/rails_data_explorer/statistics/pearsons_chi_squared_independence_test_spec.rb +0 -0
  61. data/spec/rails_data_explorer/utils/color_scale_spec.rb +13 -0
  62. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_binner_spec.rb +0 -0
  63. data/spec/{rails-data-explorer → rails_data_explorer}/utils/data_quantizer_spec.rb +0 -0
  64. data/spec/rails_data_explorer/utils/value_formatter_spec.rb +33 -0
  65. data/vendor/assets/stylesheets/sources/rde-default-style.css +5 -1
  66. metadata +91 -82
  67. data/lib/rails-data-explorer/active_record_extension.rb +0 -14
  68. data/lib/rails-data-explorer/constants.rb +0 -5
  69. data/lib/rails-data-explorer/data_series.rb +0 -156
  70. data/lib/rails-data-explorer/statistics/pearsons_chi_squared_independence_test.rb +0 -75
  71. data/spec/rails-data-explorer/data_type/categorical_spec.rb +0 -34
@@ -1,75 +0,0 @@
1
- =begin
2
-
3
- From http://en.wikipedia.org/wiki/Pearson's_chi-squared_test
4
-
5
- Pearson's chi-squared test is used to assess whether paired observations on two
6
- variables, expressed in a contingency table, are independent of each other.
7
-
8
- An "observation" consists of the values of two outcomes and the null hypothesis
9
- is that the occurrence of these outcomes is statistically independent. Each
10
- observation is allocated to one cell of a two-dimensional array of cells (called
11
- a contingency table) according to the values of the two outcomes.
12
-
13
- Assumptions
14
- -----------
15
-
16
- The chi-squared test, when used with the standard approximation that a chi-
17
- squared distribution is applicable, has the following assumptions:
18
-
19
- * Simple random sample – The sample data is a random sampling from a fixed
20
- distribution or population where every collection of members of the population
21
- of the given sample size has an equal probability of selection. Variants of
22
- the test have been developed for complex samples, such as where the data is
23
- weighted. Other forms can be used such as purposive sampling.
24
- * Sample size (whole table) – A sample with a sufficiently large size is assumed.
25
- If a chi squared test is conducted on a sample with a smaller size, then the
26
- chi squared test will yield an inaccurate inference. The researcher, by using
27
- chi squared test on small samples, might end up committing a Type II error.
28
- * Expected cell count – Adequate expected cell counts. Some require 5 or more,
29
- and others require 10 or more. A common rule is 5 or more in all cells of a
30
- 2-by-2 table, and 5 or more in 80% of cells in larger tables, but no cells
31
- with zero expected count. When this assumption is not met, Yates's Correction
32
- is applied.
33
- * Independence – The observations are always assumed to be independent of each
34
- other. This means chi-squared cannot be used to test correlated data
35
- (like matched pairs or panel data). In those cases you might want to turn to
36
- McNemar's test.
37
-
38
- Problems
39
- --------
40
-
41
- The approximation to the chi-squared distribution breaks down if expected
42
- frequencies are too low. It will normally be acceptable so long as no more than
43
- 20% of the events have expected frequencies below 5. Where there is only 1
44
- degree of freedom, the approximation is not reliable if expected frequencies are
45
- below 10. In this case, a better approximation can be obtained by reducing the
46
- absolute value of each difference between observed and expected frequencies by
47
- 0.5 before squaring; this is called Yates's correction for continuity.
48
-
49
- In cases where the expected value, E, is found to be small (indicating a small
50
- underlying population probability, and/or a small number of observations), the
51
- normal approximation of the multinomial distribution can fail, and in such cases
52
- it is found to be more appropriate to use the G-test, a likelihood ratio-based
53
- test statistic. Where the total sample size is small, it is necessary to use an
54
- appropriate exact test, typically either the binomial test or (for contingency
55
- tables) Fisher's exact test. This test uses the conditional distribution of the
56
- test statistic given the marginal totals; however, it does not assume that the
57
- data were generated from an experiment in which the marginal totals are fixed
58
- and is valid whether or not that is the case.
59
-
60
- =end
61
-
62
- class RailsDataExplorer
63
- module Statistics
64
- class PearsonsChiSquaredIndependenceTest
65
-
66
- #
67
- def initialize(data_matrix, min_probability = 0.05)
68
- end
69
-
70
- def compute
71
- end
72
-
73
- end
74
- end
75
- end
@@ -1,34 +0,0 @@
1
- require_relative '../../helper_no_rails'
2
-
3
- class RailsDataExplorer
4
- class DataType
5
- describe Categorical do
6
-
7
- let(:dt) { Categorical.new }
8
- let(:values) { ['a', 'a', 'b', 'c'] }
9
-
10
- describe "#descriptive_statistics" do
11
-
12
- let(:desc_stats) {
13
- dt.descriptive_statistics(values)
14
- }
15
-
16
- it "computes count for each uniq val" do
17
- desc_stats.detect{ |e| 'a_count' == e[:label] }[:value].must_equal 2
18
- end
19
-
20
- it "computes percent for each uniq val" do
21
- desc_stats.detect{ |e| 'a_percent' == e[:label] }[:value].must_equal 50.0
22
- end
23
-
24
- it "computes total count" do
25
- desc_stats.detect{ |e| 'Total_count' == e[:label] }[:value].must_equal 4
26
- end
27
- end
28
-
29
- describe "#available_chart_types" do
30
- end
31
-
32
- end
33
- end
34
- end