statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,12 @@
1
+ Copyright (c) 2009-2015, Claudio Bustos
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
+
10
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
+
12
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,175 @@
1
+ # Statsample
2
+
3
+ [![Build Status](https://travis-ci.org/SciRuby/statsample.svg?branch=master)](https://travis-ci.org/SciRuby/statsample)
4
+ [![Code Climate](https://codeclimate.com/github/SciRuby/statsample/badges/gpa.svg)](https://codeclimate.com/github/SciRuby/statsample)
5
+ [![Gem Version](https://badge.fury.io/rb/statsample.svg)](http://badge.fury.io/rb/statsample)
6
+
7
+ Homepage :: https://github.com/sciruby/statsample
8
+
9
+ # Installation
10
+
11
+ You should have a recent version of GSL and R (with the `irr` and `Rserve` libraries) installed. In Ubuntu:
12
+
13
+ ```bash
14
+ $ sudo apt-get install libgsl0-dev r-base r-base-dev
15
+ $ sudo Rscript -e "install.packages(c('Rserve', 'irr'))"
16
+ ```
17
+
18
+ With these libraries in place, just install from rubygems:
19
+
20
+ ```bash
21
+ $ [sudo] gem install statsample
22
+ ```
23
+
24
+ On *nix, you should install statsample-optimization to retrieve the gems gsl and statistics2, plus a C extension that speeds up some methods.
25
+
26
+ ```bash
27
+ $ [sudo] gem install statsample-optimization
28
+ ```
29
+
30
+ If you need to work on Structural Equation Modeling, see `statsample-sem`. You need R with the `sem` or [OpenMx](http://openmx.psyc.virginia.edu/) libraries installed.
31
+
32
+ ```bash
33
+ $ [sudo] gem install statsample-sem
34
+ ```
35
+ # Testing
36
+
37
+ See CONTRIBUTING for information on testing and contributing to statsample.
38
+
39
+ # Documentation
40
+
41
+ You can see the latest documentation in [rubydoc.info](http://www.rubydoc.info/github/sciruby/statsample/master).
42
+
43
+ # Usage
44
+
45
+ ## Notebooks
46
+
47
+ You can see some iruby notebooks here:
48
+
49
+ ### Statistics
50
+
51
+ * [Correlation Matrix with daru and statsample](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Correlation%20Matrix%20with%20daru%20and%20statsample.ipynb)
52
+ * [Dominance Analysis with statsample](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Dominance%20Analysis%20with%20statsample.ipynb)
53
+ * [Reliability ICC](http://nbviewer.ipython.org/github/v0dro/sciruby-notebooks/blob/master/Statistics/Reliability%20ICC%20with%20statsample.ipynb)
54
+ * [Levene Test](http://nbviewer.ipython.org/github/v0dro/sciruby-notebooks/blob/master/Statistics/Levene%20Test.ipynb)
55
+ * [Multiple Regression](http://nbviewer.ipython.org/github/v0dro/sciruby-notebooks/blob/master/Statistics/Multiple%20Regression.ipynb)
56
+ * [Parallel Analysis on PCA](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Parallel%20Analysis%20on%20PCA.ipynb)
57
+ * [Polychoric Analysis](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Polychoric%20Correlation.ipynb)
58
+ * [Reliability Scale and Multiscale Analysis](https://github.com/SciRuby/sciruby-notebooks/blob/master/Statistics/Reliability%20Scale%20Analysis.ipynb)
59
+ * [Velicer MAP Test](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Velicer%20MAP%20test.ipynb)
60
+
61
+ ### Visualizations
62
+
63
+ * [Creating Boxplots with daru and statsample](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Boxplot%20with%20daru%20and%20statsample.ipynb)
64
+ * [Creating A Histogram](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Creating%20a%20Histogram.ipynb)
65
+ * [Creating a Scatterplot](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Scatterplot%20with%20statsample.ipynb)
66
+
67
+ ### Working with DataFrame and Vector
68
+
69
+ * [Creating Vectors and DataFrames with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
70
+ * [Detailed Usage of Daru::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
71
+ * [Detailed Usage of Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
72
+ * [Visualizing Data with Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
73
+
74
+ ## Examples
75
+
76
+ See the /examples directory for some use cases. The notebooks listed above have mostly
77
+ the same examples, and they look better, so you might want to look at those first.
78
+
79
+ # Description
80
+
81
+ A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0 and 2.1.1. See `.travis.yml` for more information.
82
+
83
+ Includes:
84
+ - Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
85
+ - Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial, tau a, tau b and gamma. Tetrachoric and polychoric correlations are provided by the `statsample-bivariate-extension` gem.
86
+ - Intra-class correlation
87
+ - Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for One-way ANOVA.
88
+ - Tests: F, T, Levene, Mann-Whitney U.
89
+ - Regression: Simple, Multiple (OLS)
90
+ - Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for estimation of number of factors.
91
+ - Reliability analysis for simple scale and a DSL to easily analyze multiple scales using factor analysis and correlations, if you want it.
92
+ - Basic time series support
93
+ - Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
94
+ - Sample calculation related formulas
95
+ - Structural Equation Modeling (SEM), using R libraries `sem` and `OpenMx`
96
+ - Creates reports on text, html and rtf, using ReportBuilder gem
97
+ - Graphics: Histogram, Boxplot and Scatterplot
98
+
99
+ ## Principles
100
+
101
+ - Software Design:
102
+ - One module/class for each type of analysis
103
+ - Options can be set as a hash on initialize() or via setter methods
104
+ - Clean API for interactive sessions
105
+ - summary() returns all necessary information for interactive sessions
106
+ - All statistical data available through methods on objects
107
+ - All (important) methods should be tested. Better with random data.
108
+ - Statistical Design
109
+ - Results are tested against text results, SPSS and R outputs.
110
+ - Go beyond Null Hypothesis Testing, using confidence intervals and effect sizes when possible
111
+ - (When possible) All references for methods are documented, providing sensible information on documentation
112
+
113
+ # Features
114
+
115
+ - Classes for manipulation and storage of data:
116
+ - Uses [daru](https://github.com/v0dro/daru) for storing data and basic statistics.
117
+ - Statsample::Multiset: multiple datasets with same fields and type of vectors
118
+ - Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors. You can also create contrasts using Statsample::Anova::Contrast
119
+ - Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Includes methods to create correlation and covariance matrices
120
+ - Multiple types of regression.
121
+ - Simple Regression : Statsample::Regression::Simple
122
+ - Multiple Regression: Statsample::Regression::Multiple
123
+ - Factorial Analysis algorithms on Statsample::Factor module.
124
+ - Classes for Extraction of factors:
125
+ - Statsample::Factor::PCA
126
+ - Statsample::Factor::PrincipalAxis
127
+ - Classes for Rotation of factors:
128
+ - Statsample::Factor::Varimax
129
+ - Statsample::Factor::Equimax
130
+ - Statsample::Factor::Quartimax
131
+ - Classes for calculation of factors to retain
132
+ - Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
133
+ - Statsample::Factor::MAP performs Velicer's Minimum Average Partial (MAP) test, which retains components as long as the variance in the correlation matrix represents systematic variance.
134
+ - Dominance Analysis. Based on Budescu and Azen papers, dominance analysis is a method to analyze the relative importance of one predictor relative to another on multiple regression
135
+ - Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables
136
+ - Statsample::DominanceAnalysis::Bootstrap can execute bootstrap analysis to determine dominance stability, as recommended by [Azen & Budescu (2003)](http://psycnet.apa.org/journals/met/8/2/129/).
137
+ - Module Statsample::Codification, to help to codify open questions
138
+ - Converters to export data:
139
+ - Statsample::Mx : Write Mx Files
140
+ - Statsample::GGobi : Write Ggobi files
141
+ - Module Statsample::Crosstab provides function to create crosstab for categorical data
142
+ - Module Statsample::Reliability provides functions to analyze scales with psychometric methods.
143
+ - Class Statsample::Reliability::ScaleAnalysis provides statistics like mean and standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
144
+ - Class Statsample::Reliability::MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
145
+ - Class Statsample::Reliability::ICC provides intra-class correlation, using Shrout & Fleiss(1979) and McGraw & Wong (1996) formulations.
146
+ - Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
147
+ - Module Statsample::Test provides several methods and classes to perform inferential statistics
148
+ - Statsample::Test::BartlettSphericity
149
+ - Statsample::Test::ChiSquare
150
+ - Statsample::Test::F
151
+ - Statsample::Test::KolmogorovSmirnov (only D value)
152
+ - Statsample::Test::Levene
153
+ - Statsample::Test::UMannWhitney
154
+ - Statsample::Test::T
155
+ - Statsample::Test::WilcoxonSignedRank
156
+ - Module Graph provides several classes to create beautiful graphs using rubyvis
157
+ - Statsample::Graph::Boxplot
158
+ - Statsample::Graph::Histogram
159
+ - Statsample::Graph::Scatterplot
160
+ - Gem <tt>bio-statsample-timeseries</tt> provides module Statsample::TimeSeries with support for time series, including ARIMA estimation using Kalman-Filter.
161
+ - Gem <tt>statsample-sem</tt> provides a DSL to R libraries `sem` and `OpenMx`
162
+ - Gem <tt>statsample-glm</tt> provides GLM methods to work with Logistic, Poisson and Gaussian regression, using ML or IRWLS.
163
+ - Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.
164
+
165
+ # Resources
166
+
167
+ - Source code on github :: http://github.com/sciruby/statsample
168
+ - Bug report and feature request :: http://github.com/sciruby/statsample/issues
169
+ - E-mailing list :: https://groups.google.com/forum/#!forum/sciruby-dev
170
+
171
+ # License
172
+
173
+ BSD-3 (See LICENSE.txt)
174
+
175
+ The license could change between versions without prior notice. If you want a specific license, just choose the version that you need.
@@ -0,0 +1,44 @@
1
+ $:.unshift File.expand_path("../lib/", __FILE__)
2
+ lib_folder = File.expand_path("../lib", __FILE__)
3
+
4
+ require 'statsample/version'
5
+ require 'rake'
6
+ require 'rake/testtask'
7
+ require 'rdoc/task'
8
+ require 'bundler/gem_tasks'
9
+
10
+ # Setup the necessary gems, specified in the gemspec.
11
+ require 'bundler'
12
+ begin
13
+ Bundler.setup(:default, :development)
14
+ rescue Bundler::BundlerError => e
15
+ $stderr.puts e.message
16
+ $stderr.puts "Run `bundle install` to install missing gems"
17
+ exit e.status_code
18
+ end
19
+
20
+ Rake::TestTask.new do |t|
21
+ t.pattern = "test/test_*.rb"
22
+ end
23
+
24
+ RDoc::Task.new do |rdoc|
25
+ rdoc.main = "README.md"
26
+ rdoc.rdoc_files.include("README.md", "lib", "History.txt", "LICENSE.txt", "references.txt")
27
+ end
28
+
29
+ desc "Update pot/po files."
30
+ task "gettext:updatepo" do
31
+ require 'gettext/tools'
32
+ GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
33
+ end
34
+
35
+ desc "Create mo-files"
36
+ task "gettext:makemo" do
37
+ require 'gettext/tools'
38
+ GetText.create_mofiles()
39
+ end
40
+
41
+ desc 'Run pry'
42
+ task :pry do |task|
43
+ sh "pry -r #{lib_folder}/statsample.rb"
44
+ end
@@ -0,0 +1,32 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+ cases=250
5
+ vars=20
6
+
7
+ name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
8
+ author 'Clbustos'
9
+ date '2011-01-18'
10
+ summary "
11
+ A correlation matrix could be constructed using matrix algebra or
12
+ mannualy, calculating covariances, means and sd for each pair of vectors.
13
+ In this test, we test the calculation using #{vars} variables with
14
+ #{cases} cases on each vector
15
+ "
16
+
17
+ reps 200 #number of repetitions
18
+
19
+ ds = Daru::DataFrame.new(
20
+ vars.times.inject({}) do |ac,v|
21
+ ac["x#{v}".to_sym]=Daru::Vector.new_with_size(cases) {rand()}
22
+ ac
23
+ end
24
+ )
25
+
26
+ measure "Statsample::Bivariate.correlation_matrix_optimized" do
27
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
28
+ end
29
+
30
+ measure "Statsample::Bivariate.correlation_matrix_pairwise" do
31
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
32
+ end
@@ -0,0 +1,33 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+ cases=500
5
+ vars=5
6
+
7
+
8
+ name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
9
+ author 'Clbustos'
10
+ date '2011-01-18'
11
+ summary "
12
+ A correlation matrix could be constructed using matrix algebra or
13
+ mannualy, calculating covariances, means and sd for each pair of vectors.
14
+ In this test, we test the calculation using #{vars} variables with
15
+ #{cases} cases on each vector
16
+ "
17
+
18
+ reps 200 #number of repetitions
19
+
20
+ ds = Daru::DataFrame.new(
21
+ vars.times.inject({}) do |ac,v|
22
+ ac["x#{v}".to_sym]=Daru::Vector.new_with_size(cases) {rand()}
23
+ ac
24
+ end
25
+ )
26
+
27
+ measure "Statsample::Bivariate.correlation_matrix_optimized" do
28
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
29
+ end
30
+
31
+ measure "Statsample::Bivariate.correlation_matrix_pairwise" do
32
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
33
+ end
@@ -0,0 +1,93 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
5
+ <title>Correlation matrix analysis</title>
6
+ <style>
7
+ body {
8
+ margin:0;
9
+ padding:1em;
10
+ }
11
+ table {
12
+ border-collapse: collapse;
13
+
14
+ }
15
+ table td {
16
+ border: 1px solid black;
17
+ }
18
+ .section {
19
+ margin:0.5em;
20
+ }
21
+ </style>
22
+
23
+ </head><body>
24
+ <h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
25
+ <ul>
26
+ <li><a href='#toc_1'>Multiple reggresion of cases,vars,c_v on time_optimized</a></li>
27
+ <ul>
28
+ <li><a href='#toc_2'>ANOVA</a></li>
29
+ </ul>
30
+ <li><a href='#toc_3'>Multiple reggresion of cases,vars,c_v on time_pairwise</a></li>
31
+ <ul>
32
+ <li><a href='#toc_4'>ANOVA</a></li>
33
+ </ul>
34
+ </ul>
35
+ </div>
36
+ <div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
37
+ <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
38
+ <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
39
+ <p>Cases(listwise)=63(63)</p>
40
+ <p>R=0.978844</p>
41
+ <p>R^2=0.958137</p>
42
+ <p>R^2 Adj=0.956008</p>
43
+ <p>Std.Error R=3.092024</p>
44
+ <p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
45
+ <div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
46
+ <a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
47
+ <tbody>
48
+ <tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
49
+ <tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
50
+ <tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
51
+ </tbody>
52
+ </table>
53
+
54
+ </div>
55
+ <a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
56
+ <tbody>
57
+ <tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
58
+ <tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
59
+ <tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
60
+ <tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
61
+ </tbody>
62
+ </table>
63
+
64
+ </div>
65
+ <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
66
+ <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
67
+ <p>Cases(listwise)=63(63)</p>
68
+ <p>R=0.999637</p>
69
+ <p>R^2=0.999275</p>
70
+ <p>R^2 Adj=0.999238</p>
71
+ <p>Std.Error R=0.538365</p>
72
+ <p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
73
+ <div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
74
+ <a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
75
+ <tbody>
76
+ <tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
77
+ <tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
78
+ <tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
79
+ </tbody>
80
+ </table>
81
+
82
+ </div>
83
+ <a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
84
+ <tbody>
85
+ <tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
86
+ <tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
87
+ <tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
88
+ <tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
89
+ </tbody>
90
+ </table>
91
+
92
+ </div>
93
+ </body></html>
@@ -0,0 +1,71 @@
1
+ # This benchmark creates a dataset of timings used to choose the best algorithm
2
+ # to use for computing a correlation matrix
3
+ require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
4
+ require 'statsample'
5
+ require 'benchmark'
6
+
7
+ def create_dataset(vars,cases)
8
+ ran = Distribution::Normal.rng
9
+ ds = Daru::DataFrame.new(
10
+ vars.times.inject({}) do |ac,v|
11
+ ac["x#{v}".to_sym] = Daru::Vector.new_with_size(cases) {ran.call}
12
+ ac
13
+ end
14
+ )
15
+ end
16
+
17
+ def prediction_pairwise(vars,cases)
18
+ Statsample::Bivariate.prediction_pairwise(vars,cases) / 10
19
+ end
20
+ def prediction_optimized(vars,cases)
21
+ Statsample::Bivariate.prediction_optimized(vars,cases) / 10
22
+ end
23
+
24
+ if !File.exists?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
25
+ reps=100 #number of repetitions
26
+ ds_sizes=[5,10,30,50,100,150,200,500,1000]
27
+ ds_vars=[3,4,5,10,20,30,40]
28
+ #ds_sizes=[5,10]
29
+ #ds_vars=[3,5,20]
30
+ rs = Daru::DataFrame.new({}, order: [:cases, :vars, :time_optimized, :time_pairwise])
31
+
32
+ ds_sizes.each do |cases|
33
+ ds_vars.each do |vars|
34
+ ds = create_dataset(vars,cases)
35
+ time_optimized= Benchmark.realtime do
36
+ reps.times {
37
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
38
+ ds.clear_gsl
39
+ }
40
+ end
41
+
42
+ time_pairwise= Benchmark.realtime do
43
+ reps.times { Statsample::Bivariate.correlation_matrix_pairwise(ds) }
44
+ end
45
+
46
+ puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
47
+
48
+ rs.add_row(Daru::Vector.new({
49
+ :cases => cases,
50
+ :vars => vars,
51
+ :time_optimized => Math.sqrt(time_optimized*1000),
52
+ :time_pairwise =>Math.sqrt(time_pairwise*1000)
53
+ })
54
+ )
55
+ end
56
+ end
57
+ else
58
+ rs=Statsample.load("correlation_matrix.ds")
59
+ end
60
+
61
+ rs[:c_v] = rs.collect {|row| row[:cases]*row[:vars]}
62
+
63
+ rs.save("correlation_matrix.ds")
64
+ Statsample::Excel.write(rs,"correlation_matrix.xls")
65
+
66
+ rb = ReportBuilder.new(:name=>"Correlation matrix analysis")
67
+
68
+ rb.add(Statsample::Regression.multiple(rs[:cases,:vars,:time_optimized,:c_v],:time_optimized, :digits=>6))
69
+ rb.add(Statsample::Regression.multiple(rs[:cases,:vars,:time_pairwise,:c_v],:time_pairwise, :digits=>6))
70
+
71
+ rb.save_html("correlation_matrix.html")