statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
data/History.txt
CHANGED
@@ -1,4 +1,19 @@
|
|
1
|
+
=== 0.6.7 / 2010-03-23
|
2
|
+
* Bug fix: dependency on ReportBuilder should be set to "~>0.2.0", not "0.2"
|
3
|
+
=== 0.6.6 / 2010-03-22
|
4
|
+
* Set ReportBuilder dependency to the '0.2.~' version, because of future API breaks
|
5
|
+
* Removed Alglib dependency
|
6
|
+
* Factor::PrincipalAxis and Factor::PCA reworked
|
7
|
+
* Standardization of documentation on almost every file
|
8
|
+
* New Statsample::Test::Levene, to test equality of variances
|
9
|
+
* Constant HAS_GSL replaced by Statsample.has_gsl?
|
10
|
+
* PCA and Principal Axis test based on R and SPSS results
|
11
|
+
* Bug fix on test_dataset.rb / test_saveload
|
12
|
+
* Added Rakefile
|
13
|
+
* Demos for levene, Principal Axis
|
14
|
+
|
1
15
|
=== 0.6.5 / 2010-02-24
|
16
|
+
|
2
17
|
* Bug fix on test: Use tempfile instead of tempdir
|
3
18
|
* Multiple Regression: Calculation of constant standard error, using covariance matrix.
|
4
19
|
* Calculation of R^2_yx and P^2_yx for Regression on Multiple Dependent variables
|
data/Manifest.txt
CHANGED
@@ -2,6 +2,7 @@ History.txt
|
|
2
2
|
LICENSE.txt
|
3
3
|
Manifest.txt
|
4
4
|
README.txt
|
5
|
+
Rakefile
|
5
6
|
bin/statsample
|
6
7
|
data/crime.txt
|
7
8
|
data/locale/es/LC_MESSAGES/statsample.mo
|
@@ -12,9 +13,11 @@ data/tetmat_test.txt
|
|
12
13
|
demo/correlation_matrix.rb
|
13
14
|
demo/dominance_analysis_bootstrap.rb
|
14
15
|
demo/dominanceanalysis.rb
|
16
|
+
demo/levene.rb
|
15
17
|
demo/multiple_regression.rb
|
16
18
|
demo/multivariate_correlation.rb
|
17
19
|
demo/polychoric.rb
|
20
|
+
demo/principal_axis.rb
|
18
21
|
demo/tetrachoric.rb
|
19
22
|
lib/distribution.rb
|
20
23
|
lib/distribution/chisquare.rb
|
@@ -71,8 +74,10 @@ lib/statsample/reliability.rb
|
|
71
74
|
lib/statsample/resample.rb
|
72
75
|
lib/statsample/srs.rb
|
73
76
|
lib/statsample/test.rb
|
77
|
+
lib/statsample/test/levene.rb
|
74
78
|
lib/statsample/test/umannwhitney.rb
|
75
79
|
lib/statsample/vector.rb
|
80
|
+
po/es/statsample.mo
|
76
81
|
po/es/statsample.po
|
77
82
|
po/statsample.pot
|
78
83
|
setup.rb
|
@@ -100,6 +105,7 @@ test/test_reliability.rb
|
|
100
105
|
test/test_resample.rb
|
101
106
|
test/test_srs.rb
|
102
107
|
test/test_statistics.rb
|
108
|
+
test/test_stest.rb
|
103
109
|
test/test_stratified.rb
|
104
110
|
test/test_svg_graph.rb
|
105
111
|
test/test_umannwhitney.rb
|
data/README.txt
CHANGED
@@ -10,25 +10,43 @@ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.
|
|
10
10
|
Includes:
|
11
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
12
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
13
|
-
* Correlations: Pearson
|
14
|
-
* Regression: Simple, Multiple, Probit
|
13
|
+
* Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
|
14
|
+
* Regression: Simple, Multiple, Probit and Logit
|
15
15
|
* Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
|
16
16
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
17
17
|
* Sample calculation related formulas
|
18
18
|
|
19
19
|
== FEATURES:
|
20
20
|
|
21
|
-
*
|
22
|
-
*
|
23
|
-
*
|
24
|
-
*
|
25
|
-
*
|
26
|
-
*
|
27
|
-
*
|
28
|
-
*
|
29
|
-
*
|
21
|
+
* Classes for manipulation and storage of data:
|
22
|
+
* Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
|
23
|
+
* Statsample::Dataset: a group of Statsample::Vector, analogous to an Excel spreadsheet or a dataframe in R. The base of almost all operations on statsample.
|
24
|
+
* Statsample::Multiset: multiple datasets with same fields and type of vectors
|
25
|
+
* Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Includes methods to create correlation and covariance matrices
|
26
|
+
* Multiple types of regression.
|
27
|
+
* Simple Regression : Statsample::Regression::Simple
|
28
|
+
* Multiple Regression: Statsample::Regression::Multiple
|
29
|
+
* Logit Regression: Statsample::Regression::Binomial::Logit
|
30
|
+
* Probit Regression: Statsample::Regression::Binomial::Probit
|
31
|
+
* Factorial Analysis algorithms on Statsample::Factor module.
|
32
|
+
* Classes for Extraction of factors:
|
33
|
+
* Statsample::Factor::PCA
|
34
|
+
* Statsample::Factor::PrincipalAxis
|
35
|
+
* Classes for Rotation of factors:
|
36
|
+
* Statsample::Factor::Varimax
|
37
|
+
* Statsample::Factor::Equimax
|
38
|
+
* Statsample::Factor::Quartimax
|
39
|
+
* Dominance Analysis. Based on Budescu and Azen papers, Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables and DominanceAnalysisBootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
|
40
|
+
* Module Statsample::Codification, to help to codify open questions
|
41
|
+
* Converters to import and export data:
|
42
|
+
* Statsample::Database : Can create sql to create tables, read and insert data
|
43
|
+
* Statsample::CSV : Read and write CSV files
|
44
|
+
* Statsample::Excel : Read and write Excel files
|
45
|
+
* Statsample::Mx : Write Mx Files
|
46
|
+
* Statsample::GGobi : Write Ggobi files
|
47
|
+
* Module Statsample::Crosstab provides functions to create crosstabs for categorical data
|
30
48
|
* Reliability analysis provides functions to analyze scales. Class ItemAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted. With HtmlReport, graph the histogram of the scale and the Item Characteristic Curve for each item
|
31
|
-
* Module SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
|
49
|
+
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
|
32
50
|
* Interfaces to gdchart, gnuplot and SVG::Graph
|
33
51
|
|
34
52
|
|
data/Rakefile
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
# -*- coding: utf-8 -*-
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hoe'
|
7
|
+
require './lib/statsample'
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
if File.exists? './local_rakefile.rb'
|
14
|
+
require './local_rakefile'
|
15
|
+
end
|
16
|
+
|
17
|
+
desc "Ruby Lint"
|
18
|
+
task :lint do
|
19
|
+
executable=Config::CONFIG['RUBY_INSTALL_NAME']
|
20
|
+
Dir.glob("lib/**/*.rb") {|f|
|
21
|
+
if !system %{#{executable} -w -c "#{f}"}
|
22
|
+
puts "Error on: #{f}"
|
23
|
+
end
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
desc "Update pot/po files."
|
29
|
+
task :updatepo do
|
30
|
+
require 'gettext/tools'
|
31
|
+
GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
|
32
|
+
end
|
33
|
+
|
34
|
+
desc "Create mo-files"
|
35
|
+
task :makemo do
|
36
|
+
require 'gettext/tools'
|
37
|
+
GetText.create_mofiles()
|
38
|
+
# GetText.create_mofiles(true, "po", "locale") # This is for "Ruby on Rails".
|
39
|
+
end
|
40
|
+
|
41
|
+
h=Hoe.spec('statsample') do
|
42
|
+
self.version=Statsample::VERSION
|
43
|
+
self.rubyforge_name = "ruby-statsample"
|
44
|
+
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
45
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>0.2.0"] << ["minimization", "~>0.1.0"]
|
46
|
+
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
47
|
+
self.need_rdoc=false
|
48
|
+
end
|
49
|
+
|
50
|
+
Rake::RDocTask.new(:docs) do |rd|
|
51
|
+
rd.main = h.readme_file
|
52
|
+
rd.options << '-d' if (`which dot` =~ /\/dot/) unless
|
53
|
+
ENV['NODOT'] || Hoe::WINDOZE
|
54
|
+
rd.rdoc_dir = 'doc'
|
55
|
+
|
56
|
+
rd.rdoc_files.include("lib/**/*.rb")
|
57
|
+
rd.rdoc_files += h.spec.extra_rdoc_files
|
58
|
+
rd.rdoc_files.reject! {|f| f=="Manifest.txt"}
|
59
|
+
title = h.spec.rdoc_options.grep(/^(-t|--title)=?$/).first
|
60
|
+
if title then
|
61
|
+
rd.options << title
|
62
|
+
|
63
|
+
unless title =~ /\=/ then # for ['-t', 'title here']
|
64
|
+
title_index = spec.rdoc_options.index(title)
|
65
|
+
rd.options << spec.rdoc_options[title_index + 1]
|
66
|
+
end
|
67
|
+
else
|
68
|
+
title = "#{h.name}-#{h.version} Documentation"
|
69
|
+
title = "#{h.rubyforge_name}'s " + title if h.rubyforge_name != h.name
|
70
|
+
rd.options << '--title' << title
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
desc 'publicar a rdocs con analytics'
|
76
|
+
task :publicar_docs => [:clean, :docs] do
|
77
|
+
ruby %{agregar_adsense_a_doc.rb}
|
78
|
+
path = File.expand_path("~/.rubyforge/user-config.yml")
|
79
|
+
config = YAML.load(File.read(path))
|
80
|
+
host = "#{config["username"]}@rubyforge.org"
|
81
|
+
|
82
|
+
remote_dir = "/var/www/gforge-projects/#{h.rubyforge_name}/#{h.remote_rdoc_dir
|
83
|
+
}"
|
84
|
+
local_dir = h.local_rdoc_dir
|
85
|
+
Dir.glob(local_dir+"/**/*") {|file|
|
86
|
+
sh %{chmod 755 #{file}}
|
87
|
+
}
|
88
|
+
sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
|
89
|
+
end
|
90
|
+
|
91
|
+
# vim: syntax=Ruby
|
data/demo/levene.rb
ADDED
data/demo/multiple_regression.rb
CHANGED
@@ -17,7 +17,7 @@ Benchmark.bm(7) do |x|
|
|
17
17
|
|
18
18
|
rb=ReportBuilder.new("Multiple Regression Engines")
|
19
19
|
|
20
|
-
if
|
20
|
+
if Statsample.has_gsl?
|
21
21
|
x.report("GSL:") {
|
22
22
|
lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y',:name=>"Multiple Regression using GSL")
|
23
23
|
rb.add(lr.summary)
|
@@ -25,12 +25,6 @@ if HAS_GSL
|
|
25
25
|
end
|
26
26
|
|
27
27
|
|
28
|
-
if HAS_ALGIB
|
29
|
-
x.report("Alglib:") {
|
30
|
-
lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y', :name=>"Multiple Regression using Alglib")
|
31
|
-
rb.add(lr.summary)
|
32
|
-
}
|
33
|
-
end
|
34
28
|
x.report("Ruby:") {
|
35
29
|
lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y',:name=>"Multiple Regression using RubyEngine")
|
36
30
|
rb.add(lr.summary)
|
data/demo/polychoric.rb
CHANGED
@@ -6,6 +6,7 @@ ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
|
6
6
|
|
7
7
|
# Estimation of polychoric correlation using two-step (default)
|
8
8
|
poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
|
9
|
+
|
9
10
|
puts poly.summary
|
10
11
|
|
11
12
|
# Estimation of polychoric correlation using joint method (slow)
|
@@ -0,0 +1,8 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
|
4
|
+
require 'statsample'
|
5
|
+
matrix=Matrix[
|
6
|
+
[1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
|
7
|
+
fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1,:smc=>false)
|
8
|
+
puts fa.summary
|
data/lib/distribution/f.rb
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
module Distribution
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
2
|
+
# Calculate cdf and inverse cdf for Fisher Distribution.
|
3
|
+
# Uses Statistics2 module
|
4
|
+
module F
|
5
|
+
class << self
|
6
|
+
# Return the P-value of the corresponding integral with
|
7
|
+
# k degrees of freedom
|
8
|
+
#
|
9
|
+
# Distribution::F.p_value(0.95,1,2)
|
10
|
+
def p_value(pr,k1,k2)
|
11
|
+
Statistics2.pfdist(k1,k2, pr)
|
12
|
+
end
|
13
|
+
# F cumulative distribution function (cdf).
|
14
|
+
#
|
15
|
+
# Returns the integral of F-distribution
|
16
|
+
# with k1 and k2 degrees of freedom
|
17
|
+
# over [0, x].
|
18
|
+
# Distribution::F.cdf(20,3,2)
|
19
|
+
#
|
20
|
+
def cdf(x, k1, k2)
|
21
|
+
Statistics2.fdist(k1, k2,x)
|
22
|
+
end
|
24
23
|
end
|
24
|
+
end
|
25
25
|
end
|
data/lib/spss.rb
CHANGED
@@ -7,114 +7,114 @@
|
|
7
7
|
# Claudio Bustos mailto:clbustos@gmail.com
|
8
8
|
|
9
9
|
module SPSS # :nodoc: all
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
end
|
44
|
-
def to_spss
|
45
|
-
parse_elements(:to_spss)
|
46
|
-
end
|
47
|
-
end
|
10
|
+
module Dictionary
|
11
|
+
class Element
|
12
|
+
def add(a)
|
13
|
+
@elements.push(a)
|
14
|
+
end
|
15
|
+
def parse_elements(func=:to_s)
|
16
|
+
@elements.collect{|e| " "+e.send(func)}.join("\n")
|
17
|
+
end
|
18
|
+
def init_with config
|
19
|
+
config.each {|key,value|
|
20
|
+
self.send(key.to_s+"=",value) if methods.include? key.to_s
|
21
|
+
}
|
22
|
+
end
|
23
|
+
def initialize(config={})
|
24
|
+
@config=config
|
25
|
+
@elements=[]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
class Dictionary < Element
|
29
|
+
attr_accessor :locale, :date_time, :row_count
|
30
|
+
def initialize(config={})
|
31
|
+
super
|
32
|
+
init_with ({
|
33
|
+
:locale=>"en_US",
|
34
|
+
:date_time=>Time.new().strftime("%Y-%m-%dT%H:%M:%S"),
|
35
|
+
:row_count=>1
|
36
|
+
})
|
37
|
+
init_with config
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_xml
|
41
|
+
"<dictionary locale='#{@locale}' creationDateTime='#{@date_time}' rowCount='#{@row_count}' xmlns='http://xml.spss.com/spss/data'>\n"+parse_elements(:to_xml)+"\n</dictionary>"
|
48
42
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
class LabelSet
|
64
|
-
attr_accessor
|
65
|
-
def initialize(labels)
|
66
|
-
@labels=labels
|
67
|
-
end
|
68
|
-
def parse_xml(name)
|
69
|
-
"<valueLabelSet>\n "+@labels.collect{|key,value| "<valueLabel label='#{key}' value='#{value}' />"}.join("\n ")+"\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
|
70
|
-
end
|
71
|
-
def parse_spss()
|
72
|
-
@labels.collect{|key,value| "#{key} '#{value}'"}.join("\n ")
|
73
|
-
end
|
43
|
+
end
|
44
|
+
def to_spss
|
45
|
+
parse_elements(:to_spss)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
class MissingValue < Element
|
50
|
+
attr_accessor :data, :type, :from, :to
|
51
|
+
def initialize(data,type=nil)
|
52
|
+
@data=data
|
53
|
+
if type.nil? or type=="lowerBound" or type=="upperBound"
|
54
|
+
@type=type
|
55
|
+
else
|
56
|
+
raise Exception,"Incorrect value for type"
|
74
57
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
58
|
+
end
|
59
|
+
def to_xml
|
60
|
+
"<missingValue data='#{@data}' "+(type.nil? ? "":"type='#{type}'")+"/>"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
class LabelSet
|
64
|
+
attr_accessor
|
65
|
+
def initialize(labels)
|
66
|
+
@labels=labels
|
67
|
+
end
|
68
|
+
def parse_xml(name)
|
69
|
+
"<valueLabelSet>\n "+@labels.collect{|key,value| "<valueLabel label='#{key}' value='#{value}' />"}.join("\n ")+"\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
|
70
|
+
end
|
71
|
+
def parse_spss()
|
72
|
+
@labels.collect{|key,value| "#{key} '#{value}'"}.join("\n ")
|
73
|
+
end
|
74
|
+
end
|
75
|
+
class Variable < Element
|
76
|
+
attr_accessor :aligment, :display_width, :label, :measurement_level, :name, :type, :decimals, :width, :type_format, :labelset, :missing_values
|
77
|
+
def initialize(config={})
|
78
|
+
super
|
79
|
+
@@var_number||=1
|
80
|
+
init_with({
|
81
|
+
:aligment => "left",
|
82
|
+
:display_width => 8,
|
83
|
+
:label => "Variable #{@@var_number}",
|
84
|
+
:measurement_level => "SCALE",
|
85
|
+
:name => "var#{@@var_number}",
|
86
|
+
:type => 0,
|
87
|
+
:decimals => 2,
|
88
|
+
:width => 10,
|
89
|
+
:type_format => "F",
|
90
|
+
:labelset => nil
|
91
|
+
})
|
92
|
+
init_with config
|
93
|
+
@missing_values=[]
|
94
|
+
@@var_number+=1
|
95
|
+
end
|
96
|
+
def to_xml
|
97
|
+
labelset_s=(@labelset.nil?) ? "":"\n"+@labelset.parse_xml(@name)
|
98
|
+
missing_values=(@missing_values.size>0) ? @missing_values.collect {|m| m.to_xml}.join("\n"):""
|
99
|
+
"<variable aligment='#{@aligment}' displayWidth='#{@display_width}' label='#{@label}' measurementLevel='#{@measurement_level}' name='#{@name}' type='#{@type}'>\n<variableFormat decimals='#{@decimals}' width='#{@width}' type='#{@type_format}' />\n"+parse_elements(:to_xml)+missing_values+"</variable>"+labelset_s
|
100
|
+
end
|
101
|
+
def to_spss
|
102
|
+
out=<<HERE
|
103
103
|
VARIABLE LABELS #{@name} '#{label}' .
|
104
104
|
VARIABLE ALIGMENT #{@name} (#{@aligment.upcase}) .
|
105
105
|
VARIABLE WIDTH #{@name} (#{@display_width}) .
|
106
106
|
VARIABLE LEVEL #{@name} (#{@measurement_level.upcase}) .
|
107
107
|
HERE
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
end
|
114
|
-
out
|
115
|
-
end
|
108
|
+
if !@labelset.nil?
|
109
|
+
out << "VALUE LABELS #{@name} "+labelset.parse_spss()+" ."
|
110
|
+
end
|
111
|
+
if @missing_values.size>0
|
112
|
+
out << "MISSING VALUES #{@name} ("+@missing_values.collect{|m| m.data}.join(",")+") ."
|
116
113
|
end
|
114
|
+
out
|
115
|
+
end
|
117
116
|
end
|
117
|
+
end
|
118
118
|
end
|
119
119
|
n=SPSS::Dictionary::Dictionary.new
|
120
120
|
ls=SPSS::Dictionary::LabelSet.new({1=>"Si",2=>"No"})
|
@@ -22,7 +22,7 @@ module Statsample
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
end
|
25
|
-
#
|
25
|
+
# = Polychoric correlation.
|
26
26
|
#
|
27
27
|
# The <em>polychoric</em> correlation is a measure of
|
28
28
|
# bivariate association arising when both observed variates
|
@@ -36,10 +36,33 @@ module Statsample
|
|
36
36
|
# 2. Two-step estimator and
|
37
37
|
# 3. Polychoric series estimate.
|
38
38
|
#
|
39
|
-
# By default,
|
40
|
-
# the estimation method with method attribute
|
39
|
+
# By default, two-step estimation is used. You can select
|
40
|
+
# the estimation method with method attribute. Joint estimate and polychoric series require gsl library and rb-gsl.
|
41
41
|
#
|
42
|
-
#
|
42
|
+
# == Use
|
43
|
+
#
|
44
|
+
# You should enter a Matrix with ordered data. For example:
|
45
|
+
# -------------------
|
46
|
+
# | y=0 | y=1 | y=2 |
|
47
|
+
# -------------------
|
48
|
+
# x = 0 | 1 | 10 | 20 |
|
49
|
+
# -------------------
|
50
|
+
# x = 1 | 20 | 20 | 50 |
|
51
|
+
# -------------------
|
52
|
+
#
|
53
|
+
# The code will be
|
54
|
+
#
|
55
|
+
# matrix=Matrix[[1,10,20],[20,20,50]]
|
56
|
+
# poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
|
57
|
+
# puts poly.r
|
58
|
+
#
|
59
|
+
# See extensive documentation on Uebersax (2006) and Drasgow (2006)
|
60
|
+
#
|
61
|
+
# == References
|
62
|
+
#
|
63
|
+
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
64
|
+
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
65
|
+
|
43
66
|
class Polychoric
|
44
67
|
include GetText
|
45
68
|
bindtextdomain("statsample")
|
@@ -86,23 +109,10 @@ module Statsample
|
|
86
109
|
def new_with_vectors(v1,v2)
|
87
110
|
Polychoric.new(Crosstab.new(v1,v2).to_matrix)
|
88
111
|
end
|
89
|
-
#
|
90
|
-
#
|
91
|
-
#
|
92
|
-
|
93
|
-
# -------------------
|
94
|
-
# x = 0 | 1 | 10 | 20 |
|
95
|
-
# -------------------
|
96
|
-
# x = 1 | 20 | 20 | 50 |
|
97
|
-
# -------------------
|
98
|
-
#
|
99
|
-
# The code will be
|
100
|
-
#
|
101
|
-
# matrix=Matrix[[1,10,20],[20,20,50]]
|
102
|
-
# poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
|
103
|
-
# puts poly.r
|
104
|
-
|
105
|
-
|
112
|
+
# Params:
|
113
|
+
# * matrix: Contingency table
|
114
|
+
# * opts: Any attribute
|
115
|
+
|
106
116
|
def initialize(matrix, opts=Hash.new)
|
107
117
|
@matrix=matrix
|
108
118
|
@n=matrix.column_size
|
@@ -309,7 +319,7 @@ module Statsample
|
|
309
319
|
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
310
320
|
#
|
311
321
|
def compute_two_step_mle_drasgow
|
312
|
-
if
|
322
|
+
if Statsample.has_gsl?
|
313
323
|
compute_two_step_mle_drasgow_gsl
|
314
324
|
else
|
315
325
|
compute_two_step_mle_drasgow_ruby
|