statsample 0.6.5 → 0.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
data/History.txt
CHANGED
@@ -1,4 +1,19 @@
|
|
1
|
+
=== 0.6.7 / 2010-03-23
|
2
|
+
* Bug fix: dependency on Reportbuilder should be set to "~>0.2.0", not "0.2"
|
3
|
+
=== 0.6.6 / 2010-03-22
|
4
|
+
* Set ReportBuilder dependency to the '0.2.~' version, because of a future API break
|
5
|
+
* Removed Alglib dependency
|
6
|
+
* Factor::PrincipalAxis and Factor::PCA reworked
|
7
|
+
* Standardization of documentation on almost every file
|
8
|
+
* New Statsample::Test::Levene, to test equality of variances
|
9
|
+
* Constant HAS_GSL replaced by Statsample.has_gsl?
|
10
|
+
* PCA and Principal Axis test based on R and SPSS results
|
11
|
+
* Bug fix on test_dataset.rb / test_saveload
|
12
|
+
* Added Rakefile
|
13
|
+
* Demos for levene, Principal Axis
|
14
|
+
|
1
15
|
=== 0.6.5 / 2010-02-24
|
16
|
+
|
2
17
|
* Bug fix on test: Use tempfile instead of tempdir
|
3
18
|
* Multiple Regression: Calculation of constant standard error , using covariance matrix.
|
4
19
|
* Calculation of R^2_yx and P^2_yx for Regression on Multiple Dependent variables
|
data/Manifest.txt
CHANGED
@@ -2,6 +2,7 @@ History.txt
|
|
2
2
|
LICENSE.txt
|
3
3
|
Manifest.txt
|
4
4
|
README.txt
|
5
|
+
Rakefile
|
5
6
|
bin/statsample
|
6
7
|
data/crime.txt
|
7
8
|
data/locale/es/LC_MESSAGES/statsample.mo
|
@@ -12,9 +13,11 @@ data/tetmat_test.txt
|
|
12
13
|
demo/correlation_matrix.rb
|
13
14
|
demo/dominance_analysis_bootstrap.rb
|
14
15
|
demo/dominanceanalysis.rb
|
16
|
+
demo/levene.rb
|
15
17
|
demo/multiple_regression.rb
|
16
18
|
demo/multivariate_correlation.rb
|
17
19
|
demo/polychoric.rb
|
20
|
+
demo/principal_axis.rb
|
18
21
|
demo/tetrachoric.rb
|
19
22
|
lib/distribution.rb
|
20
23
|
lib/distribution/chisquare.rb
|
@@ -71,8 +74,10 @@ lib/statsample/reliability.rb
|
|
71
74
|
lib/statsample/resample.rb
|
72
75
|
lib/statsample/srs.rb
|
73
76
|
lib/statsample/test.rb
|
77
|
+
lib/statsample/test/levene.rb
|
74
78
|
lib/statsample/test/umannwhitney.rb
|
75
79
|
lib/statsample/vector.rb
|
80
|
+
po/es/statsample.mo
|
76
81
|
po/es/statsample.po
|
77
82
|
po/statsample.pot
|
78
83
|
setup.rb
|
@@ -100,6 +105,7 @@ test/test_reliability.rb
|
|
100
105
|
test/test_resample.rb
|
101
106
|
test/test_srs.rb
|
102
107
|
test/test_statistics.rb
|
108
|
+
test/test_stest.rb
|
103
109
|
test/test_stratified.rb
|
104
110
|
test/test_svg_graph.rb
|
105
111
|
test/test_umannwhitney.rb
|
data/README.txt
CHANGED
@@ -10,25 +10,43 @@ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.
|
|
10
10
|
Includes:
|
11
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
12
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
13
|
-
* Correlations: Pearson
|
14
|
-
* Regression: Simple, Multiple, Probit
|
13
|
+
* Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
|
14
|
+
* Regression: Simple, Multiple, Probit and Logit
|
15
15
|
* Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
|
16
16
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
17
17
|
* Sample calculation related formulas
|
18
18
|
|
19
19
|
== FEATURES:
|
20
20
|
|
21
|
-
*
|
22
|
-
*
|
23
|
-
*
|
24
|
-
*
|
25
|
-
*
|
26
|
-
*
|
27
|
-
*
|
28
|
-
*
|
29
|
-
*
|
21
|
+
* Classes for manipulation and storage of data:
|
22
|
+
* Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
|
23
|
+
* Statsample::Dataset: a group of Statsample::Vector objects, analogous to an Excel spreadsheet or a data frame in R. The base of almost all operations in statsample.
|
24
|
+
* Statsample::Multiset: multiple datasets with same fields and type of vectors
|
25
|
+
* Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
|
26
|
+
* Multiple types of regression.
|
27
|
+
* Simple Regression : Statsample::Regression::Simple
|
28
|
+
* Multiple Regression: Statsample::Regression::Multiple
|
29
|
+
* Logit Regression: Statsample::Regression::Binomial::Logit
|
30
|
+
* Probit Regression: Statsample::Regression::Binomial::Probit
|
31
|
+
* Factorial Analysis algorithms on Statsample::Factor module.
|
32
|
+
* Classes for Extraction of factors:
|
33
|
+
* Statsample::Factor::PCA
|
34
|
+
* Statsample::Factor::PrincipalAxis
|
35
|
+
* Classes for Rotation of factors:
|
36
|
+
* Statsample::Factor::Varimax
|
37
|
+
* Statsample::Factor::Equimax
|
38
|
+
* Statsample::Factor::Quartimax
|
39
|
+
* Dominance Analysis. Based on Budescu and Azen papers, Statsample::DominanceAnalysis class can report dominance analysis for a sample, using univariate or multivariate dependent variables, and DominanceAnalysisBootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
|
40
|
+
* Module Statsample::Codification, to help to codify open questions
|
41
|
+
* Converters to import and export data:
|
42
|
+
* Statsample::Database : Can create sql to create tables, read and insert data
|
43
|
+
* Statsample::CSV : Read and write CSV files
|
44
|
+
* Statsample::Excel : Read and write Excel files
|
45
|
+
* Statsample::Mx : Write Mx Files
|
46
|
+
* Statsample::GGobi : Write Ggobi files
|
47
|
+
* Module Statsample::Crosstab provides function to create crosstab for categorical data
|
30
48
|
* Reliability analysis provides functions to analyze scales. Class ItemAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted. With HtmlReport, graph the histogram of the scale and the Item Characteristic Curve for each item
|
31
|
-
* Module SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
|
49
|
+
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
|
32
50
|
* Interfaces to gdchart, gnuplot and SVG::Graph
|
33
51
|
|
34
52
|
|
data/Rakefile
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
# -*- coding: utf-8 -*-
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hoe'
|
7
|
+
require './lib/statsample'
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
if File.exists? './local_rakefile.rb'
|
14
|
+
require './local_rakefile'
|
15
|
+
end
|
16
|
+
|
17
|
+
desc "Ruby Lint"
|
18
|
+
task :lint do
|
19
|
+
executable=Config::CONFIG['RUBY_INSTALL_NAME']
|
20
|
+
Dir.glob("lib/**/*.rb") {|f|
|
21
|
+
if !system %{#{executable} -w -c "#{f}"}
|
22
|
+
puts "Error on: #{f}"
|
23
|
+
end
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
desc "Update pot/po files."
|
29
|
+
task :updatepo do
|
30
|
+
require 'gettext/tools'
|
31
|
+
GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
|
32
|
+
end
|
33
|
+
|
34
|
+
desc "Create mo-files"
|
35
|
+
task :makemo do
|
36
|
+
require 'gettext/tools'
|
37
|
+
GetText.create_mofiles()
|
38
|
+
# GetText.create_mofiles(true, "po", "locale") # This is for "Ruby on Rails".
|
39
|
+
end
|
40
|
+
|
41
|
+
h=Hoe.spec('statsample') do
|
42
|
+
self.version=Statsample::VERSION
|
43
|
+
self.rubyforge_name = "ruby-statsample"
|
44
|
+
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
45
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>0.2.0"] << ["minimization", "~>0.1.0"]
|
46
|
+
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
47
|
+
self.need_rdoc=false
|
48
|
+
end
|
49
|
+
|
50
|
+
Rake::RDocTask.new(:docs) do |rd|
|
51
|
+
rd.main = h.readme_file
|
52
|
+
rd.options << '-d' if (`which dot` =~ /\/dot/) unless
|
53
|
+
ENV['NODOT'] || Hoe::WINDOZE
|
54
|
+
rd.rdoc_dir = 'doc'
|
55
|
+
|
56
|
+
rd.rdoc_files.include("lib/**/*.rb")
|
57
|
+
rd.rdoc_files += h.spec.extra_rdoc_files
|
58
|
+
rd.rdoc_files.reject! {|f| f=="Manifest.txt"}
|
59
|
+
title = h.spec.rdoc_options.grep(/^(-t|--title)=?$/).first
|
60
|
+
if title then
|
61
|
+
rd.options << title
|
62
|
+
|
63
|
+
unless title =~ /\=/ then # for ['-t', 'title here']
|
64
|
+
title_index = spec.rdoc_options.index(title)
|
65
|
+
rd.options << spec.rdoc_options[title_index + 1]
|
66
|
+
end
|
67
|
+
else
|
68
|
+
title = "#{h.name}-#{h.version} Documentation"
|
69
|
+
title = "#{h.rubyforge_name}'s " + title if h.rubyforge_name != h.name
|
70
|
+
rd.options << '--title' << title
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
desc 'publicar a rdocs con analytics'
|
76
|
+
task :publicar_docs => [:clean, :docs] do
|
77
|
+
ruby %{agregar_adsense_a_doc.rb}
|
78
|
+
path = File.expand_path("~/.rubyforge/user-config.yml")
|
79
|
+
config = YAML.load(File.read(path))
|
80
|
+
host = "#{config["username"]}@rubyforge.org"
|
81
|
+
|
82
|
+
remote_dir = "/var/www/gforge-projects/#{h.rubyforge_name}/#{h.remote_rdoc_dir
|
83
|
+
}"
|
84
|
+
local_dir = h.local_rdoc_dir
|
85
|
+
Dir.glob(local_dir+"/**/*") {|file|
|
86
|
+
sh %{chmod 755 #{file}}
|
87
|
+
}
|
88
|
+
sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
|
89
|
+
end
|
90
|
+
|
91
|
+
# vim: syntax=Ruby
|
data/demo/levene.rb
ADDED
data/demo/multiple_regression.rb
CHANGED
@@ -17,7 +17,7 @@ Benchmark.bm(7) do |x|
|
|
17
17
|
|
18
18
|
rb=ReportBuilder.new("Multiple Regression Engines")
|
19
19
|
|
20
|
-
if
|
20
|
+
if Statsample.has_gsl?
|
21
21
|
x.report("GSL:") {
|
22
22
|
lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y',:name=>"Multiple Regression using GSL")
|
23
23
|
rb.add(lr.summary)
|
@@ -25,12 +25,6 @@ if HAS_GSL
|
|
25
25
|
end
|
26
26
|
|
27
27
|
|
28
|
-
if HAS_ALGIB
|
29
|
-
x.report("Alglib:") {
|
30
|
-
lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y', :name=>"Multiple Regression using Alglib")
|
31
|
-
rb.add(lr.summary)
|
32
|
-
}
|
33
|
-
end
|
34
28
|
x.report("Ruby:") {
|
35
29
|
lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y',:name=>"Multiple Regression using RubyEngine")
|
36
30
|
rb.add(lr.summary)
|
data/demo/polychoric.rb
CHANGED
@@ -6,6 +6,7 @@ ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
|
6
6
|
|
7
7
|
# Estimation of polychoric correlation using two-step (default)
|
8
8
|
poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
|
9
|
+
|
9
10
|
puts poly.summary
|
10
11
|
|
11
12
|
# Estimation of polychoric correlation using joint method (slow)
|
@@ -0,0 +1,8 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
|
4
|
+
require 'statsample'
|
5
|
+
matrix=Matrix[
|
6
|
+
[1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
|
7
|
+
fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1,:smc=>false)
|
8
|
+
puts fa.summary
|
data/lib/distribution/f.rb
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
module Distribution
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
2
|
+
# Calculate cdf and inverse cdf for Fisher Distribution.
|
3
|
+
# Uses Statistics2 module
|
4
|
+
module F
|
5
|
+
class << self
|
6
|
+
# Return the P-value of the corresponding integral with
|
7
|
+
# k degrees of freedom
|
8
|
+
#
|
9
|
+
# Distribution::F.p_value(0.95,1,2)
|
10
|
+
def p_value(pr,k1,k2)
|
11
|
+
Statistics2.pfdist(k1,k2, pr)
|
12
|
+
end
|
13
|
+
# F cumulative distribution function (cdf).
|
14
|
+
#
|
15
|
+
# Returns the integral of F-distribution
|
16
|
+
# with k1 and k2 degrees of freedom
|
17
|
+
# over [0, x].
|
18
|
+
# Distribution::F.cdf(20,3,2)
|
19
|
+
#
|
20
|
+
def cdf(x, k1, k2)
|
21
|
+
Statistics2.fdist(k1, k2,x)
|
22
|
+
end
|
24
23
|
end
|
24
|
+
end
|
25
25
|
end
|
data/lib/spss.rb
CHANGED
@@ -7,114 +7,114 @@
|
|
7
7
|
# Claudio Bustos mailto:clbustos@gmail.com
|
8
8
|
|
9
9
|
module SPSS # :nodoc: all
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
end
|
44
|
-
def to_spss
|
45
|
-
parse_elements(:to_spss)
|
46
|
-
end
|
47
|
-
end
|
10
|
+
module Dictionary
|
11
|
+
class Element
|
12
|
+
def add(a)
|
13
|
+
@elements.push(a)
|
14
|
+
end
|
15
|
+
def parse_elements(func=:to_s)
|
16
|
+
@elements.collect{|e| " "+e.send(func)}.join("\n")
|
17
|
+
end
|
18
|
+
def init_with config
|
19
|
+
config.each {|key,value|
|
20
|
+
self.send(key.to_s+"=",value) if methods.include? key.to_s
|
21
|
+
}
|
22
|
+
end
|
23
|
+
def initialize(config={})
|
24
|
+
@config=config
|
25
|
+
@elements=[]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
class Dictionary < Element
|
29
|
+
attr_accessor :locale, :date_time, :row_count
|
30
|
+
def initialize(config={})
|
31
|
+
super
|
32
|
+
init_with ({
|
33
|
+
:locale=>"en_US",
|
34
|
+
:date_time=>Time.new().strftime("%Y-%m-%dT%H:%M:%S"),
|
35
|
+
:row_count=>1
|
36
|
+
})
|
37
|
+
init_with config
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_xml
|
41
|
+
"<dictionary locale='#{@locale}' creationDateTime='#{@date_time}' rowCount='#{@row_count}' xmlns='http://xml.spss.com/spss/data'>\n"+parse_elements(:to_xml)+"\n</dictionary>"
|
48
42
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
class LabelSet
|
64
|
-
attr_accessor
|
65
|
-
def initialize(labels)
|
66
|
-
@labels=labels
|
67
|
-
end
|
68
|
-
def parse_xml(name)
|
69
|
-
"<valueLabelSet>\n "+@labels.collect{|key,value| "<valueLabel label='#{key}' value='#{value}' />"}.join("\n ")+"\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
|
70
|
-
end
|
71
|
-
def parse_spss()
|
72
|
-
@labels.collect{|key,value| "#{key} '#{value}'"}.join("\n ")
|
73
|
-
end
|
43
|
+
end
|
44
|
+
def to_spss
|
45
|
+
parse_elements(:to_spss)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
class MissingValue < Element
|
50
|
+
attr_accessor :data, :type, :from, :to
|
51
|
+
def initialize(data,type=nil)
|
52
|
+
@data=data
|
53
|
+
if type.nil? or type=="lowerBound" or type=="upperBound"
|
54
|
+
@type=type
|
55
|
+
else
|
56
|
+
raise Exception,"Incorrect value for type"
|
74
57
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
58
|
+
end
|
59
|
+
def to_xml
|
60
|
+
"<missingValue data='#{@data}' "+(type.nil? ? "":"type='#{type}'")+"/>"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
class LabelSet
|
64
|
+
attr_accessor
|
65
|
+
def initialize(labels)
|
66
|
+
@labels=labels
|
67
|
+
end
|
68
|
+
def parse_xml(name)
|
69
|
+
"<valueLabelSet>\n "+@labels.collect{|key,value| "<valueLabel label='#{key}' value='#{value}' />"}.join("\n ")+"\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
|
70
|
+
end
|
71
|
+
def parse_spss()
|
72
|
+
@labels.collect{|key,value| "#{key} '#{value}'"}.join("\n ")
|
73
|
+
end
|
74
|
+
end
|
75
|
+
class Variable < Element
|
76
|
+
attr_accessor :aligment, :display_width, :label, :measurement_level, :name, :type, :decimals, :width, :type_format, :labelset, :missing_values
|
77
|
+
def initialize(config={})
|
78
|
+
super
|
79
|
+
@@var_number||=1
|
80
|
+
init_with({
|
81
|
+
:aligment => "left",
|
82
|
+
:display_width => 8,
|
83
|
+
:label => "Variable #{@@var_number}",
|
84
|
+
:measurement_level => "SCALE",
|
85
|
+
:name => "var#{@@var_number}",
|
86
|
+
:type => 0,
|
87
|
+
:decimals => 2,
|
88
|
+
:width => 10,
|
89
|
+
:type_format => "F",
|
90
|
+
:labelset => nil
|
91
|
+
})
|
92
|
+
init_with config
|
93
|
+
@missing_values=[]
|
94
|
+
@@var_number+=1
|
95
|
+
end
|
96
|
+
def to_xml
|
97
|
+
labelset_s=(@labelset.nil?) ? "":"\n"+@labelset.parse_xml(@name)
|
98
|
+
missing_values=(@missing_values.size>0) ? @missing_values.collect {|m| m.to_xml}.join("\n"):""
|
99
|
+
"<variable aligment='#{@aligment}' displayWidth='#{@display_width}' label='#{@label}' measurementLevel='#{@measurement_level}' name='#{@name}' type='#{@type}'>\n<variableFormat decimals='#{@decimals}' width='#{@width}' type='#{@type_format}' />\n"+parse_elements(:to_xml)+missing_values+"</variable>"+labelset_s
|
100
|
+
end
|
101
|
+
def to_spss
|
102
|
+
out=<<HERE
|
103
103
|
VARIABLE LABELS #{@name} '#{label}' .
|
104
104
|
VARIABLE ALIGMENT #{@name} (#{@aligment.upcase}) .
|
105
105
|
VARIABLE WIDTH #{@name} (#{@display_width}) .
|
106
106
|
VARIABLE LEVEL #{@name} (#{@measurement_level.upcase}) .
|
107
107
|
HERE
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
end
|
114
|
-
out
|
115
|
-
end
|
108
|
+
if !@labelset.nil?
|
109
|
+
out << "VALUE LABELS #{@name} "+labelset.parse_spss()+" ."
|
110
|
+
end
|
111
|
+
if @missing_values.size>0
|
112
|
+
out << "MISSING VALUES #{@name} ("+@missing_values.collect{|m| m.data}.join(",")+") ."
|
116
113
|
end
|
114
|
+
out
|
115
|
+
end
|
117
116
|
end
|
117
|
+
end
|
118
118
|
end
|
119
119
|
n=SPSS::Dictionary::Dictionary.new
|
120
120
|
ls=SPSS::Dictionary::LabelSet.new({1=>"Si",2=>"No"})
|
@@ -22,7 +22,7 @@ module Statsample
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
end
|
25
|
-
#
|
25
|
+
# = Polychoric correlation.
|
26
26
|
#
|
27
27
|
# The <em>polychoric</em> correlation is a measure of
|
28
28
|
# bivariate association arising when both observed variates
|
@@ -36,10 +36,33 @@ module Statsample
|
|
36
36
|
# 2. Two-step estimator and
|
37
37
|
# 3. Polychoric series estimate.
|
38
38
|
#
|
39
|
-
# By default,
|
40
|
-
# the estimation method with method attribute
|
39
|
+
# By default, two-step estimation is used. You can select
|
40
|
+
# the estimation method with the method attribute. Joint estimation and polychoric series require the gsl library and rb-gsl.
|
41
41
|
#
|
42
|
-
#
|
42
|
+
# == Use
|
43
|
+
#
|
44
|
+
# You should enter a Matrix with ordered data. For example:
|
45
|
+
# -------------------
|
46
|
+
# | y=0 | y=1 | y=2 |
|
47
|
+
# -------------------
|
48
|
+
# x = 0 | 1 | 10 | 20 |
|
49
|
+
# -------------------
|
50
|
+
# x = 1 | 20 | 20 | 50 |
|
51
|
+
# -------------------
|
52
|
+
#
|
53
|
+
# The code will be
|
54
|
+
#
|
55
|
+
# matrix=Matrix[[1,10,20],[20,20,50]]
|
56
|
+
# poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
|
57
|
+
# puts poly.r
|
58
|
+
#
|
59
|
+
# See extensive documentation in Uebersax (2006) and Drasgow (2006)
|
60
|
+
#
|
61
|
+
# == References
|
62
|
+
#
|
63
|
+
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
64
|
+
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
65
|
+
|
43
66
|
class Polychoric
|
44
67
|
include GetText
|
45
68
|
bindtextdomain("statsample")
|
@@ -86,23 +109,10 @@ module Statsample
|
|
86
109
|
def new_with_vectors(v1,v2)
|
87
110
|
Polychoric.new(Crosstab.new(v1,v2).to_matrix)
|
88
111
|
end
|
89
|
-
#
|
90
|
-
#
|
91
|
-
#
|
92
|
-
|
93
|
-
# -------------------
|
94
|
-
# x = 0 | 1 | 10 | 20 |
|
95
|
-
# -------------------
|
96
|
-
# x = 1 | 20 | 20 | 50 |
|
97
|
-
# -------------------
|
98
|
-
#
|
99
|
-
# The code will be
|
100
|
-
#
|
101
|
-
# matrix=Matrix[[1,10,20],[20,20,50]]
|
102
|
-
# poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
|
103
|
-
# puts poly.r
|
104
|
-
|
105
|
-
|
112
|
+
# Params:
|
113
|
+
# * matrix: Contingency table
|
114
|
+
# * opts: Any attribute
|
115
|
+
|
106
116
|
def initialize(matrix, opts=Hash.new)
|
107
117
|
@matrix=matrix
|
108
118
|
@n=matrix.column_size
|
@@ -309,7 +319,7 @@ module Statsample
|
|
309
319
|
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
310
320
|
#
|
311
321
|
def compute_two_step_mle_drasgow
|
312
|
-
if
|
322
|
+
if Statsample.has_gsl?
|
313
323
|
compute_two_step_mle_drasgow_gsl
|
314
324
|
else
|
315
325
|
compute_two_step_mle_drasgow_ruby
|