statsample 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -0
- data/Manifest.txt +7 -0
- data/README.txt +3 -3
- data/data/repeated_fields.csv +7 -0
- data/data/tetmat_matrix.txt +5 -0
- data/data/tetmat_test.txt +1001 -0
- data/demo/spss_matrix.rb +3 -0
- data/lib/spss.rb +1 -1
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +30 -1
- data/lib/statsample/anova.rb +62 -66
- data/lib/statsample/bivariate.rb +273 -281
- data/lib/statsample/bivariate/tetrachoric.rb +418 -0
- data/lib/statsample/codification.rb +15 -15
- data/lib/statsample/combination.rb +108 -106
- data/lib/statsample/converter/csv18.rb +52 -52
- data/lib/statsample/converter/csv19.rb +45 -48
- data/lib/statsample/converter/spss.rb +47 -0
- data/lib/statsample/converters.rb +74 -77
- data/lib/statsample/crosstab.rb +21 -17
- data/lib/statsample/dataset.rb +595 -543
- data/lib/statsample/dominanceanalysis.rb +7 -10
- data/lib/statsample/htmlreport.rb +23 -0
- data/lib/statsample/regression/multiple/baseengine.rb +59 -59
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/reliability.rb +165 -145
- data/lib/statsample/vector.rb +16 -2
- data/test/test_anova.rb +16 -16
- data/test/test_bivariate.rb +146 -0
- data/test/test_csv.rb +6 -0
- data/test/test_dataset.rb +49 -5
- data/test/test_statistics.rb +6 -90
- data/test/test_vector.rb +27 -10
- metadata +10 -4
- data/test/test_r.rb +0 -9
- data/test/test_stata.rb +0 -11
@@ -1,113 +1,115 @@
|
|
1
1
|
module Statsample
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
2
|
+
# Combination class systematically generates all combinations of n elements, taken k at a time.
|
3
|
+
# When rb-gsl is available, GSL::Combination is used for extra speed
|
4
|
+
# Source: http://snippets.dzone.com/posts/show/4666
|
5
|
+
# Use:
|
6
|
+
# comb=Statsample::Combination.new(3,5)
|
7
|
+
# => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
|
8
|
+
# comb.each{|c| p c }
|
9
|
+
# [0, 1, 2]
|
10
|
+
# [0, 1, 3]
|
11
|
+
# [0, 1, 4]
|
12
|
+
# [0, 2, 3]
|
13
|
+
# [0, 2, 4]
|
14
|
+
# [0, 3, 4]
|
15
|
+
# [1, 2, 3]
|
16
|
+
# [1, 2, 4]
|
17
|
+
# [1, 3, 4]
|
18
|
+
# [2, 3, 4]
|
19
|
+
#
|
20
|
+
class Combination
|
21
|
+
attr_reader :d
|
22
|
+
def initialize(k,n,only_ruby=false)
|
23
|
+
@k=k
|
24
|
+
@n=n
|
25
|
+
if HAS_GSL and !only_ruby
|
26
|
+
@d=CombinationGsl.new(@k,@n)
|
27
|
+
else
|
28
|
+
@d=CombinationRuby.new(@k,@n)
|
30
29
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
def reset
|
38
|
-
@d.reset
|
39
|
-
end
|
40
|
-
def next_value
|
41
|
-
@d.next_value
|
42
|
-
end
|
43
|
-
class CombinationRuby
|
44
|
-
attr_reader :data
|
45
|
-
def initialize(k,n)
|
46
|
-
raise "k<=n" if k>n
|
47
|
-
@k=k
|
48
|
-
@n=n
|
49
|
-
reset
|
50
|
-
end
|
51
|
-
def reset
|
52
|
-
@data=[]
|
53
|
-
(0...@k).each {|i|
|
54
|
-
@data[i] = i;
|
55
|
-
}
|
56
|
-
end
|
57
|
-
def each
|
58
|
-
reset
|
59
|
-
while a=next_value
|
60
|
-
yield a
|
61
|
-
end
|
62
|
-
end
|
63
|
-
def next_value
|
64
|
-
return false if !@data
|
65
|
-
old_comb=@data.dup
|
66
|
-
i = @k - 1;
|
67
|
-
@data[i]+=1
|
68
|
-
while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
|
69
|
-
i-=1;
|
70
|
-
@data[i]+=1;
|
71
|
-
end
|
72
|
-
|
73
|
-
if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
|
74
|
-
@data=false # No more combinations can be generated
|
75
|
-
else
|
76
|
-
# comb now looks like (..., x, n, n, n, ..., n).
|
77
|
-
# Turn it into (..., x, x + 1, x + 2, ...)
|
78
|
-
i = i+1
|
79
|
-
(i...@k).each{ |i1|
|
80
|
-
@data[i1] = @data[i1 - 1] + 1
|
81
|
-
}
|
82
|
-
end
|
83
|
-
return old_comb
|
30
|
+
end
|
31
|
+
def each
|
32
|
+
reset
|
33
|
+
while a=next_value
|
34
|
+
yield a
|
84
35
|
end
|
85
36
|
end
|
37
|
+
def reset
|
38
|
+
@d.reset
|
39
|
+
end
|
40
|
+
def next_value
|
41
|
+
@d.next_value
|
42
|
+
end
|
43
|
+
|
44
|
+
# Ruby engine for Combinations
|
45
|
+
class CombinationRuby
|
46
|
+
attr_reader :data
|
47
|
+
def initialize(k,n)
|
48
|
+
raise "k<=n" if k>n
|
49
|
+
@k=k
|
50
|
+
@n=n
|
51
|
+
reset
|
52
|
+
end
|
53
|
+
def reset
|
54
|
+
@data=[]
|
55
|
+
(0...@k).each {|i| @data[i] = i }
|
56
|
+
end
|
57
|
+
def each
|
58
|
+
reset
|
59
|
+
while a=next_value
|
60
|
+
yield a
|
61
|
+
end
|
62
|
+
end
|
63
|
+
def next_value
|
64
|
+
return false if !@data
|
65
|
+
old_comb=@data.dup
|
66
|
+
i = @k - 1;
|
67
|
+
@data[i]+=1
|
68
|
+
while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
|
69
|
+
i-=1;
|
70
|
+
@data[i]+=1;
|
71
|
+
end
|
72
|
+
|
73
|
+
if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
|
74
|
+
@data=false # No more combinations can be generated
|
75
|
+
else
|
76
|
+
# comb now looks like (..., x, n, n, n, ..., n).
|
77
|
+
# Turn it into (..., x, x + 1, x + 2, ...)
|
78
|
+
i = i+1
|
79
|
+
(i...@k).each{ |i1|
|
80
|
+
@data[i1] = @data[i1 - 1] + 1
|
81
|
+
}
|
82
|
+
end
|
83
|
+
return old_comb
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# rb-gsl engine for Combinations
|
86
88
|
class CombinationGsl
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
89
|
+
def initialize(k,n)
|
90
|
+
require 'gsl'
|
91
|
+
raise "k<=n" if k>n
|
92
|
+
@k=k
|
93
|
+
@n=n
|
94
|
+
reset
|
95
|
+
end
|
96
|
+
def reset
|
97
|
+
@c= ::GSL::Combination.calloc(@n, @k);
|
98
|
+
end
|
99
|
+
def next_value
|
100
|
+
return false if !@c
|
101
|
+
data=@c.data.to_a
|
102
|
+
if @c.next != GSL::SUCCESS
|
103
|
+
@c=false
|
104
|
+
end
|
105
|
+
return data
|
106
|
+
end
|
107
|
+
def each
|
108
|
+
reset
|
109
|
+
begin
|
110
|
+
yield @c.data.to_a
|
111
|
+
end while @c.next == GSL::SUCCESS
|
112
|
+
end
|
111
113
|
end
|
112
|
-
end
|
114
|
+
end
|
113
115
|
end
|
@@ -1,56 +1,56 @@
|
|
1
1
|
module Statsample
|
2
|
-
class CSV < SpreadsheetBase
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
class CSV < SpreadsheetBase
|
3
|
+
class << self
|
4
|
+
# Returns a Dataset based on a csv file
|
5
|
+
#
|
6
|
+
# USE:
|
7
|
+
# ds=Statsample::CSV.read("test_csv.csv")
|
8
|
+
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
9
9
|
require 'csv'
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
10
|
+
first_row=true
|
11
|
+
fields=[]
|
12
|
+
fields_data={}
|
13
|
+
ds=nil
|
14
|
+
line_number=0
|
15
|
+
|
16
|
+
::CSV.open(filename,'r',fs,rs) do |row|
|
17
|
+
line_number+=1
|
18
|
+
if(line_number<=ignore_lines)
|
19
|
+
#puts "Skip line"
|
20
|
+
next
|
21
|
+
end
|
22
|
+
row.collect!{|c|
|
23
|
+
c.to_s
|
24
|
+
}
|
25
|
+
if first_row
|
26
|
+
fields=extract_fields(row)
|
27
|
+
ds=Statsample::Dataset.new(fields)
|
28
|
+
first_row=false
|
29
|
+
else
|
30
|
+
rowa=process_row(row,empty)
|
31
|
+
ds.add_case(rowa,false)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
convert_to_scale(ds,fields)
|
35
|
+
ds.update_valid_data
|
36
|
+
ds
|
37
|
+
end
|
38
|
+
# Save a Dataset to a csv file
|
39
|
+
#
|
40
|
+
# USE:
|
41
|
+
# Statsample::CSV.write(ds,"test_csv.csv")
|
42
|
+
def write(dataset,filename, convert_comma=false,*opts)
|
43
|
+
require 'csv'
|
44
|
+
writer=::CSV.open(filename,'w',*opts)
|
45
|
+
writer << dataset.fields
|
46
|
+
dataset.each_array do |row|
|
47
|
+
if(convert_comma)
|
48
|
+
row.collect!{|v| v.to_s.gsub(".",",")}
|
49
|
+
end
|
50
|
+
writer << row
|
51
|
+
end
|
52
|
+
writer.close
|
53
|
+
end
|
55
54
|
end
|
55
|
+
end
|
56
56
|
end
|
@@ -1,60 +1,57 @@
|
|
1
1
|
module Statsample
|
2
|
-
class CSV < SpreadsheetBase
|
3
|
-
|
2
|
+
class CSV < SpreadsheetBase
|
3
|
+
class << self
|
4
4
|
# Returns a Dataset based on a csv file
|
5
5
|
#
|
6
6
|
# USE:
|
7
7
|
# ds=Statsample::CSV.read("test_csv.csv")
|
8
|
-
|
8
|
+
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
9
9
|
require 'csv'
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
ds.update_valid_data
|
40
|
-
ds
|
41
|
-
end
|
10
|
+
first_row=true
|
11
|
+
fields=[]
|
12
|
+
fields_data={}
|
13
|
+
ds=nil
|
14
|
+
line_number=0
|
15
|
+
opts={}
|
16
|
+
opts[:col_sep]=fs unless fs.nil?
|
17
|
+
opts[:row_sep]=rs unless rs.nil?
|
18
|
+
csv=::CSV.open(filename,'r',opts)
|
19
|
+
csv.each do |row|
|
20
|
+
line_number+=1
|
21
|
+
if(line_number<=ignore_lines)
|
22
|
+
#puts "Skip line"
|
23
|
+
next
|
24
|
+
end
|
25
|
+
row.collect!{|c| c.to_s }
|
26
|
+
if first_row
|
27
|
+
fields=extract_fields(row)
|
28
|
+
ds=Statsample::Dataset.new(fields)
|
29
|
+
first_row=false
|
30
|
+
else
|
31
|
+
rowa=process_row(row,empty)
|
32
|
+
ds.add_case(rowa,false)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
convert_to_scale(ds,fields)
|
36
|
+
ds.update_valid_data
|
37
|
+
ds
|
38
|
+
end
|
42
39
|
# Save a Dataset to a csv file
|
43
40
|
#
|
44
41
|
# USE:
|
45
42
|
# Statsample::CSV.write(ds,"test_csv.csv")
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
end
|
43
|
+
def write(dataset,filename, convert_comma=false,*opts)
|
44
|
+
require 'csv'
|
45
|
+
writer=::CSV.open(filename,'w',*opts)
|
46
|
+
writer << dataset.fields
|
47
|
+
dataset.each_array do|row|
|
48
|
+
if(convert_comma)
|
49
|
+
row.collect!{|v| v.to_s.gsub(".",",")}
|
50
|
+
end
|
51
|
+
writer << row
|
52
|
+
end
|
53
|
+
writer.close
|
54
|
+
end
|
59
55
|
end
|
56
|
+
end
|
60
57
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Statsample
|
2
|
+
module SPSS
|
3
|
+
class << self
|
4
|
+
# Export an SPSS matrix of tetrachoric correlations.
|
5
|
+
#
|
6
|
+
# Use:
|
7
|
+
# ds=Statsample::Excel.read("my_data.xls")
|
8
|
+
# puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
|
9
|
+
def tetrachoric_correlation_matrix(ds)
|
10
|
+
dsv=ds.dup_only_valid
|
11
|
+
# Delete all vectors that have no variation
|
12
|
+
dsv.fields.each{|f|
|
13
|
+
if dsv[f].factors.size==1
|
14
|
+
dsv.delete_vector(f)
|
15
|
+
else
|
16
|
+
dsv[f]=dsv[f].dichotomize
|
17
|
+
end
|
18
|
+
}
|
19
|
+
tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
|
20
|
+
n=dsv.fields.collect {|f|
|
21
|
+
sprintf("%d",dsv[f].size)
|
22
|
+
}
|
23
|
+
meanlist=dsv.fields.collect{|f|
|
24
|
+
sprintf("%0.3f", dsv[f].mean)
|
25
|
+
}
|
26
|
+
stddevlist=dsv.fields.collect{|f|
|
27
|
+
sprintf("%0.3f", dsv[f].sd)
|
28
|
+
}
|
29
|
+
out=<<-HEREDOC
|
30
|
+
MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
|
31
|
+
BEGIN DATA
|
32
|
+
N #{n.join(" ")}
|
33
|
+
MEAN #{meanlist.join(" ")}
|
34
|
+
STDDEV #{stddevlist.join(" ")}
|
35
|
+
HEREDOC
|
36
|
+
tcm.row_size.times {|i|
|
37
|
+
out +="CORR "
|
38
|
+
(i+1).times {|j|
|
39
|
+
out+=sprintf("%0.3f",tcm[i,j])+" "
|
40
|
+
}
|
41
|
+
out +="\n"
|
42
|
+
}
|
43
|
+
out+="END DATA.\nEXECUTE.\n"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|