statsample 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -0
- data/Manifest.txt +7 -0
- data/README.txt +3 -3
- data/data/repeated_fields.csv +7 -0
- data/data/tetmat_matrix.txt +5 -0
- data/data/tetmat_test.txt +1001 -0
- data/demo/spss_matrix.rb +3 -0
- data/lib/spss.rb +1 -1
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +30 -1
- data/lib/statsample/anova.rb +62 -66
- data/lib/statsample/bivariate.rb +273 -281
- data/lib/statsample/bivariate/tetrachoric.rb +418 -0
- data/lib/statsample/codification.rb +15 -15
- data/lib/statsample/combination.rb +108 -106
- data/lib/statsample/converter/csv18.rb +52 -52
- data/lib/statsample/converter/csv19.rb +45 -48
- data/lib/statsample/converter/spss.rb +47 -0
- data/lib/statsample/converters.rb +74 -77
- data/lib/statsample/crosstab.rb +21 -17
- data/lib/statsample/dataset.rb +595 -543
- data/lib/statsample/dominanceanalysis.rb +7 -10
- data/lib/statsample/htmlreport.rb +23 -0
- data/lib/statsample/regression/multiple/baseengine.rb +59 -59
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/reliability.rb +165 -145
- data/lib/statsample/vector.rb +16 -2
- data/test/test_anova.rb +16 -16
- data/test/test_bivariate.rb +146 -0
- data/test/test_csv.rb +6 -0
- data/test/test_dataset.rb +49 -5
- data/test/test_statistics.rb +6 -90
- data/test/test_vector.rb +27 -10
- metadata +10 -4
- data/test/test_r.rb +0 -9
- data/test/test_stata.rb +0 -11
@@ -1,113 +1,115 @@
|
|
1
1
|
module Statsample
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
2
|
+
# Combination class systematically generates all combinations of n elements, taken k at a time.
|
3
|
+
# When rb-gsl is available, GSL::Combination is used for extra speed
|
4
|
+
# Source: http://snippets.dzone.com/posts/show/4666
|
5
|
+
# Use:
|
6
|
+
# comb=Statsample::Combination.new(3,5)
|
7
|
+
# => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
|
8
|
+
# comb.each{|c| p c }
|
9
|
+
# [0, 1, 2]
|
10
|
+
# [0, 1, 3]
|
11
|
+
# [0, 1, 4]
|
12
|
+
# [0, 2, 3]
|
13
|
+
# [0, 2, 4]
|
14
|
+
# [0, 3, 4]
|
15
|
+
# [1, 2, 3]
|
16
|
+
# [1, 2, 4]
|
17
|
+
# [1, 3, 4]
|
18
|
+
# [2, 3, 4]
|
19
|
+
#
|
20
|
+
class Combination
|
21
|
+
attr_reader :d
|
22
|
+
def initialize(k,n,only_ruby=false)
|
23
|
+
@k=k
|
24
|
+
@n=n
|
25
|
+
if HAS_GSL and !only_ruby
|
26
|
+
@d=CombinationGsl.new(@k,@n)
|
27
|
+
else
|
28
|
+
@d=CombinationRuby.new(@k,@n)
|
30
29
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
def reset
|
38
|
-
@d.reset
|
39
|
-
end
|
40
|
-
def next_value
|
41
|
-
@d.next_value
|
42
|
-
end
|
43
|
-
class CombinationRuby
|
44
|
-
attr_reader :data
|
45
|
-
def initialize(k,n)
|
46
|
-
raise "k<=n" if k>n
|
47
|
-
@k=k
|
48
|
-
@n=n
|
49
|
-
reset
|
50
|
-
end
|
51
|
-
def reset
|
52
|
-
@data=[]
|
53
|
-
(0...@k).each {|i|
|
54
|
-
@data[i] = i;
|
55
|
-
}
|
56
|
-
end
|
57
|
-
def each
|
58
|
-
reset
|
59
|
-
while a=next_value
|
60
|
-
yield a
|
61
|
-
end
|
62
|
-
end
|
63
|
-
def next_value
|
64
|
-
return false if !@data
|
65
|
-
old_comb=@data.dup
|
66
|
-
i = @k - 1;
|
67
|
-
@data[i]+=1
|
68
|
-
while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
|
69
|
-
i-=1;
|
70
|
-
@data[i]+=1;
|
71
|
-
end
|
72
|
-
|
73
|
-
if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
|
74
|
-
@data=false # No more combinations can be generated
|
75
|
-
else
|
76
|
-
# comb now looks like (..., x, n, n, n, ..., n).
|
77
|
-
# Turn it into (..., x, x + 1, x + 2, ...)
|
78
|
-
i = i+1
|
79
|
-
(i...@k).each{ |i1|
|
80
|
-
@data[i1] = @data[i1 - 1] + 1
|
81
|
-
}
|
82
|
-
end
|
83
|
-
return old_comb
|
30
|
+
end
|
31
|
+
def each
|
32
|
+
reset
|
33
|
+
while a=next_value
|
34
|
+
yield a
|
84
35
|
end
|
85
36
|
end
|
37
|
+
def reset
|
38
|
+
@d.reset
|
39
|
+
end
|
40
|
+
def next_value
|
41
|
+
@d.next_value
|
42
|
+
end
|
43
|
+
|
44
|
+
# Ruby engine for Combinations
|
45
|
+
class CombinationRuby
|
46
|
+
attr_reader :data
|
47
|
+
def initialize(k,n)
|
48
|
+
raise "k<=n" if k>n
|
49
|
+
@k=k
|
50
|
+
@n=n
|
51
|
+
reset
|
52
|
+
end
|
53
|
+
def reset
|
54
|
+
@data=[]
|
55
|
+
(0...@k).each {|i| @data[i] = i }
|
56
|
+
end
|
57
|
+
def each
|
58
|
+
reset
|
59
|
+
while a=next_value
|
60
|
+
yield a
|
61
|
+
end
|
62
|
+
end
|
63
|
+
def next_value
|
64
|
+
return false if !@data
|
65
|
+
old_comb=@data.dup
|
66
|
+
i = @k - 1;
|
67
|
+
@data[i]+=1
|
68
|
+
while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
|
69
|
+
i-=1;
|
70
|
+
@data[i]+=1;
|
71
|
+
end
|
72
|
+
|
73
|
+
if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
|
74
|
+
@data=false # No more combinations can be generated
|
75
|
+
else
|
76
|
+
# comb now looks like (..., x, n, n, n, ..., n).
|
77
|
+
# Turn it into (..., x, x + 1, x + 2, ...)
|
78
|
+
i = i+1
|
79
|
+
(i...@k).each{ |i1|
|
80
|
+
@data[i1] = @data[i1 - 1] + 1
|
81
|
+
}
|
82
|
+
end
|
83
|
+
return old_comb
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# rb-gsl engine for Combinations
|
86
88
|
class CombinationGsl
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
89
|
+
def initialize(k,n)
|
90
|
+
require 'gsl'
|
91
|
+
raise "k<=n" if k>n
|
92
|
+
@k=k
|
93
|
+
@n=n
|
94
|
+
reset
|
95
|
+
end
|
96
|
+
def reset
|
97
|
+
@c= ::GSL::Combination.calloc(@n, @k);
|
98
|
+
end
|
99
|
+
def next_value
|
100
|
+
return false if !@c
|
101
|
+
data=@c.data.to_a
|
102
|
+
if @c.next != GSL::SUCCESS
|
103
|
+
@c=false
|
104
|
+
end
|
105
|
+
return data
|
106
|
+
end
|
107
|
+
def each
|
108
|
+
reset
|
109
|
+
begin
|
110
|
+
yield @c.data.to_a
|
111
|
+
end while @c.next == GSL::SUCCESS
|
112
|
+
end
|
111
113
|
end
|
112
|
-
end
|
114
|
+
end
|
113
115
|
end
|
@@ -1,56 +1,56 @@
|
|
1
1
|
module Statsample
|
2
|
-
class CSV < SpreadsheetBase
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
class CSV < SpreadsheetBase
|
3
|
+
class << self
|
4
|
+
# Returns a Dataset based on a csv file
|
5
|
+
#
|
6
|
+
# USE:
|
7
|
+
# ds=Statsample::CSV.read("test_csv.csv")
|
8
|
+
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
9
9
|
require 'csv'
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
10
|
+
first_row=true
|
11
|
+
fields=[]
|
12
|
+
fields_data={}
|
13
|
+
ds=nil
|
14
|
+
line_number=0
|
15
|
+
|
16
|
+
::CSV.open(filename,'r',fs,rs) do |row|
|
17
|
+
line_number+=1
|
18
|
+
if(line_number<=ignore_lines)
|
19
|
+
#puts "Skip line"
|
20
|
+
next
|
21
|
+
end
|
22
|
+
row.collect!{|c|
|
23
|
+
c.to_s
|
24
|
+
}
|
25
|
+
if first_row
|
26
|
+
fields=extract_fields(row)
|
27
|
+
ds=Statsample::Dataset.new(fields)
|
28
|
+
first_row=false
|
29
|
+
else
|
30
|
+
rowa=process_row(row,empty)
|
31
|
+
ds.add_case(rowa,false)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
convert_to_scale(ds,fields)
|
35
|
+
ds.update_valid_data
|
36
|
+
ds
|
37
|
+
end
|
38
|
+
# Save a Dataset to a csv file
|
39
|
+
#
|
40
|
+
# USE:
|
41
|
+
# Statsample::CSV.write(ds,"test_csv.csv")
|
42
|
+
def write(dataset,filename, convert_comma=false,*opts)
|
43
|
+
require 'csv'
|
44
|
+
writer=::CSV.open(filename,'w',*opts)
|
45
|
+
writer << dataset.fields
|
46
|
+
dataset.each_array do |row|
|
47
|
+
if(convert_comma)
|
48
|
+
row.collect!{|v| v.to_s.gsub(".",",")}
|
49
|
+
end
|
50
|
+
writer << row
|
51
|
+
end
|
52
|
+
writer.close
|
53
|
+
end
|
55
54
|
end
|
55
|
+
end
|
56
56
|
end
|
@@ -1,60 +1,57 @@
|
|
1
1
|
module Statsample
|
2
|
-
class CSV < SpreadsheetBase
|
3
|
-
|
2
|
+
class CSV < SpreadsheetBase
|
3
|
+
class << self
|
4
4
|
# Returns a Dataset based on a csv file
|
5
5
|
#
|
6
6
|
# USE:
|
7
7
|
# ds=Statsample::CSV.read("test_csv.csv")
|
8
|
-
|
8
|
+
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
9
9
|
require 'csv'
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
ds.update_valid_data
|
40
|
-
ds
|
41
|
-
end
|
10
|
+
first_row=true
|
11
|
+
fields=[]
|
12
|
+
fields_data={}
|
13
|
+
ds=nil
|
14
|
+
line_number=0
|
15
|
+
opts={}
|
16
|
+
opts[:col_sep]=fs unless fs.nil?
|
17
|
+
opts[:row_sep]=rs unless rs.nil?
|
18
|
+
csv=::CSV.open(filename,'r',opts)
|
19
|
+
csv.each do |row|
|
20
|
+
line_number+=1
|
21
|
+
if(line_number<=ignore_lines)
|
22
|
+
#puts "Skip line"
|
23
|
+
next
|
24
|
+
end
|
25
|
+
row.collect!{|c| c.to_s }
|
26
|
+
if first_row
|
27
|
+
fields=extract_fields(row)
|
28
|
+
ds=Statsample::Dataset.new(fields)
|
29
|
+
first_row=false
|
30
|
+
else
|
31
|
+
rowa=process_row(row,empty)
|
32
|
+
ds.add_case(rowa,false)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
convert_to_scale(ds,fields)
|
36
|
+
ds.update_valid_data
|
37
|
+
ds
|
38
|
+
end
|
42
39
|
# Save a Dataset to a csv file
|
43
40
|
#
|
44
41
|
# USE:
|
45
42
|
# Statsample::CSV.write(ds,"test_csv.csv")
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
end
|
43
|
+
def write(dataset,filename, convert_comma=false,*opts)
|
44
|
+
require 'csv'
|
45
|
+
writer=::CSV.open(filename,'w',*opts)
|
46
|
+
writer << dataset.fields
|
47
|
+
dataset.each_array do|row|
|
48
|
+
if(convert_comma)
|
49
|
+
row.collect!{|v| v.to_s.gsub(".",",")}
|
50
|
+
end
|
51
|
+
writer << row
|
52
|
+
end
|
53
|
+
writer.close
|
54
|
+
end
|
59
55
|
end
|
56
|
+
end
|
60
57
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Statsample
|
2
|
+
module SPSS
|
3
|
+
class << self
|
4
|
+
# Export an SPSS Matrix with tetrachoric correlations.
|
5
|
+
#
|
6
|
+
# Use:
|
7
|
+
# ds=Statsample::Excel.read("my_data.xls")
|
8
|
+
# puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
|
9
|
+
def tetrachoric_correlation_matrix(ds)
|
10
|
+
dsv=ds.dup_only_valid
|
11
|
+
# Delete all vectors that don't have variation
|
12
|
+
dsv.fields.each{|f|
|
13
|
+
if dsv[f].factors.size==1
|
14
|
+
dsv.delete_vector(f)
|
15
|
+
else
|
16
|
+
dsv[f]=dsv[f].dichotomize
|
17
|
+
end
|
18
|
+
}
|
19
|
+
tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
|
20
|
+
n=dsv.fields.collect {|f|
|
21
|
+
sprintf("%d",dsv[f].size)
|
22
|
+
}
|
23
|
+
meanlist=dsv.fields.collect{|f|
|
24
|
+
sprintf("%0.3f", dsv[f].mean)
|
25
|
+
}
|
26
|
+
stddevlist=dsv.fields.collect{|f|
|
27
|
+
sprintf("%0.3f", dsv[f].sd)
|
28
|
+
}
|
29
|
+
out=<<-HEREDOC
|
30
|
+
MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
|
31
|
+
BEGIN DATA
|
32
|
+
N #{n.join(" ")}
|
33
|
+
MEAN #{meanlist.join(" ")}
|
34
|
+
STDDEV #{stddevlist.join(" ")}
|
35
|
+
HEREDOC
|
36
|
+
tcm.row_size.times {|i|
|
37
|
+
out +="CORR "
|
38
|
+
(i+1).times {|j|
|
39
|
+
out+=sprintf("%0.3f",tcm[i,j])+" "
|
40
|
+
}
|
41
|
+
out +="\n"
|
42
|
+
}
|
43
|
+
out+="END DATA.\nEXECUTE.\n"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|