statsample 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,113 +1,115 @@
1
1
  module Statsample
2
- # Combination class systematically generates all combinations of n elements, taken r at a time.
3
- # With rbgsl, GSL::Combination is available for extra speed
4
- # Source: http://snippets.dzone.com/posts/show/4666
5
- # Use:
6
- # comb=Statsample::Combination.new(3,5)
7
- # => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
8
- # comb.each{|c| p c }
9
- # [0, 1, 2]
10
- # [0, 1, 3]
11
- # [0, 1, 4]
12
- # [0, 2, 3]
13
- # [0, 2, 4]
14
- # [0, 3, 4]
15
- # [1, 2, 3]
16
- # [1, 2, 4]
17
- # [1, 3, 4]
18
- # [2, 3, 4]
19
- #
20
- class Combination
21
- attr_reader :d
22
- def initialize(k,n,only_ruby=false)
23
- @k=k
24
- @n=n
25
- if HAS_GSL and !only_ruby
26
- @d=CombinationGsl.new(@k,@n)
27
- else
28
- @d=CombinationRuby.new(@k,@n)
29
- end
2
+ # Combination class systematically generates all combinations of n elements, taken k at a time.
3
+ # With rbgsl, GSL::Combination is available for extra speed
4
+ # Source: http://snippets.dzone.com/posts/show/4666
5
+ # Use:
6
+ # comb=Statsample::Combination.new(3,5)
7
+ # => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
8
+ # comb.each{|c| p c }
9
+ # [0, 1, 2]
10
+ # [0, 1, 3]
11
+ # [0, 1, 4]
12
+ # [0, 2, 3]
13
+ # [0, 2, 4]
14
+ # [0, 3, 4]
15
+ # [1, 2, 3]
16
+ # [1, 2, 4]
17
+ # [1, 3, 4]
18
+ # [2, 3, 4]
19
+ #
20
+ class Combination
21
+ attr_reader :d
22
+ def initialize(k,n,only_ruby=false)
23
+ @k=k
24
+ @n=n
25
+ if HAS_GSL and !only_ruby
26
+ @d=CombinationGsl.new(@k,@n)
27
+ else
28
+ @d=CombinationRuby.new(@k,@n)
30
29
  end
31
- def each
32
- reset
33
- while a=next_value
34
- yield a
35
- end
36
- end
37
- def reset
38
- @d.reset
39
- end
40
- def next_value
41
- @d.next_value
42
- end
43
- class CombinationRuby
44
- attr_reader :data
45
- def initialize(k,n)
46
- raise "k<=n" if k>n
47
- @k=k
48
- @n=n
49
- reset
50
- end
51
- def reset
52
- @data=[]
53
- (0...@k).each {|i|
54
- @data[i] = i;
55
- }
56
- end
57
- def each
58
- reset
59
- while a=next_value
60
- yield a
61
- end
62
- end
63
- def next_value
64
- return false if !@data
65
- old_comb=@data.dup
66
- i = @k - 1;
67
- @data[i]+=1
68
- while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
69
- i-=1;
70
- @data[i]+=1;
71
- end
72
-
73
- if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
74
- @data=false # No more combinations can be generated
75
- else
76
- # comb now looks like (..., x, n, n, n, ..., n).
77
- # Turn it into (..., x, x + 1, x + 2, ...)
78
- i = i+1
79
- (i...@k).each{ |i1|
80
- @data[i1] = @data[i1 - 1] + 1
81
- }
82
- end
83
- return old_comb
30
+ end
31
+ def each
32
+ reset
33
+ while a=next_value
34
+ yield a
84
35
  end
85
36
  end
37
+ def reset
38
+ @d.reset
39
+ end
40
+ def next_value
41
+ @d.next_value
42
+ end
43
+
44
+ # Ruby engine for Combinations
45
+ class CombinationRuby
46
+ attr_reader :data
47
+ def initialize(k,n)
48
+ raise "k<=n" if k>n
49
+ @k=k
50
+ @n=n
51
+ reset
52
+ end
53
+ def reset
54
+ @data=[]
55
+ (0...@k).each {|i| @data[i] = i }
56
+ end
57
+ def each
58
+ reset
59
+ while a=next_value
60
+ yield a
61
+ end
62
+ end
63
+ def next_value
64
+ return false if !@data
65
+ old_comb=@data.dup
66
+ i = @k - 1;
67
+ @data[i]+=1
68
+ while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
69
+ i-=1;
70
+ @data[i]+=1;
71
+ end
72
+
73
+ if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
74
+ @data=false # No more combinations can be generated
75
+ else
76
+ # comb now looks like (..., x, n, n, n, ..., n).
77
+ # Turn it into (..., x, x + 1, x + 2, ...)
78
+ i = i+1
79
+ (i...@k).each{ |i1|
80
+ @data[i1] = @data[i1 - 1] + 1
81
+ }
82
+ end
83
+ return old_comb
84
+ end
85
+ end
86
+
87
+ # rb-gsl engine for Combinations
86
88
  class CombinationGsl
87
- def initialize(k,n)
88
- require 'gsl'
89
- raise "k<=n" if k>n
90
- @k=k
91
- @n=n
92
- reset
93
- end
94
- def reset
95
- @c= ::GSL::Combination.calloc(@n, @k);
96
- end
97
- def next_value
98
- return false if !@c
99
- data=@c.data.to_a
100
- if @c.next != GSL::SUCCESS
101
- @c=false
102
- end
103
- return data
104
- end
105
- def each
106
- reset
107
- begin
108
- yield @c.data.to_a
109
- end while @c.next == GSL::SUCCESS
110
- end
89
+ def initialize(k,n)
90
+ require 'gsl'
91
+ raise "k<=n" if k>n
92
+ @k=k
93
+ @n=n
94
+ reset
95
+ end
96
+ def reset
97
+ @c= ::GSL::Combination.calloc(@n, @k);
98
+ end
99
+ def next_value
100
+ return false if !@c
101
+ data=@c.data.to_a
102
+ if @c.next != GSL::SUCCESS
103
+ @c=false
104
+ end
105
+ return data
106
+ end
107
+ def each
108
+ reset
109
+ begin
110
+ yield @c.data.to_a
111
+ end while @c.next == GSL::SUCCESS
112
+ end
111
113
  end
112
- end
114
+ end
113
115
  end
@@ -1,56 +1,56 @@
1
1
  module Statsample
2
- class CSV < SpreadsheetBase
3
- class << self
4
- # Returns a Dataset based on a csv file
5
- #
6
- # USE:
7
- # ds=Statsample::CSV.read("test_csv.csv")
8
- def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
2
+ class CSV < SpreadsheetBase
3
+ class << self
4
+ # Returns a Dataset based on a csv file
5
+ #
6
+ # USE:
7
+ # ds=Statsample::CSV.read("test_csv.csv")
8
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
9
  require 'csv'
10
- first_row=true
11
- fields=[]
12
- fields_data={}
13
- ds=nil
14
- line_number=0
15
-
16
- ::CSV.open(filename,'r',fs,rs) do |row|
17
- line_number+=1
18
- if(line_number<=ignore_lines)
19
- #puts "Skip line"
20
- next
21
- end
22
- row.collect!{|c|
23
- c.to_s
24
- }
25
- if first_row
26
- fields=extract_fields(row)
27
- ds=Statsample::Dataset.new(fields)
28
- first_row=false
29
- else
30
- rowa=process_row(row,empty)
31
- ds.add_case(rowa,false)
32
- end
33
- end
34
- convert_to_scale(ds,fields)
35
- ds.update_valid_data
36
- ds
37
- end
38
- # Save a Dataset on a csv file
39
- #
40
- # USE:
41
- # Statsample::CSV.write(ds,"test_csv.csv")
42
- def write(dataset,filename, convert_comma=false,*opts)
43
- require 'csv'
44
- writer=::CSV.open(filename,'w',*opts)
45
- writer << dataset.fields
46
- dataset.each_array{|row|
47
- if(convert_comma)
48
- row.collect!{|v| v.to_s.gsub(".",",")}
49
- end
50
- writer << row
51
- }
52
- writer.close
53
- end
54
- end
10
+ first_row=true
11
+ fields=[]
12
+ fields_data={}
13
+ ds=nil
14
+ line_number=0
15
+
16
+ ::CSV.open(filename,'r',fs,rs) do |row|
17
+ line_number+=1
18
+ if(line_number<=ignore_lines)
19
+ #puts "Skip line"
20
+ next
21
+ end
22
+ row.collect!{|c|
23
+ c.to_s
24
+ }
25
+ if first_row
26
+ fields=extract_fields(row)
27
+ ds=Statsample::Dataset.new(fields)
28
+ first_row=false
29
+ else
30
+ rowa=process_row(row,empty)
31
+ ds.add_case(rowa,false)
32
+ end
33
+ end
34
+ convert_to_scale(ds,fields)
35
+ ds.update_valid_data
36
+ ds
37
+ end
38
+ # Save a Dataset to a CSV file
39
+ #
40
+ # USE:
41
+ # Statsample::CSV.write(ds,"test_csv.csv")
42
+ def write(dataset,filename, convert_comma=false,*opts)
43
+ require 'csv'
44
+ writer=::CSV.open(filename,'w',*opts)
45
+ writer << dataset.fields
46
+ dataset.each_array do |row|
47
+ if(convert_comma)
48
+ row.collect!{|v| v.to_s.gsub(".",",")}
49
+ end
50
+ writer << row
51
+ end
52
+ writer.close
53
+ end
55
54
  end
55
+ end
56
56
  end
@@ -1,60 +1,57 @@
1
1
  module Statsample
2
- class CSV < SpreadsheetBase
3
- class << self
2
+ class CSV < SpreadsheetBase
3
+ class << self
4
4
  # Returns a Dataset based on a csv file
5
5
  #
6
6
  # USE:
7
7
  # ds=Statsample::CSV.read("test_csv.csv")
8
- def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
8
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
9
  require 'csv'
10
- first_row=true
11
- fields=[]
12
- fields_data={}
13
- ds=nil
14
- line_number=0
15
- opts={}
16
- opts[:col_sep]=fs unless fs.nil?
17
- opts[:row_sep]=rs unless rs.nil?
18
- csv=::CSV.open(filename,'r',opts)
19
-
20
- csv.each do |row|
21
- line_number+=1
22
- if(line_number<=ignore_lines)
23
- #puts "Skip line"
24
- next
25
- end
26
- row.collect!{|c|
27
- c.to_s
28
- }
29
- if first_row
30
- fields=extract_fields(row)
31
- ds=Statsample::Dataset.new(fields)
32
- first_row=false
33
- else
34
- rowa=process_row(row,empty)
35
- ds.add_case(rowa,false)
36
- end
37
- end
38
- convert_to_scale(ds,fields)
39
- ds.update_valid_data
40
- ds
41
- end
10
+ first_row=true
11
+ fields=[]
12
+ fields_data={}
13
+ ds=nil
14
+ line_number=0
15
+ opts={}
16
+ opts[:col_sep]=fs unless fs.nil?
17
+ opts[:row_sep]=rs unless rs.nil?
18
+ csv=::CSV.open(filename,'r',opts)
19
+ csv.each do |row|
20
+ line_number+=1
21
+ if(line_number<=ignore_lines)
22
+ #puts "Skip line"
23
+ next
24
+ end
25
+ row.collect!{|c| c.to_s }
26
+ if first_row
27
+ fields=extract_fields(row)
28
+ ds=Statsample::Dataset.new(fields)
29
+ first_row=false
30
+ else
31
+ rowa=process_row(row,empty)
32
+ ds.add_case(rowa,false)
33
+ end
34
+ end
35
+ convert_to_scale(ds,fields)
36
+ ds.update_valid_data
37
+ ds
38
+ end
42
39
  # Save a Dataset to a CSV file
43
40
  #
44
41
  # USE:
45
42
  # Statsample::CSV.write(ds,"test_csv.csv")
46
- def write(dataset,filename, convert_comma=false,*opts)
47
- require 'csv'
48
- writer=::CSV.open(filename,'w',*opts)
49
- writer << dataset.fields
50
- dataset.each_array{|row|
51
- if(convert_comma)
52
- row.collect!{|v| v.to_s.gsub(".",",")}
53
- end
54
- writer << row
55
- }
56
- writer.close
57
- end
58
- end
43
+ def write(dataset,filename, convert_comma=false,*opts)
44
+ require 'csv'
45
+ writer=::CSV.open(filename,'w',*opts)
46
+ writer << dataset.fields
47
+ dataset.each_array do|row|
48
+ if(convert_comma)
49
+ row.collect!{|v| v.to_s.gsub(".",",")}
50
+ end
51
+ writer << row
52
+ end
53
+ writer.close
54
+ end
59
55
  end
56
+ end
60
57
  end
@@ -0,0 +1,47 @@
1
+ module Statsample
2
+ module SPSS
3
+ class << self
4
+ # Export an SPSS Matrix with tetrachoric correlations.
5
+ #
6
+ # Use:
7
+ # ds=Statsample::Excel.read("my_data.xls")
8
+ # puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
9
+ def tetrachoric_correlation_matrix(ds)
10
+ dsv=ds.dup_only_valid
11
+ # Delete every vector that has no variation (a single factor); dichotomize the rest
12
+ dsv.fields.each{|f|
13
+ if dsv[f].factors.size==1
14
+ dsv.delete_vector(f)
15
+ else
16
+ dsv[f]=dsv[f].dichotomize
17
+ end
18
+ }
19
+ tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
20
+ n=dsv.fields.collect {|f|
21
+ sprintf("%d",dsv[f].size)
22
+ }
23
+ meanlist=dsv.fields.collect{|f|
24
+ sprintf("%0.3f", dsv[f].mean)
25
+ }
26
+ stddevlist=dsv.fields.collect{|f|
27
+ sprintf("%0.3f", dsv[f].sd)
28
+ }
29
+ out=<<-HEREDOC
30
+ MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
31
+ BEGIN DATA
32
+ N #{n.join(" ")}
33
+ MEAN #{meanlist.join(" ")}
34
+ STDDEV #{stddevlist.join(" ")}
35
+ HEREDOC
36
+ tcm.row_size.times {|i|
37
+ out +="CORR "
38
+ (i+1).times {|j|
39
+ out+=sprintf("%0.3f",tcm[i,j])+" "
40
+ }
41
+ out +="\n"
42
+ }
43
+ out+="END DATA.\nEXECUTE.\n"
44
+ end
45
+ end
46
+ end
47
+ end