statsample 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,113 +1,115 @@
1
1
  module Statsample
2
- # Combination class systematically generates all combinations of n elements, taken r at a time.
3
- # With rbgsl, GSL::Combination is available for extra speed
4
- # Source: http://snippets.dzone.com/posts/show/4666
5
- # Use:
6
- # comb=Statsample::Combination.new(3,5)
7
- # => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
8
- # comb.each{|c| p c }
9
- # [0, 1, 2]
10
- # [0, 1, 3]
11
- # [0, 1, 4]
12
- # [0, 2, 3]
13
- # [0, 2, 4]
14
- # [0, 3, 4]
15
- # [1, 2, 3]
16
- # [1, 2, 4]
17
- # [1, 3, 4]
18
- # [2, 3, 4]
19
- #
20
- class Combination
21
- attr_reader :d
22
- def initialize(k,n,only_ruby=false)
23
- @k=k
24
- @n=n
25
- if HAS_GSL and !only_ruby
26
- @d=CombinationGsl.new(@k,@n)
27
- else
28
- @d=CombinationRuby.new(@k,@n)
29
- end
2
+ # Combination class systematically generates all combinations of n elements, taken k at a time.
3
+ # With rbgsl, GSL::Combination is available for extra speed
4
+ # Source: http://snippets.dzone.com/posts/show/4666
5
+ # Use:
6
+ # comb=Statsample::Combination.new(3,5)
7
+ # => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
8
+ # comb.each{|c| p c }
9
+ # [0, 1, 2]
10
+ # [0, 1, 3]
11
+ # [0, 1, 4]
12
+ # [0, 2, 3]
13
+ # [0, 2, 4]
14
+ # [0, 3, 4]
15
+ # [1, 2, 3]
16
+ # [1, 2, 4]
17
+ # [1, 3, 4]
18
+ # [2, 3, 4]
19
+ #
20
+ class Combination
21
+ attr_reader :d
22
+ def initialize(k,n,only_ruby=false)
23
+ @k=k
24
+ @n=n
25
+ if HAS_GSL and !only_ruby
26
+ @d=CombinationGsl.new(@k,@n)
27
+ else
28
+ @d=CombinationRuby.new(@k,@n)
30
29
  end
31
- def each
32
- reset
33
- while a=next_value
34
- yield a
35
- end
36
- end
37
- def reset
38
- @d.reset
39
- end
40
- def next_value
41
- @d.next_value
42
- end
43
- class CombinationRuby
44
- attr_reader :data
45
- def initialize(k,n)
46
- raise "k<=n" if k>n
47
- @k=k
48
- @n=n
49
- reset
50
- end
51
- def reset
52
- @data=[]
53
- (0...@k).each {|i|
54
- @data[i] = i;
55
- }
56
- end
57
- def each
58
- reset
59
- while a=next_value
60
- yield a
61
- end
62
- end
63
- def next_value
64
- return false if !@data
65
- old_comb=@data.dup
66
- i = @k - 1;
67
- @data[i]+=1
68
- while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
69
- i-=1;
70
- @data[i]+=1;
71
- end
72
-
73
- if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
74
- @data=false # No more combinations can be generated
75
- else
76
- # comb now looks like (..., x, n, n, n, ..., n).
77
- # Turn it into (..., x, x + 1, x + 2, ...)
78
- i = i+1
79
- (i...@k).each{ |i1|
80
- @data[i1] = @data[i1 - 1] + 1
81
- }
82
- end
83
- return old_comb
30
+ end
31
+ def each
32
+ reset
33
+ while a=next_value
34
+ yield a
84
35
  end
85
36
  end
37
+ def reset
38
+ @d.reset
39
+ end
40
+ def next_value
41
+ @d.next_value
42
+ end
43
+
44
+ # Ruby engine for Combinations
45
+ class CombinationRuby
46
+ attr_reader :data
47
+ def initialize(k,n)
48
+ raise "k<=n" if k>n
49
+ @k=k
50
+ @n=n
51
+ reset
52
+ end
53
+ def reset
54
+ @data=[]
55
+ (0...@k).each {|i| @data[i] = i }
56
+ end
57
+ def each
58
+ reset
59
+ while a=next_value
60
+ yield a
61
+ end
62
+ end
63
+ def next_value
64
+ return false if !@data
65
+ old_comb=@data.dup
66
+ i = @k - 1;
67
+ @data[i]+=1
68
+ while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
69
+ i-=1;
70
+ @data[i]+=1;
71
+ end
72
+
73
+ if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
74
+ @data=false # No more combinations can be generated
75
+ else
76
+ # comb now looks like (..., x, n, n, n, ..., n).
77
+ # Turn it into (..., x, x + 1, x + 2, ...)
78
+ i = i+1
79
+ (i...@k).each{ |i1|
80
+ @data[i1] = @data[i1 - 1] + 1
81
+ }
82
+ end
83
+ return old_comb
84
+ end
85
+ end
86
+
87
+ # rb-gsl engine for Combinations
86
88
  class CombinationGsl
87
- def initialize(k,n)
88
- require 'gsl'
89
- raise "k<=n" if k>n
90
- @k=k
91
- @n=n
92
- reset
93
- end
94
- def reset
95
- @c= ::GSL::Combination.calloc(@n, @k);
96
- end
97
- def next_value
98
- return false if !@c
99
- data=@c.data.to_a
100
- if @c.next != GSL::SUCCESS
101
- @c=false
102
- end
103
- return data
104
- end
105
- def each
106
- reset
107
- begin
108
- yield @c.data.to_a
109
- end while @c.next == GSL::SUCCESS
110
- end
89
+ def initialize(k,n)
90
+ require 'gsl'
91
+ raise "k<=n" if k>n
92
+ @k=k
93
+ @n=n
94
+ reset
95
+ end
96
+ def reset
97
+ @c= ::GSL::Combination.calloc(@n, @k);
98
+ end
99
+ def next_value
100
+ return false if !@c
101
+ data=@c.data.to_a
102
+ if @c.next != GSL::SUCCESS
103
+ @c=false
104
+ end
105
+ return data
106
+ end
107
+ def each
108
+ reset
109
+ begin
110
+ yield @c.data.to_a
111
+ end while @c.next == GSL::SUCCESS
112
+ end
111
113
  end
112
- end
114
+ end
113
115
  end
@@ -1,56 +1,56 @@
1
1
  module Statsample
2
- class CSV < SpreadsheetBase
3
- class << self
4
- # Returns a Dataset based on a csv file
5
- #
6
- # USE:
7
- # ds=Statsample::CSV.read("test_csv.csv")
8
- def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
2
+ class CSV < SpreadsheetBase
3
+ class << self
4
+ # Returns a Dataset based on a csv file
5
+ #
6
+ # USE:
7
+ # ds=Statsample::CSV.read("test_csv.csv")
8
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
9
  require 'csv'
10
- first_row=true
11
- fields=[]
12
- fields_data={}
13
- ds=nil
14
- line_number=0
15
-
16
- ::CSV.open(filename,'r',fs,rs) do |row|
17
- line_number+=1
18
- if(line_number<=ignore_lines)
19
- #puts "Skip line"
20
- next
21
- end
22
- row.collect!{|c|
23
- c.to_s
24
- }
25
- if first_row
26
- fields=extract_fields(row)
27
- ds=Statsample::Dataset.new(fields)
28
- first_row=false
29
- else
30
- rowa=process_row(row,empty)
31
- ds.add_case(rowa,false)
32
- end
33
- end
34
- convert_to_scale(ds,fields)
35
- ds.update_valid_data
36
- ds
37
- end
38
- # Save a Dataset on a csv file
39
- #
40
- # USE:
41
- # Statsample::CSV.write(ds,"test_csv.csv")
42
- def write(dataset,filename, convert_comma=false,*opts)
43
- require 'csv'
44
- writer=::CSV.open(filename,'w',*opts)
45
- writer << dataset.fields
46
- dataset.each_array{|row|
47
- if(convert_comma)
48
- row.collect!{|v| v.to_s.gsub(".",",")}
49
- end
50
- writer << row
51
- }
52
- writer.close
53
- end
54
- end
10
+ first_row=true
11
+ fields=[]
12
+ fields_data={}
13
+ ds=nil
14
+ line_number=0
15
+
16
+ ::CSV.open(filename,'r',fs,rs) do |row|
17
+ line_number+=1
18
+ if(line_number<=ignore_lines)
19
+ #puts "Skip line"
20
+ next
21
+ end
22
+ row.collect!{|c|
23
+ c.to_s
24
+ }
25
+ if first_row
26
+ fields=extract_fields(row)
27
+ ds=Statsample::Dataset.new(fields)
28
+ first_row=false
29
+ else
30
+ rowa=process_row(row,empty)
31
+ ds.add_case(rowa,false)
32
+ end
33
+ end
34
+ convert_to_scale(ds,fields)
35
+ ds.update_valid_data
36
+ ds
37
+ end
38
+ # Save a Dataset on a csv file
39
+ #
40
+ # USE:
41
+ # Statsample::CSV.write(ds,"test_csv.csv")
42
+ def write(dataset,filename, convert_comma=false,*opts)
43
+ require 'csv'
44
+ writer=::CSV.open(filename,'w',*opts)
45
+ writer << dataset.fields
46
+ dataset.each_array do |row|
47
+ if(convert_comma)
48
+ row.collect!{|v| v.to_s.gsub(".",",")}
49
+ end
50
+ writer << row
51
+ end
52
+ writer.close
53
+ end
55
54
  end
55
+ end
56
56
  end
@@ -1,60 +1,57 @@
1
1
  module Statsample
2
- class CSV < SpreadsheetBase
3
- class << self
2
+ class CSV < SpreadsheetBase
3
+ class << self
4
4
  # Returns a Dataset based on a csv file
5
5
  #
6
6
  # USE:
7
7
  # ds=Statsample::CSV.read("test_csv.csv")
8
- def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
8
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
9
  require 'csv'
10
- first_row=true
11
- fields=[]
12
- fields_data={}
13
- ds=nil
14
- line_number=0
15
- opts={}
16
- opts[:col_sep]=fs unless fs.nil?
17
- opts[:row_sep]=rs unless rs.nil?
18
- csv=::CSV.open(filename,'r',opts)
19
-
20
- csv.each do |row|
21
- line_number+=1
22
- if(line_number<=ignore_lines)
23
- #puts "Skip line"
24
- next
25
- end
26
- row.collect!{|c|
27
- c.to_s
28
- }
29
- if first_row
30
- fields=extract_fields(row)
31
- ds=Statsample::Dataset.new(fields)
32
- first_row=false
33
- else
34
- rowa=process_row(row,empty)
35
- ds.add_case(rowa,false)
36
- end
37
- end
38
- convert_to_scale(ds,fields)
39
- ds.update_valid_data
40
- ds
41
- end
10
+ first_row=true
11
+ fields=[]
12
+ fields_data={}
13
+ ds=nil
14
+ line_number=0
15
+ opts={}
16
+ opts[:col_sep]=fs unless fs.nil?
17
+ opts[:row_sep]=rs unless rs.nil?
18
+ csv=::CSV.open(filename,'r',opts)
19
+ csv.each do |row|
20
+ line_number+=1
21
+ if(line_number<=ignore_lines)
22
+ #puts "Skip line"
23
+ next
24
+ end
25
+ row.collect!{|c| c.to_s }
26
+ if first_row
27
+ fields=extract_fields(row)
28
+ ds=Statsample::Dataset.new(fields)
29
+ first_row=false
30
+ else
31
+ rowa=process_row(row,empty)
32
+ ds.add_case(rowa,false)
33
+ end
34
+ end
35
+ convert_to_scale(ds,fields)
36
+ ds.update_valid_data
37
+ ds
38
+ end
42
39
  # Save a Dataset on a csv file
43
40
  #
44
41
  # USE:
45
42
  # Statsample::CSV.write(ds,"test_csv.csv")
46
- def write(dataset,filename, convert_comma=false,*opts)
47
- require 'csv'
48
- writer=::CSV.open(filename,'w',*opts)
49
- writer << dataset.fields
50
- dataset.each_array{|row|
51
- if(convert_comma)
52
- row.collect!{|v| v.to_s.gsub(".",",")}
53
- end
54
- writer << row
55
- }
56
- writer.close
57
- end
58
- end
43
+ def write(dataset,filename, convert_comma=false,*opts)
44
+ require 'csv'
45
+ writer=::CSV.open(filename,'w',*opts)
46
+ writer << dataset.fields
47
+ dataset.each_array do|row|
48
+ if(convert_comma)
49
+ row.collect!{|v| v.to_s.gsub(".",",")}
50
+ end
51
+ writer << row
52
+ end
53
+ writer.close
54
+ end
59
55
  end
56
+ end
60
57
  end
@@ -0,0 +1,47 @@
1
+ module Statsample
2
+ module SPSS
3
+ class << self
4
+ # Export an SPSS matrix with tetrachoric correlations.
5
+ #
6
+ # Use:
7
+ # ds=Statsample::Excel.read("my_data.xls")
8
+ # puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
9
+ def tetrachoric_correlation_matrix(ds)
10
+ dsv=ds.dup_only_valid
11
+ # Delete all vectors that have no variation
12
+ dsv.fields.each{|f|
13
+ if dsv[f].factors.size==1
14
+ dsv.delete_vector(f)
15
+ else
16
+ dsv[f]=dsv[f].dichotomize
17
+ end
18
+ }
19
+ tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
20
+ n=dsv.fields.collect {|f|
21
+ sprintf("%d",dsv[f].size)
22
+ }
23
+ meanlist=dsv.fields.collect{|f|
24
+ sprintf("%0.3f", dsv[f].mean)
25
+ }
26
+ stddevlist=dsv.fields.collect{|f|
27
+ sprintf("%0.3f", dsv[f].sd)
28
+ }
29
+ out=<<-HEREDOC
30
+ MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
31
+ BEGIN DATA
32
+ N #{n.join(" ")}
33
+ MEAN #{meanlist.join(" ")}
34
+ STDDEV #{stddevlist.join(" ")}
35
+ HEREDOC
36
+ tcm.row_size.times {|i|
37
+ out +="CORR "
38
+ (i+1).times {|j|
39
+ out+=sprintf("%0.3f",tcm[i,j])+" "
40
+ }
41
+ out +="\n"
42
+ }
43
+ out+="END DATA.\nEXECUTE.\n"
44
+ end
45
+ end
46
+ end
47
+ end