statsample 0.6.5 → 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -1,32 +1,32 @@
1
1
  require 'yaml'
2
2
 
3
3
  module Statsample
4
- # This module aids to code open questions
5
- # * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
6
- # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
7
- # * Recode the vectors, loading the yaml file:
8
- # * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
9
- # * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
10
- #
11
- # Usage:
12
- # recode_file="recodification.yaml"
13
- # phase=:first # flag
14
- # if phase==:first
15
- # File.open(recode_file,"w") {|fp|
16
- # Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
17
- # }
18
- # # Edit the file recodification.yaml and verify changes
19
- # elsif phase==:second
20
- # File.open(recode_file,"r") {|fp|
21
- # Statsample::Codification.verify(fp,['vector1'])
22
- # }
23
- # # Add new vectors to the dataset
24
- # elsif phase==:third
25
- # File.open(recode_file,"r") {|fp|
26
- # Statsample::Codification.recode_dataset_split!(ds,fp,"*")
27
- # }
28
- # end
29
- #
4
+ # This module helps to code open-ended questions
5
+ # * Select one or more vectors of a dataset to create a yaml file, in which each vector is a hash whose keys and values are the vector's factors. If a value contains Statsample::SPLIT_TOKEN, it will be split into two or more hash keys.
6
+ # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or more codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
7
+ # * Recode the vectors, loading the yaml file:
8
+ # * recode_dataset_simple!() : The new vectors have the same name as the original plus "_recoded"
9
+ # * recode_dataset_split!() : Creates as many vectors as there are values. See Vector.add_vectors_by_split() for arguments
10
+ #
11
+ # Usage:
12
+ # recode_file="recodification.yaml"
13
+ # phase=:first # flag
14
+ # if phase==:first
15
+ # File.open(recode_file,"w") {|fp|
16
+ # Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
17
+ # }
18
+ # # Edit the file recodification.yaml and verify changes
19
+ # elsif phase==:second
20
+ # File.open(recode_file,"r") {|fp|
21
+ # Statsample::Codification.verify(fp,['vector1'])
22
+ # }
23
+ # # Add new vectors to the dataset
24
+ # elsif phase==:third
25
+ # File.open(recode_file,"r") {|fp|
26
+ # Statsample::Codification.recode_dataset_split!(ds,fp,"*")
27
+ # }
28
+ # end
29
+ #
30
30
  module Codification
31
31
  class << self
32
32
  # Create a hash, based on vectors, to create the dictionary.
@@ -38,7 +38,7 @@ module Statsample
38
38
  raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
39
39
  v=dataset[v_name]
40
40
  split_data=v.splitted(sep).flatten.collect {|c| c.to_s}.find_all {|c| !c.nil?}
41
-
41
+
42
42
  factors=split_data.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac }
43
43
  h[v_name]=factors
44
44
  h
@@ -48,7 +48,7 @@ module Statsample
48
48
  # Create a yaml to create a dictionary, based on vectors
49
49
  # The keys will be vectors name on dataset and the values
50
50
  # will be hashes, with keys = values, for recodification
51
- #
51
+ #
52
52
  # v1=%w{a,b b,c d}.to_vector
53
53
  # ds={"v1"=>v1}.to_dataset
54
54
  # Statsample::Codification.create_yaml(ds,['v1'])
@@ -63,7 +63,7 @@ module Statsample
63
63
  # * field: name of vector
64
64
  # * original: original name
65
65
  # * recoded: new code
66
-
66
+
67
67
  def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
68
68
  require 'spreadsheet'
69
69
  if File.exists?(filename)
@@ -98,7 +98,7 @@ module Statsample
98
98
  end
99
99
  h
100
100
  end
101
-
101
+
102
102
  def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
103
103
  h.inject({}) do |a,v|
104
104
  v[1].split(sep).each do |val|
@@ -108,11 +108,11 @@ module Statsample
108
108
  a
109
109
  end
110
110
  end
111
-
111
+
112
112
  def dictionary(h, sep=Statsample::SPLIT_TOKEN)
113
113
  h.inject({}) {|a,v| a[v[0]]=v[1].split(sep); a }
114
114
  end
115
-
115
+
116
116
  def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
117
117
  dict=dictionary(h,sep)
118
118
  new_data=v.splitted(sep)
@@ -125,45 +125,45 @@ module Statsample
125
125
  end
126
126
  end
127
127
  def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample::SPLIT_TOKEN)
128
- _recode_dataset(dataset,dictionary_hash ,sep,false)
129
- end
130
- def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
131
- _recode_dataset(dataset, dictionary_hash, sep,true)
132
- end
133
-
134
- def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
135
- v_names||=h.keys
136
- v_names.each do |v_name|
137
- raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
138
- recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
139
- if c.nil?
140
- nil
141
- else
142
- c.join(sep)
143
- end
144
- }.to_vector
145
- if(split)
128
+ _recode_dataset(dataset,dictionary_hash ,sep,false)
129
+ end
130
+ def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
131
+ _recode_dataset(dataset, dictionary_hash, sep,true)
132
+ end
133
+
134
+ def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
135
+ v_names||=h.keys
136
+ v_names.each do |v_name|
137
+ raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
138
+ recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
139
+ if c.nil?
140
+ nil
141
+ else
142
+ c.join(sep)
143
+ end
144
+ }.to_vector
145
+ if(split)
146
146
  recoded.split_by_separator(sep).each {|k,v|
147
147
  dataset[v_name+"_"+k]=v
148
148
  }
149
- else
150
- dataset[v_name+"_recoded"]=recoded
151
- end
149
+ else
150
+ dataset[v_name+"_recoded"]=recoded
152
151
  end
153
152
  end
154
-
155
-
156
- def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
157
- require 'pp'
158
- v_names||=h.keys
159
- v_names.each{|v_name|
160
- inverse=inverse_hash(h[v_name],sep)
161
- io.puts "- Field: #{v_name}"
162
- inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
163
- io.puts " - \"#{k}\" (#{v.count}) :\n -'"+v.join("\n -'")+"'"
164
- }
153
+ end
154
+
155
+
156
+ def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
157
+ require 'pp'
158
+ v_names||=h.keys
159
+ v_names.each{|v_name|
160
+ inverse=inverse_hash(h[v_name],sep)
161
+ io.puts "- Field: #{v_name}"
162
+ inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
163
+ io.puts " - \"#{k}\" (#{v.count}) :\n -'"+v.join("\n -'")+"'"
165
164
  }
166
- end
165
+ }
166
+ end
167
167
  end
168
168
  end
169
169
  end
@@ -1,8 +1,7 @@
1
1
  module Statsample
2
2
  # Combination class systematically generates all combinations of n elements, taken k at a time.
3
3
  # With rbgsl, GSL::Combination is available for extra speed
4
- # Source: http://snippets.dzone.com/posts/show/4666
5
- # Use:
4
+ # == Use:
6
5
  # comb=Statsample::Combination.new(3,5)
7
6
  # => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
8
7
  # comb.each{|c| p c }
@@ -16,23 +15,25 @@ module Statsample
16
15
  # [1, 2, 4]
17
16
  # [1, 3, 4]
18
17
  # [2, 3, 4]
18
+ # == Reference:
19
+ # * http://snippets.dzone.com/posts/show/4666
19
20
  #
20
21
  class Combination
21
22
  attr_reader :d
22
23
  def initialize(k,n,only_ruby=false)
23
- @k=k
24
- @n=n
25
- if HAS_GSL and !only_ruby
26
- @d=CombinationGsl.new(@k,@n)
27
- else
28
- @d=CombinationRuby.new(@k,@n)
29
- end
24
+ @k=k
25
+ @n=n
26
+ if Statsample.has_gsl? and !only_ruby
27
+ @d=CombinationGsl.new(@k,@n)
28
+ else
29
+ @d=CombinationRuby.new(@k,@n)
30
+ end
30
31
  end
31
32
  def each
32
- reset
33
- while a=next_value
34
- yield a
35
- end
33
+ reset
34
+ while a=next_value
35
+ yield a
36
+ end
36
37
  end
37
38
  def reset
38
39
  @d.reset
@@ -43,70 +44,70 @@ module Statsample
43
44
  class CombinationRuby # :nodoc:
44
45
  attr_reader :data
45
46
  def initialize(k,n)
46
- raise "k<=n" if k>n
47
- @k=k
48
- @n=n
49
- reset
47
+ raise "k<=n" if k>n
48
+ @k=k
49
+ @n=n
50
+ reset
50
51
  end
51
52
  def reset
52
- @data=[]
53
- (0...@k).each {|i| @data[i] = i }
53
+ @data=[]
54
+ (0...@k).each {|i| @data[i] = i }
54
55
  end
55
56
  def each
56
- reset
57
- while a=next_value
58
- yield a
59
- end
57
+ reset
58
+ while a=next_value
59
+ yield a
60
+ end
60
61
  end
61
62
  def next_value
62
- return false if !@data
63
- old_comb=@data.dup
64
- i = @k - 1;
65
- @data[i]+=1
66
- while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
67
- i-=1;
68
- @data[i]+=1;
69
- end
70
-
71
- if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
72
- @data=false # No more combinations can be generated
73
- else
74
- # comb now looks like (..., x, n, n, n, ..., n).
75
- # Turn it into (..., x, x + 1, x + 2, ...)
76
- i = i+1
77
- (i...@k).each{ |i1|
78
- @data[i1] = @data[i1 - 1] + 1
79
- }
80
- end
81
- return old_comb
63
+ return false if !@data
64
+ old_comb=@data.dup
65
+ i = @k - 1;
66
+ @data[i]+=1
67
+ while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
68
+ i-=1;
69
+ @data[i]+=1;
70
+ end
71
+
72
+ if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
73
+ @data=false # No more combinations can be generated
74
+ else
75
+ # comb now looks like (..., x, n, n, n, ..., n).
76
+ # Turn it into (..., x, x + 1, x + 2, ...)
77
+ i = i+1
78
+ (i...@k).each{ |i1|
79
+ @data[i1] = @data[i1 - 1] + 1
80
+ }
81
+ end
82
+ return old_comb
82
83
  end
83
84
  end
84
85
 
85
86
  # rb-gsl engine for Combinations
86
87
  class CombinationGsl # :nodoc:
87
88
  def initialize(k,n)
88
- require 'gsl'
89
- raise "k<=n" if k>n
90
- @k=k
91
- @n=n
92
- reset
89
+ require 'gsl'
90
+ raise "k<=n" if k>n
91
+ @k=k
92
+ @n=n
93
+ reset
93
94
  end
94
95
  def reset
95
- @c= ::GSL::Combination.calloc(@n, @k);
96
+ @c= ::GSL::Combination.calloc(@n, @k);
96
97
  end
97
98
  def next_value
98
- return false if !@c
99
- data=@c.data.to_a
100
- if @c.next != GSL::SUCCESS
101
- @c=false
102
- end
103
- return data
99
+ return false if !@c
100
+ data=@c.data.to_a
101
+ if @c.next != GSL::SUCCESS
102
+ @c=false
103
+ end
104
+ return data
104
105
  end
105
106
  def each
106
- reset
107
- begin
108
- yield @c.data.to_a
109
- end while @c.next == GSL::SUCCESS
107
+ reset
108
+ begin
109
+ yield @c.data.to_a
110
+ end while @c.next == GSL::SUCCESS
110
111
  end
111
112
  end
112
113
  end
@@ -1,10 +1,10 @@
1
1
  module Statsample
2
2
  class CSV < SpreadsheetBase
3
- class << self
4
- # Returns a Dataset based on a csv file
5
- #
6
- # USE:
7
- # ds=Statsample::CSV.read("test_csv.csv")
3
+ class << self
4
+ # Returns a Dataset based on a csv file
5
+ #
6
+ # USE:
7
+ # ds=Statsample::CSV.read("test_csv.csv")
8
8
  def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
9
  require 'csv'
10
10
  first_row=true
@@ -36,17 +36,17 @@ module Statsample
36
36
  ds.update_valid_data
37
37
  ds
38
38
  end
39
- # Save a Dataset on a csv file
40
- #
41
- # USE:
42
- # Statsample::CSV.write(ds,"test_csv.csv")
39
+ # Save a Dataset on a csv file
40
+ #
41
+ # USE:
42
+ # Statsample::CSV.write(ds,"test_csv.csv")
43
43
  def write(dataset,filename, convert_comma=false,*opts)
44
- require 'csv'
44
+ require 'csv'
45
45
  writer=::CSV.open(filename,'w',*opts)
46
46
  writer << dataset.fields
47
47
  dataset.each_array do|row|
48
48
  if(convert_comma)
49
- row.collect!{|v| v.to_s.gsub(".",",")}
49
+ row.collect!{|v| v.to_s.gsub(".",",")}
50
50
  end
51
51
  writer << row
52
52
  end
@@ -54,4 +54,4 @@ module Statsample
54
54
  end
55
55
  end
56
56
  end
57
- end
57
+ end
@@ -175,7 +175,7 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
175
175
  end
176
176
  }
177
177
  end
178
- private :process_row
178
+ private :process_row, :preprocess_row
179
179
 
180
180
  # Returns a dataset based on a xls file
181
181
  # USE: