statsample 0.6.5 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -1,32 +1,32 @@
1
1
  require 'yaml'
2
2
 
3
3
  module Statsample
4
- # This module aids to code open questions
5
- # * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
6
- # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
7
- # * Recode the vectors, loading the yaml file:
8
- # * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
9
- # * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
10
- #
11
- # Usage:
12
- # recode_file="recodification.yaml"
13
- # phase=:first # flag
14
- # if phase==:first
15
- # File.open(recode_file,"w") {|fp|
16
- # Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
17
- # }
18
- # # Edit the file recodification.yaml and verify changes
19
- # elsif phase==:second
20
- # File.open(recode_file,"r") {|fp|
21
- # Statsample::Codification.verify(fp,['vector1'])
22
- # }
23
- # # Add new vectors to the dataset
24
- # elsif phase==:third
25
- # File.open(recode_file,"r") {|fp|
26
- # Statsample::Codification.recode_dataset_split!(ds,fp,"*")
27
- # }
28
- # end
29
- #
4
+ # This module aids to code open questions
5
+ # * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
6
+ # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
7
+ # * Recode the vectors, loading the yaml file:
8
+ # * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
9
+ # * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
10
+ #
11
+ # Usage:
12
+ # recode_file="recodification.yaml"
13
+ # phase=:first # flag
14
+ # if phase==:first
15
+ # File.open(recode_file,"w") {|fp|
16
+ # Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
17
+ # }
18
+ # # Edit the file recodification.yaml and verify changes
19
+ # elsif phase==:second
20
+ # File.open(recode_file,"r") {|fp|
21
+ # Statsample::Codification.verify(fp,['vector1'])
22
+ # }
23
+ # # Add new vectors to the dataset
24
+ # elsif phase==:third
25
+ # File.open(recode_file,"r") {|fp|
26
+ # Statsample::Codification.recode_dataset_split!(ds,fp,"*")
27
+ # }
28
+ # end
29
+ #
30
30
  module Codification
31
31
  class << self
32
32
  # Create a hash, based on vectors, to create the dictionary.
@@ -38,7 +38,7 @@ module Statsample
38
38
  raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
39
39
  v=dataset[v_name]
40
40
  split_data=v.splitted(sep).flatten.collect {|c| c.to_s}.find_all {|c| !c.nil?}
41
-
41
+
42
42
  factors=split_data.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac }
43
43
  h[v_name]=factors
44
44
  h
@@ -48,7 +48,7 @@ module Statsample
48
48
  # Create a yaml to create a dictionary, based on vectors
49
49
  # The keys will be vectors name on dataset and the values
50
50
  # will be hashes, with keys = values, for recodification
51
- #
51
+ #
52
52
  # v1=%w{a,b b,c d}.to_vector
53
53
  # ds={"v1"=>v1}.to_dataset
54
54
  # Statsample::Codification.create_yaml(ds,['v1'])
@@ -63,7 +63,7 @@ module Statsample
63
63
  # * field: name of vector
64
64
  # * original: original name
65
65
  # * recoded: new code
66
-
66
+
67
67
  def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
68
68
  require 'spreadsheet'
69
69
  if File.exists?(filename)
@@ -98,7 +98,7 @@ module Statsample
98
98
  end
99
99
  h
100
100
  end
101
-
101
+
102
102
  def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
103
103
  h.inject({}) do |a,v|
104
104
  v[1].split(sep).each do |val|
@@ -108,11 +108,11 @@ module Statsample
108
108
  a
109
109
  end
110
110
  end
111
-
111
+
112
112
  def dictionary(h, sep=Statsample::SPLIT_TOKEN)
113
113
  h.inject({}) {|a,v| a[v[0]]=v[1].split(sep); a }
114
114
  end
115
-
115
+
116
116
  def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
117
117
  dict=dictionary(h,sep)
118
118
  new_data=v.splitted(sep)
@@ -125,45 +125,45 @@ module Statsample
125
125
  end
126
126
  end
127
127
  def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample::SPLIT_TOKEN)
128
- _recode_dataset(dataset,dictionary_hash ,sep,false)
129
- end
130
- def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
131
- _recode_dataset(dataset, dictionary_hash, sep,true)
132
- end
133
-
134
- def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
135
- v_names||=h.keys
136
- v_names.each do |v_name|
137
- raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
138
- recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
139
- if c.nil?
140
- nil
141
- else
142
- c.join(sep)
143
- end
144
- }.to_vector
145
- if(split)
128
+ _recode_dataset(dataset,dictionary_hash ,sep,false)
129
+ end
130
+ def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
131
+ _recode_dataset(dataset, dictionary_hash, sep,true)
132
+ end
133
+
134
+ def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
135
+ v_names||=h.keys
136
+ v_names.each do |v_name|
137
+ raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
138
+ recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
139
+ if c.nil?
140
+ nil
141
+ else
142
+ c.join(sep)
143
+ end
144
+ }.to_vector
145
+ if(split)
146
146
  recoded.split_by_separator(sep).each {|k,v|
147
147
  dataset[v_name+"_"+k]=v
148
148
  }
149
- else
150
- dataset[v_name+"_recoded"]=recoded
151
- end
149
+ else
150
+ dataset[v_name+"_recoded"]=recoded
152
151
  end
153
152
  end
154
-
155
-
156
- def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
157
- require 'pp'
158
- v_names||=h.keys
159
- v_names.each{|v_name|
160
- inverse=inverse_hash(h[v_name],sep)
161
- io.puts "- Field: #{v_name}"
162
- inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
163
- io.puts " - \"#{k}\" (#{v.count}) :\n -'"+v.join("\n -'")+"'"
164
- }
153
+ end
154
+
155
+
156
+ def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
157
+ require 'pp'
158
+ v_names||=h.keys
159
+ v_names.each{|v_name|
160
+ inverse=inverse_hash(h[v_name],sep)
161
+ io.puts "- Field: #{v_name}"
162
+ inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
163
+ io.puts " - \"#{k}\" (#{v.count}) :\n -'"+v.join("\n -'")+"'"
165
164
  }
166
- end
165
+ }
166
+ end
167
167
  end
168
168
  end
169
169
  end
@@ -1,8 +1,7 @@
1
1
  module Statsample
2
2
  # Combination class systematically generates all combinations of n elements, taken r at a time.
3
3
  # With rbgsl, GSL::Combination is available for extra speed
4
- # Source: http://snippets.dzone.com/posts/show/4666
5
- # Use:
4
+ # == Use:
6
5
  # comb=Statsample::Combination.new(3,5)
7
6
  # => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
8
7
  # comb.each{|c| p c }
@@ -16,23 +15,25 @@ module Statsample
16
15
  # [1, 2, 4]
17
16
  # [1, 3, 4]
18
17
  # [2, 3, 4]
18
+ # == Reference:
19
+ # * http://snippets.dzone.com/posts/show/4666
19
20
  #
20
21
  class Combination
21
22
  attr_reader :d
22
23
  def initialize(k,n,only_ruby=false)
23
- @k=k
24
- @n=n
25
- if HAS_GSL and !only_ruby
26
- @d=CombinationGsl.new(@k,@n)
27
- else
28
- @d=CombinationRuby.new(@k,@n)
29
- end
24
+ @k=k
25
+ @n=n
26
+ if Statsample.has_gsl? and !only_ruby
27
+ @d=CombinationGsl.new(@k,@n)
28
+ else
29
+ @d=CombinationRuby.new(@k,@n)
30
+ end
30
31
  end
31
32
  def each
32
- reset
33
- while a=next_value
34
- yield a
35
- end
33
+ reset
34
+ while a=next_value
35
+ yield a
36
+ end
36
37
  end
37
38
  def reset
38
39
  @d.reset
@@ -43,70 +44,70 @@ module Statsample
43
44
  class CombinationRuby # :nodoc:
44
45
  attr_reader :data
45
46
  def initialize(k,n)
46
- raise "k<=n" if k>n
47
- @k=k
48
- @n=n
49
- reset
47
+ raise "k<=n" if k>n
48
+ @k=k
49
+ @n=n
50
+ reset
50
51
  end
51
52
  def reset
52
- @data=[]
53
- (0...@k).each {|i| @data[i] = i }
53
+ @data=[]
54
+ (0...@k).each {|i| @data[i] = i }
54
55
  end
55
56
  def each
56
- reset
57
- while a=next_value
58
- yield a
59
- end
57
+ reset
58
+ while a=next_value
59
+ yield a
60
+ end
60
61
  end
61
62
  def next_value
62
- return false if !@data
63
- old_comb=@data.dup
64
- i = @k - 1;
65
- @data[i]+=1
66
- while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
67
- i-=1;
68
- @data[i]+=1;
69
- end
70
-
71
- if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
72
- @data=false # No more combinations can be generated
73
- else
74
- # comb now looks like (..., x, n, n, n, ..., n).
75
- # Turn it into (..., x, x + 1, x + 2, ...)
76
- i = i+1
77
- (i...@k).each{ |i1|
78
- @data[i1] = @data[i1 - 1] + 1
79
- }
80
- end
81
- return old_comb
63
+ return false if !@data
64
+ old_comb=@data.dup
65
+ i = @k - 1;
66
+ @data[i]+=1
67
+ while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
68
+ i-=1;
69
+ @data[i]+=1;
70
+ end
71
+
72
+ if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
73
+ @data=false # No more combinations can be generated
74
+ else
75
+ # comb now looks like (..., x, n, n, n, ..., n).
76
+ # Turn it into (..., x, x + 1, x + 2, ...)
77
+ i = i+1
78
+ (i...@k).each{ |i1|
79
+ @data[i1] = @data[i1 - 1] + 1
80
+ }
81
+ end
82
+ return old_comb
82
83
  end
83
84
  end
84
85
 
85
86
  # rb-gsl engine for Combinations
86
87
  class CombinationGsl # :nodoc:
87
88
  def initialize(k,n)
88
- require 'gsl'
89
- raise "k<=n" if k>n
90
- @k=k
91
- @n=n
92
- reset
89
+ require 'gsl'
90
+ raise "k<=n" if k>n
91
+ @k=k
92
+ @n=n
93
+ reset
93
94
  end
94
95
  def reset
95
- @c= ::GSL::Combination.calloc(@n, @k);
96
+ @c= ::GSL::Combination.calloc(@n, @k);
96
97
  end
97
98
  def next_value
98
- return false if !@c
99
- data=@c.data.to_a
100
- if @c.next != GSL::SUCCESS
101
- @c=false
102
- end
103
- return data
99
+ return false if !@c
100
+ data=@c.data.to_a
101
+ if @c.next != GSL::SUCCESS
102
+ @c=false
103
+ end
104
+ return data
104
105
  end
105
106
  def each
106
- reset
107
- begin
108
- yield @c.data.to_a
109
- end while @c.next == GSL::SUCCESS
107
+ reset
108
+ begin
109
+ yield @c.data.to_a
110
+ end while @c.next == GSL::SUCCESS
110
111
  end
111
112
  end
112
113
  end
@@ -1,10 +1,10 @@
1
1
  module Statsample
2
2
  class CSV < SpreadsheetBase
3
- class << self
4
- # Returns a Dataset based on a csv file
5
- #
6
- # USE:
7
- # ds=Statsample::CSV.read("test_csv.csv")
3
+ class << self
4
+ # Returns a Dataset based on a csv file
5
+ #
6
+ # USE:
7
+ # ds=Statsample::CSV.read("test_csv.csv")
8
8
  def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
9
  require 'csv'
10
10
  first_row=true
@@ -36,17 +36,17 @@ module Statsample
36
36
  ds.update_valid_data
37
37
  ds
38
38
  end
39
- # Save a Dataset on a csv file
40
- #
41
- # USE:
42
- # Statsample::CSV.write(ds,"test_csv.csv")
39
+ # Save a Dataset on a csv file
40
+ #
41
+ # USE:
42
+ # Statsample::CSV.write(ds,"test_csv.csv")
43
43
  def write(dataset,filename, convert_comma=false,*opts)
44
- require 'csv'
44
+ require 'csv'
45
45
  writer=::CSV.open(filename,'w',*opts)
46
46
  writer << dataset.fields
47
47
  dataset.each_array do|row|
48
48
  if(convert_comma)
49
- row.collect!{|v| v.to_s.gsub(".",",")}
49
+ row.collect!{|v| v.to_s.gsub(".",",")}
50
50
  end
51
51
  writer << row
52
52
  end
@@ -54,4 +54,4 @@ module Statsample
54
54
  end
55
55
  end
56
56
  end
57
- end
57
+ end
@@ -175,7 +175,7 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
175
175
  end
176
176
  }
177
177
  end
178
- private :process_row
178
+ private :process_row, :preprocess_row
179
179
 
180
180
  # Returns a dataset based on a xls file
181
181
  # USE: