statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
@@ -1,32 +1,32 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
|
3
3
|
module Statsample
|
4
|
-
# This module aids to code open questions
|
5
|
-
# * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
|
6
|
-
# * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
|
7
|
-
# * Recode the vectors, loading the yaml file:
|
8
|
-
# * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
|
9
|
-
# * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
|
10
|
-
#
|
11
|
-
# Usage:
|
12
|
-
# recode_file="recodification.yaml"
|
13
|
-
# phase=:first # flag
|
14
|
-
# if phase==:first
|
15
|
-
# File.open(recode_file,"w") {|fp|
|
16
|
-
# Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
|
17
|
-
# }
|
18
|
-
# # Edit the file recodification.yaml and verify changes
|
19
|
-
# elsif phase==:second
|
20
|
-
# File.open(recode_file,"r") {|fp|
|
21
|
-
# Statsample::Codification.verify(fp,['vector1'])
|
22
|
-
# }
|
23
|
-
# # Add new vectors to the dataset
|
24
|
-
# elsif phase==:third
|
25
|
-
# File.open(recode_file,"r") {|fp|
|
26
|
-
# Statsample::Codification.recode_dataset_split!(ds,fp,"*")
|
27
|
-
# }
|
28
|
-
# end
|
29
|
-
#
|
4
|
+
# This module helps to code open-ended questions
|
5
|
+
# * Select one or more vectors of a dataset to create a yaml file, in which each vector is a hash whose keys and values are the vector's factors. If the data have Statsample::SPLIT_TOKEN in a value, each value will be separated into two or more hash keys.
|
6
|
+
# * Edit the yaml and replace the values of the hashes with your codes. If you need to create two or more codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
|
7
|
+
# * Recode the vectors, loading the yaml file:
|
8
|
+
# * recode_dataset_simple!() : The new vectors have the same name as the original plus "_recoded"
|
9
|
+
# * recode_dataset_split!() : Creates as many vectors as there are values. See Vector.add_vectors_by_split() for arguments
|
10
|
+
#
|
11
|
+
# Usage:
|
12
|
+
# recode_file="recodification.yaml"
|
13
|
+
# phase=:first # flag
|
14
|
+
# if phase==:first
|
15
|
+
# File.open(recode_file,"w") {|fp|
|
16
|
+
# Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
|
17
|
+
# }
|
18
|
+
# # Edit the file recodification.yaml and verify changes
|
19
|
+
# elsif phase==:second
|
20
|
+
# File.open(recode_file,"r") {|fp|
|
21
|
+
# Statsample::Codification.verify(fp,['vector1'])
|
22
|
+
# }
|
23
|
+
# # Add new vectors to the dataset
|
24
|
+
# elsif phase==:third
|
25
|
+
# File.open(recode_file,"r") {|fp|
|
26
|
+
# Statsample::Codification.recode_dataset_split!(ds,fp,"*")
|
27
|
+
# }
|
28
|
+
# end
|
29
|
+
#
|
30
30
|
module Codification
|
31
31
|
class << self
|
32
32
|
# Create a hash, based on vectors, to create the dictionary.
|
@@ -38,7 +38,7 @@ module Statsample
|
|
38
38
|
raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
|
39
39
|
v=dataset[v_name]
|
40
40
|
split_data=v.splitted(sep).flatten.collect {|c| c.to_s}.find_all {|c| !c.nil?}
|
41
|
-
|
41
|
+
|
42
42
|
factors=split_data.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac }
|
43
43
|
h[v_name]=factors
|
44
44
|
h
|
@@ -48,7 +48,7 @@ module Statsample
|
|
48
48
|
# Create a yaml to create a dictionary, based on vectors
|
49
49
|
# The keys will be vectors name on dataset and the values
|
50
50
|
# will be hashes, with keys = values, for recodification
|
51
|
-
#
|
51
|
+
#
|
52
52
|
# v1=%w{a,b b,c d}.to_vector
|
53
53
|
# ds={"v1"=>v1}.to_dataset
|
54
54
|
# Statsample::Codification.create_yaml(ds,['v1'])
|
@@ -63,7 +63,7 @@ module Statsample
|
|
63
63
|
# * field: name of vector
|
64
64
|
# * original: original name
|
65
65
|
# * recoded: new code
|
66
|
-
|
66
|
+
|
67
67
|
def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
|
68
68
|
require 'spreadsheet'
|
69
69
|
if File.exists?(filename)
|
@@ -98,7 +98,7 @@ module Statsample
|
|
98
98
|
end
|
99
99
|
h
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
|
103
103
|
h.inject({}) do |a,v|
|
104
104
|
v[1].split(sep).each do |val|
|
@@ -108,11 +108,11 @@ module Statsample
|
|
108
108
|
a
|
109
109
|
end
|
110
110
|
end
|
111
|
-
|
111
|
+
|
112
112
|
def dictionary(h, sep=Statsample::SPLIT_TOKEN)
|
113
113
|
h.inject({}) {|a,v| a[v[0]]=v[1].split(sep); a }
|
114
114
|
end
|
115
|
-
|
115
|
+
|
116
116
|
def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
|
117
117
|
dict=dictionary(h,sep)
|
118
118
|
new_data=v.splitted(sep)
|
@@ -125,45 +125,45 @@ module Statsample
|
|
125
125
|
end
|
126
126
|
end
|
127
127
|
def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample::SPLIT_TOKEN)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
128
|
+
_recode_dataset(dataset,dictionary_hash ,sep,false)
|
129
|
+
end
|
130
|
+
def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
|
131
|
+
_recode_dataset(dataset, dictionary_hash, sep,true)
|
132
|
+
end
|
133
|
+
|
134
|
+
def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
|
135
|
+
v_names||=h.keys
|
136
|
+
v_names.each do |v_name|
|
137
|
+
raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
|
138
|
+
recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
|
139
|
+
if c.nil?
|
140
|
+
nil
|
141
|
+
else
|
142
|
+
c.join(sep)
|
143
|
+
end
|
144
|
+
}.to_vector
|
145
|
+
if(split)
|
146
146
|
recoded.split_by_separator(sep).each {|k,v|
|
147
147
|
dataset[v_name+"_"+k]=v
|
148
148
|
}
|
149
|
-
|
150
|
-
|
151
|
-
end
|
149
|
+
else
|
150
|
+
dataset[v_name+"_recoded"]=recoded
|
152
151
|
end
|
153
152
|
end
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
}
|
153
|
+
end
|
154
|
+
|
155
|
+
|
156
|
+
def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
|
157
|
+
require 'pp'
|
158
|
+
v_names||=h.keys
|
159
|
+
v_names.each{|v_name|
|
160
|
+
inverse=inverse_hash(h[v_name],sep)
|
161
|
+
io.puts "- Field: #{v_name}"
|
162
|
+
inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
|
163
|
+
io.puts " - \"#{k}\" (#{v.count}) :\n -'"+v.join("\n -'")+"'"
|
165
164
|
}
|
166
|
-
|
165
|
+
}
|
166
|
+
end
|
167
167
|
end
|
168
168
|
end
|
169
169
|
end
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Statsample
|
2
2
|
# Combination class systematically generates all combinations of n elements, taken r at a time.
|
3
3
|
# With rbgsl, GSL::Combination is available for extra speed
|
4
|
-
#
|
5
|
-
# Use:
|
4
|
+
# == Use:
|
6
5
|
# comb=Statsample::Combination.new(3,5)
|
7
6
|
# => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
|
8
7
|
# comb.each{|c| p c }
|
@@ -16,23 +15,25 @@ module Statsample
|
|
16
15
|
# [1, 2, 4]
|
17
16
|
# [1, 3, 4]
|
18
17
|
# [2, 3, 4]
|
18
|
+
# == Reference:
|
19
|
+
# * http://snippets.dzone.com/posts/show/4666
|
19
20
|
#
|
20
21
|
class Combination
|
21
22
|
attr_reader :d
|
22
23
|
def initialize(k,n,only_ruby=false)
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
24
|
+
@k=k
|
25
|
+
@n=n
|
26
|
+
if Statsample.has_gsl? and !only_ruby
|
27
|
+
@d=CombinationGsl.new(@k,@n)
|
28
|
+
else
|
29
|
+
@d=CombinationRuby.new(@k,@n)
|
30
|
+
end
|
30
31
|
end
|
31
32
|
def each
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
33
|
+
reset
|
34
|
+
while a=next_value
|
35
|
+
yield a
|
36
|
+
end
|
36
37
|
end
|
37
38
|
def reset
|
38
39
|
@d.reset
|
@@ -43,70 +44,70 @@ module Statsample
|
|
43
44
|
class CombinationRuby # :nodoc:
|
44
45
|
attr_reader :data
|
45
46
|
def initialize(k,n)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
raise "k<=n" if k>n
|
48
|
+
@k=k
|
49
|
+
@n=n
|
50
|
+
reset
|
50
51
|
end
|
51
52
|
def reset
|
52
|
-
|
53
|
-
|
53
|
+
@data=[]
|
54
|
+
(0...@k).each {|i| @data[i] = i }
|
54
55
|
end
|
55
56
|
def each
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
reset
|
58
|
+
while a=next_value
|
59
|
+
yield a
|
60
|
+
end
|
60
61
|
end
|
61
62
|
def next_value
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
63
|
+
return false if !@data
|
64
|
+
old_comb=@data.dup
|
65
|
+
i = @k - 1;
|
66
|
+
@data[i]+=1
|
67
|
+
while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
|
68
|
+
i-=1;
|
69
|
+
@data[i]+=1;
|
70
|
+
end
|
71
|
+
|
72
|
+
if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
|
73
|
+
@data=false # No more combinations can be generated
|
74
|
+
else
|
75
|
+
# comb now looks like (..., x, n, n, n, ..., n).
|
76
|
+
# Turn it into (..., x, x + 1, x + 2, ...)
|
77
|
+
i = i+1
|
78
|
+
(i...@k).each{ |i1|
|
79
|
+
@data[i1] = @data[i1 - 1] + 1
|
80
|
+
}
|
81
|
+
end
|
82
|
+
return old_comb
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
85
86
|
# rb-gsl engine for Combinations
|
86
87
|
class CombinationGsl # :nodoc:
|
87
88
|
def initialize(k,n)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
89
|
+
require 'gsl'
|
90
|
+
raise "k<=n" if k>n
|
91
|
+
@k=k
|
92
|
+
@n=n
|
93
|
+
reset
|
93
94
|
end
|
94
95
|
def reset
|
95
|
-
|
96
|
+
@c= ::GSL::Combination.calloc(@n, @k);
|
96
97
|
end
|
97
98
|
def next_value
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
99
|
+
return false if !@c
|
100
|
+
data=@c.data.to_a
|
101
|
+
if @c.next != GSL::SUCCESS
|
102
|
+
@c=false
|
103
|
+
end
|
104
|
+
return data
|
104
105
|
end
|
105
106
|
def each
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
107
|
+
reset
|
108
|
+
begin
|
109
|
+
yield @c.data.to_a
|
110
|
+
end while @c.next == GSL::SUCCESS
|
110
111
|
end
|
111
112
|
end
|
112
113
|
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module Statsample
|
2
2
|
class CSV < SpreadsheetBase
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
class << self
|
4
|
+
# Returns a Dataset based on a csv file
|
5
|
+
#
|
6
|
+
# USE:
|
7
|
+
# ds=Statsample::CSV.read("test_csv.csv")
|
8
8
|
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
9
9
|
require 'csv'
|
10
10
|
first_row=true
|
@@ -36,17 +36,17 @@ module Statsample
|
|
36
36
|
ds.update_valid_data
|
37
37
|
ds
|
38
38
|
end
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
# Saves a Dataset to a csv file
|
40
|
+
#
|
41
|
+
# USE:
|
42
|
+
# Statsample::CSV.write(ds,"test_csv.csv")
|
43
43
|
def write(dataset,filename, convert_comma=false,*opts)
|
44
|
-
require 'csv'
|
44
|
+
require 'csv'
|
45
45
|
writer=::CSV.open(filename,'w',*opts)
|
46
46
|
writer << dataset.fields
|
47
47
|
dataset.each_array do|row|
|
48
48
|
if(convert_comma)
|
49
|
-
|
49
|
+
row.collect!{|v| v.to_s.gsub(".",",")}
|
50
50
|
end
|
51
51
|
writer << row
|
52
52
|
end
|
@@ -54,4 +54,4 @@ module Statsample
|
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
57
|
-
end
|
57
|
+
end
|