statsample 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +3 -0
- data/README.txt +7 -3
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/regression.rb +12 -3
- data/lib/statsample/converters.rb +48 -26
- data/lib/statsample/crosstab.rb +6 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +12 -10
- data/lib/statsample/dominanceanalysis.rb +8 -6
- data/lib/statsample/regression/multiple/alglibengine.rb +2 -0
- data/lib/statsample/regression/multiple/gslengine.rb +8 -4
- data/lib/statsample/regression/multiple/rubyengine.rb +2 -1
- data/lib/statsample/regression/multiple.rb +6 -13
- data/lib/statsample/vector.rb +8 -0
- data/lib/statsample.rb +34 -6
- data/po/es/statsample.po +108 -0
- data/po/statsample.pot +113 -0
- data/test/test_anova.rb +2 -1
- data/test/test_codification.rb +2 -1
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.csv +1 -1
- data/test/test_csv.rb +15 -2
- data/test/test_dataset.rb +2 -1
- data/test/test_ggobi.rb +2 -2
- data/test/test_multiset.rb +2 -2
- data/test/test_regression.rb +2 -1
- data/test/test_reliability.rb +2 -1
- data/test/test_resample.rb +2 -1
- data/test/test_srs.rb +2 -1
- data/test/test_statistics.rb +14 -1
- data/test/test_stratified.rb +2 -2
- data/test/test_svg_graph.rb +2 -1
- data/test/test_vector.rb +2 -1
- data/test/test_xls.rb +13 -1
- data/test/test_xls.xls +0 -0
- metadata +10 -7
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
=== 0.3.3 / 2009-08-11
|
2
|
+
* Added i18n support. For now, only a Spanish translation is available
|
3
|
+
* Bug fix: Tests now load libraries from the ../lib path
|
4
|
+
* Excel and CSV importers automatically change the type of a vector to Scale when all data are numbers or nil values
|
5
|
+
*
|
6
|
+
|
1
7
|
=== 0.3.2 / 2009-08-04
|
2
8
|
|
3
9
|
* Added Regression::Multiple::GslEngine
|
data/Manifest.txt
CHANGED
@@ -3,6 +3,7 @@ LICENSE.txt
|
|
3
3
|
Manifest.txt
|
4
4
|
README.txt
|
5
5
|
bin/statsample
|
6
|
+
data/locale/es/LC_MESSAGES/statsample.mo
|
6
7
|
demo/benchmark.rb
|
7
8
|
demo/chi-square.rb
|
8
9
|
demo/crosstab.rb
|
@@ -47,6 +48,8 @@ lib/statsample/resample.rb
|
|
47
48
|
lib/statsample/srs.rb
|
48
49
|
lib/statsample/test.rb
|
49
50
|
lib/statsample/vector.rb
|
51
|
+
po/es/statsample.po
|
52
|
+
po/statsample.pot
|
50
53
|
setup.rb
|
51
54
|
test/_test_chart.rb
|
52
55
|
test/test_anova.rb
|
data/README.txt
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
= Statsample
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
http://ruby-statsample.rubyforge.org/
|
4
|
+
|
5
5
|
|
6
6
|
== DESCRIPTION:
|
7
7
|
|
8
|
-
|
8
|
+
A suite for your basic and advanced statistics needs. Descriptive statistics, multiple regression, dominance analysis, scale's reliability analysis, bivariate statistics and other procedures.
|
9
9
|
|
10
10
|
== FEATURES:
|
11
11
|
|
@@ -63,6 +63,10 @@ Optional:
|
|
63
63
|
* Plotting: gnuplot and rbgnuplot, SVG::Graph
|
64
64
|
* Advanced Statistical: gsl and rb-gsl (http://rb-gsl.rubyforge.org/)
|
65
65
|
|
66
|
+
== DOWNLOAD
|
67
|
+
* Gems and bug reports: http://rubyforge.org/projects/ruby-statsample/
|
68
|
+
* SVN and Wiki: http://code.google.com/p/ruby-statsample/
|
69
|
+
|
66
70
|
== INSTALL:
|
67
71
|
|
68
72
|
sudo gem install ruby-statsample
|
Binary file
|
data/demo/regression.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'benchmark'
|
2
3
|
tests=300
|
3
4
|
include Statsample
|
4
5
|
r = GSL::Rng.alloc(GSL::Rng::TAUS,Time.now.to_i)
|
@@ -24,9 +25,17 @@ if !File.exists? "regression.dab"
|
|
24
25
|
else
|
25
26
|
da=Statsample.load("regression.dab")
|
26
27
|
end
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
times=1
|
29
|
+
Benchmark.bm(7) do |x|
|
30
|
+
x.report("GslEngine:") {
|
31
|
+
da.lr_class=Regression::Multiple::GslEngine
|
32
|
+
da.bootstrap(times)
|
33
|
+
}
|
34
|
+
x.report("AlglibEngine:") {
|
35
|
+
da.lr_class=Regression::Multiple::AlglibEngine
|
36
|
+
da.bootstrap(times)
|
37
|
+
}
|
38
|
+
end
|
30
39
|
|
31
40
|
puts da.summary
|
32
41
|
da.save("regression.dab")
|
@@ -78,7 +78,46 @@ module Statsample
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
end
|
81
|
-
|
81
|
+
class SpreadsheetBase
|
82
|
+
class << self
|
83
|
+
def extract_fields(row)
|
84
|
+
fields=row.to_a.collect{|c| c.downcase}
|
85
|
+
if fields.size!=fields.uniq.size
|
86
|
+
repeated=fields.inject({}) {|a,v|
|
87
|
+
(a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v|k}.join(",")
|
88
|
+
raise "There are some repeated fields on the header:#{repeated}. Please, fix"
|
89
|
+
end
|
90
|
+
fields
|
91
|
+
end
|
92
|
+
|
93
|
+
def process_row(row,empty)
|
94
|
+
row.to_a.collect do |c|
|
95
|
+
if empty.include?(c)
|
96
|
+
nil
|
97
|
+
else
|
98
|
+
if c.is_a? String and c.is_number?
|
99
|
+
if c=~/^\d+$/
|
100
|
+
c.to_i
|
101
|
+
else
|
102
|
+
c.gsub(",",".").to_f
|
103
|
+
end
|
104
|
+
else
|
105
|
+
c
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
def convert_to_scale(ds,fields)
|
111
|
+
fields.each do |f|
|
112
|
+
if ds[f].can_be_scale?
|
113
|
+
ds[f].type=:scale
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
end
|
119
|
+
end
|
120
|
+
class Excel < SpreadsheetBase
|
82
121
|
class << self
|
83
122
|
def write(dataset,filename)
|
84
123
|
require 'spreadsheet'
|
@@ -101,7 +140,6 @@ module Statsample
|
|
101
140
|
#
|
102
141
|
def read(filename, worksheet_id=0, ignore_lines=0, empty=[''])
|
103
142
|
require 'spreadsheet'
|
104
|
-
|
105
143
|
first_row=true
|
106
144
|
fields=[]
|
107
145
|
fields_data={}
|
@@ -121,35 +159,28 @@ module Statsample
|
|
121
159
|
if c.is_a? Spreadsheet::Formula
|
122
160
|
nil
|
123
161
|
else
|
124
|
-
c
|
162
|
+
c
|
125
163
|
end
|
126
164
|
}
|
127
165
|
if first_row
|
128
|
-
fields=row
|
129
|
-
if fields.size!=fields.uniq.size
|
130
|
-
repeated=fields.inject({}) {|a,v|
|
131
|
-
(a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v|k}.join(",")
|
132
|
-
raise "There are some repeated fields on the header:#{repeated}. Please, fix"
|
133
|
-
end
|
166
|
+
fields=extract_fields(row)
|
134
167
|
ds=Statsample::Dataset.new(fields)
|
135
168
|
first_row=false
|
136
169
|
else
|
137
|
-
rowa=row
|
138
|
-
|
139
|
-
empty.include?(c) ? nil: c
|
140
|
-
}
|
170
|
+
rowa=process_row(row,empty)
|
141
171
|
(fields.size - rowa.size).times {|i|
|
142
172
|
rowa << nil
|
143
173
|
}
|
144
174
|
ds.add_case(rowa,false)
|
145
175
|
end
|
146
176
|
end
|
177
|
+
convert_to_scale(ds,fields)
|
147
178
|
ds.update_valid_data
|
148
179
|
ds
|
149
180
|
end
|
150
181
|
end
|
151
182
|
end
|
152
|
-
|
183
|
+
class CSV < SpreadsheetBase
|
153
184
|
class << self
|
154
185
|
# Returns a Dataset based on a csv file
|
155
186
|
#
|
@@ -157,7 +188,6 @@ module Statsample
|
|
157
188
|
# ds=Statsample::CSV.read("test_csv.csv")
|
158
189
|
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
159
190
|
require 'csv'
|
160
|
-
|
161
191
|
first_row=true
|
162
192
|
fields=[]
|
163
193
|
fields_data={}
|
@@ -173,23 +203,15 @@ module Statsample
|
|
173
203
|
c.to_s
|
174
204
|
}
|
175
205
|
if first_row
|
176
|
-
fields=row
|
177
|
-
if fields.size!=fields.uniq.size
|
178
|
-
repeated=fields.inject({}) {|a,v|
|
179
|
-
(a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v|k}.join(",")
|
180
|
-
|
181
|
-
raise "There are some repeated fields on the header:#{repeated}. Please, fix"
|
182
|
-
end
|
206
|
+
fields=extract_fields(row)
|
183
207
|
ds=Statsample::Dataset.new(fields)
|
184
208
|
first_row=false
|
185
209
|
else
|
186
|
-
rowa=row
|
187
|
-
empty.include?(c) ? nil: c
|
188
|
-
}
|
189
|
-
|
210
|
+
rowa=process_row(row,empty)
|
190
211
|
ds.add_case(rowa,false)
|
191
212
|
end
|
192
213
|
end
|
214
|
+
convert_to_scale(ds,fields)
|
193
215
|
ds.update_valid_data
|
194
216
|
ds
|
195
217
|
end
|
data/lib/statsample/crosstab.rb
CHANGED
@@ -4,6 +4,8 @@ module Statsample
|
|
4
4
|
# The first vector will be at rows and the second will be the columns
|
5
5
|
#
|
6
6
|
class Crosstab
|
7
|
+
include GetText
|
8
|
+
bindtextdomain("statsample")
|
7
9
|
attr_reader :v_rows, :v_cols
|
8
10
|
attr_accessor :row_label, :column_label
|
9
11
|
def initialize(v1,v2)
|
@@ -88,10 +90,10 @@ module Statsample
|
|
88
90
|
total=0
|
89
91
|
total_cols=cn.inject({}) {|a,x| a[x]=0;a}
|
90
92
|
out.add "Chi Square: #{chi_square}\n"
|
91
|
-
out.add
|
92
|
-
out.add
|
93
|
+
out.add(_("Rows: %s\n") % @row_label) unless @row_label.nil?
|
94
|
+
out.add(_("Columns: %s\n") % @column_label) unless @column_label.nil?
|
93
95
|
|
94
|
-
t=Statsample::ReportTable.new([""]+cols_names+["Total"])
|
96
|
+
t=Statsample::ReportTable.new([""]+cols_names+[_("Total")])
|
95
97
|
rn.each{|row|
|
96
98
|
total_row=0
|
97
99
|
t_row=[@v_rows.labeling(row)]
|
@@ -106,7 +108,7 @@ module Statsample
|
|
106
108
|
t.add_row(t_row)
|
107
109
|
}
|
108
110
|
t.add_horizontal_line
|
109
|
-
t_row=["Total"]
|
111
|
+
t_row=[_("Total")]
|
110
112
|
cn.each{|v|
|
111
113
|
t_row.push(total_cols[v])
|
112
114
|
}
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module Statsample
|
2
2
|
class DominanceAnalysis
|
3
3
|
class Bootstrap
|
4
|
+
include GetText
|
4
5
|
include Writable
|
6
|
+
bindtextdomain("statsample")
|
5
7
|
attr_reader :samples_td,:samples_cd,:samples_gd,:samples_ga, :fields
|
6
8
|
attr_writer :lr_class
|
7
9
|
attr_accessor :ds
|
@@ -24,10 +26,10 @@ class DominanceAnalysis
|
|
24
26
|
end
|
25
27
|
@da
|
26
28
|
end
|
27
|
-
def bootstrap(number_samples,n=nil)
|
29
|
+
def bootstrap(number_samples,n=nil,report=false)
|
28
30
|
number_samples.times{ |t|
|
29
31
|
@n_samples+=1
|
30
|
-
puts "Bootstrap
|
32
|
+
puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
|
31
33
|
ds_boot=@ds.bootstrap(n)
|
32
34
|
da_1=DominanceAnalysis.new(ds_boot,@y_var,@lr_class)
|
33
35
|
da_1.total_dominance.each{|k,v|
|
@@ -66,15 +68,15 @@ class DominanceAnalysis
|
|
66
68
|
alfa=0.95
|
67
69
|
t=GSL::Cdf.tdist_Pinv(1-((1-alfa) / 2),@n_samples - 1)
|
68
70
|
out.extend report_type
|
69
|
-
out.add "Summary for Bootstrap Dominance Analysis of "
|
70
|
-
out.add "
|
71
|
+
out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
|
72
|
+
out.add _("Sample size: %d\n") % @n_samples
|
71
73
|
out.add "t:#{t}\n"
|
72
74
|
out.add "Linear Regression Engine: #{@lr_class.name}"
|
73
75
|
out.nl
|
74
76
|
table=ReportTable.new
|
75
|
-
header=["pairs","sD","Dij","SE(Dij)","Pij","Pji","Pno","
|
77
|
+
header=[_("pairs"),"sD","Dij",_("SE(Dij)"),"Pij","Pji","Pno",_("Reproducibility")]
|
76
78
|
table.header=header
|
77
|
-
table.add_row(["Complete dominance"])
|
79
|
+
table.add_row([_("Complete dominance")])
|
78
80
|
table.add_horizontal_line
|
79
81
|
@pairs.each{|pair|
|
80
82
|
std=@samples_td[pair].to_vector(:scale)
|
@@ -82,7 +84,7 @@ class DominanceAnalysis
|
|
82
84
|
table.add_row(summary_pairs(pair,std,ttd))
|
83
85
|
}
|
84
86
|
table.add_horizontal_line
|
85
|
-
table.add_row(["Conditional dominance"])
|
87
|
+
table.add_row([_("Conditional dominance")])
|
86
88
|
table.add_horizontal_line
|
87
89
|
@pairs.each{|pair|
|
88
90
|
std=@samples_cd[pair].to_vector(:scale)
|
@@ -91,7 +93,7 @@ class DominanceAnalysis
|
|
91
93
|
|
92
94
|
}
|
93
95
|
table.add_horizontal_line
|
94
|
-
table.add_row(["General Dominance"])
|
96
|
+
table.add_row([_("General Dominance")])
|
95
97
|
table.add_horizontal_line
|
96
98
|
@pairs.each{|pair|
|
97
99
|
std=@samples_gd[pair].to_vector(:scale)
|
@@ -99,9 +101,9 @@ class DominanceAnalysis
|
|
99
101
|
table.add_row(summary_pairs(pair,std,ttd))
|
100
102
|
}
|
101
103
|
out.parse_table(table)
|
102
|
-
out.add("General averages")
|
104
|
+
out.add(_("General averages"))
|
103
105
|
table=Statsample::ReportTable.new
|
104
|
-
table.header=["var","mean","se","p.5","p.95"]
|
106
|
+
table.header=[_("var"),_("mean"),_("se"),_("p.5"),_("p.95")]
|
105
107
|
@fields.each{|f|
|
106
108
|
v=@samples_ga[f].to_vector(:scale)
|
107
109
|
row=[@ds.vector_label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'statsample/dominanceanalysis/bootstrap'
|
2
2
|
module Statsample
|
3
3
|
class DominanceAnalysis
|
4
|
+
include GetText
|
5
|
+
bindtextdomain("statsample")
|
4
6
|
def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
|
5
7
|
@y_var=y_var
|
6
8
|
@dy=ds[@y_var]
|
@@ -164,10 +166,10 @@ module Statsample
|
|
164
166
|
def summary(report_type=ConsoleSummary)
|
165
167
|
out=""
|
166
168
|
out.extend report_type
|
167
|
-
out << "Summary for Dominance Analysis of "
|
169
|
+
out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
|
168
170
|
t=Statsample::ReportTable.new
|
169
171
|
t.header=["","r2","sign"]+@fields
|
170
|
-
row=["Model 0","",""]+@fields.collect{|f|
|
172
|
+
row=[_("Model 0"),"",""]+@fields.collect{|f|
|
171
173
|
sprintf("%0.3f",md(f).r2)
|
172
174
|
}
|
173
175
|
t.add_row(row)
|
@@ -181,7 +183,7 @@ module Statsample
|
|
181
183
|
a=average_k(i)
|
182
184
|
if !a.nil?
|
183
185
|
t.add_horizontal_line
|
184
|
-
row=["k
|
186
|
+
row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
|
185
187
|
sprintf("%0.3f",a[f])
|
186
188
|
}
|
187
189
|
t.add_row(row)
|
@@ -194,18 +196,18 @@ module Statsample
|
|
194
196
|
g=general_averages
|
195
197
|
t.add_horizontal_line
|
196
198
|
|
197
|
-
row=["Overall averages","",""]+@fields.collect{|f|
|
199
|
+
row=[_("Overall averages"),"",""]+@fields.collect{|f|
|
198
200
|
sprintf("%0.3f",g[f])
|
199
201
|
}
|
200
202
|
t.add_row(row)
|
201
203
|
out.parse_table(t)
|
202
204
|
|
203
205
|
out.nl
|
204
|
-
out << "Pairwise\n"
|
206
|
+
out << _("Pairwise")+"\n"
|
205
207
|
td=total_dominance
|
206
208
|
cd=conditional_dominance
|
207
209
|
gd=general_dominance
|
208
|
-
t=Statsample::ReportTable.new(["Pairs","T","C","G"])
|
210
|
+
t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
|
209
211
|
pairs.each{|p|
|
210
212
|
name=p.join(" - ")
|
211
213
|
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
@@ -4,6 +4,8 @@ module Regression
|
|
4
4
|
module Multiple
|
5
5
|
# Class for Multiple Regression Analysis
|
6
6
|
# Requires Alglib gem and uses a listwise approach.
|
7
|
+
# Faster than GslEngine on massive prediction use, because process is c-based.
|
8
|
+
# Prefer GslEngine if you need good memory use.
|
7
9
|
# If you need pairwise, use RubyEngine
|
8
10
|
# Example:
|
9
11
|
#
|
@@ -4,6 +4,8 @@ module Regression
|
|
4
4
|
module Multiple
|
5
5
|
# Class for Multiple Regression Analysis
|
6
6
|
# Requires rbgsl and uses a listwise approach.
|
7
|
+
# Slower on prediction of values than Alglib, because predict is ruby based.
|
8
|
+
# Better memory management on multiple (+1000) series of regression.
|
7
9
|
# If you need pairwise, use RubyEngine
|
8
10
|
# Example:
|
9
11
|
#
|
@@ -42,9 +44,11 @@ class GslEngine < BaseEngine
|
|
42
44
|
}
|
43
45
|
@dep_columns=columns.dup
|
44
46
|
@lr_s=nil
|
45
|
-
|
46
|
-
@constant
|
47
|
-
@
|
47
|
+
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
48
|
+
@constant=c[constant_col]
|
49
|
+
@coeffs_a=c.to_a.slice(0...constant_col)
|
50
|
+
@coeffs=assign_names(@coeffs_a)
|
51
|
+
c=nil
|
48
52
|
end
|
49
53
|
|
50
54
|
def _dump(i)
|
@@ -56,7 +60,7 @@ class GslEngine < BaseEngine
|
|
56
60
|
end
|
57
61
|
|
58
62
|
def coeffs
|
59
|
-
|
63
|
+
@coeffs
|
60
64
|
end
|
61
65
|
# Coefficients using a constant
|
62
66
|
# Based on http://www.xycoon.com/ols1.htm
|
@@ -2,7 +2,8 @@ module Statsample
|
|
2
2
|
module Regression
|
3
3
|
module Multiple
|
4
4
|
# Pure Ruby Class for Multiple Regression Analysis.
|
5
|
-
# Slower than AlglibEngine, but is pure ruby and
|
5
|
+
# Slower than AlglibEngine, but is pure ruby and can use a pairwise approach for missing values.
|
6
|
+
# Coefficient calculation uses the correlation matrix between the vectors
|
6
7
|
# If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
|
7
8
|
#
|
8
9
|
# Example:
|
@@ -15,14 +15,14 @@ module Regression
|
|
15
15
|
|
16
16
|
|
17
17
|
module Multiple
|
18
|
-
# Creates an object for listwise regression.
|
19
|
-
#
|
18
|
+
# Creates an object for listwise regression.
|
19
|
+
# Alglib is faster, so it is preferred over GSL
|
20
20
|
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
21
21
|
def self.listwise(ds,y_var)
|
22
|
-
if
|
23
|
-
GslEngine.new(ds,y_var)
|
24
|
-
elsif HAS_ALGIB
|
22
|
+
if HAS_ALGIB
|
25
23
|
AlglibEngine.new(ds,y_var)
|
24
|
+
elsif HAS_GSL
|
25
|
+
GslEngine.new(ds,y_var)
|
26
26
|
else
|
27
27
|
ds2=ds.dup_only_valid
|
28
28
|
RubyEngine.new(ds2,y_var)
|
@@ -132,14 +132,7 @@ module Multiple
|
|
132
132
|
ds.each{|k,v|
|
133
133
|
ds[k]=v.to_vector(:scale)
|
134
134
|
}
|
135
|
-
|
136
|
-
lr_class=AlglibEngine
|
137
|
-
ds=ds.to_dataset
|
138
|
-
else
|
139
|
-
lr_class=RubyEngine
|
140
|
-
ds=ds.to_dataset.dup_only_valid
|
141
|
-
end
|
142
|
-
lr=lr_class.new(ds,var)
|
135
|
+
lr=Multiple.listwise(ds.to_dataset,var)
|
143
136
|
1-lr.r2
|
144
137
|
end
|
145
138
|
# Tolerances for each coefficient
|
data/lib/statsample/vector.rb
CHANGED
@@ -393,6 +393,14 @@ class Vector < DelegateClass(Array)
|
|
393
393
|
return "INTEGER"
|
394
394
|
end
|
395
395
|
end
|
396
|
+
# Return true if all data is Numeric or nil
|
397
|
+
def can_be_scale?
|
398
|
+
if @data.find {|v| !v.nil? and !v.is_a? Numeric}
|
399
|
+
false
|
400
|
+
else
|
401
|
+
true
|
402
|
+
end
|
403
|
+
end
|
396
404
|
def summary(out="")
|
397
405
|
@delegate.summary(@labels,out)
|
398
406
|
end
|
data/lib/statsample.rb
CHANGED
@@ -29,19 +29,43 @@ class Numeric
|
|
29
29
|
def square ; self * self ; end
|
30
30
|
end
|
31
31
|
|
32
|
+
class String
|
33
|
+
def is_number?
|
34
|
+
if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
|
35
|
+
true
|
36
|
+
else
|
37
|
+
false
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
32
41
|
|
33
42
|
def create_test(*args,&proc)
|
34
43
|
description=args.shift
|
35
44
|
fields=args
|
36
45
|
[description, fields, Proc.new]
|
37
46
|
end
|
38
|
-
|
47
|
+
# Test extensions
|
39
48
|
begin
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
49
|
+
require 'gettext'
|
50
|
+
rescue LoadError
|
51
|
+
def bindtextdomain(d)
|
52
|
+
d
|
53
|
+
end
|
54
|
+
|
55
|
+
# Fallback GetText module used when the gettext library cannot be loaded; _() returns its argument untranslated
|
56
|
+
module GetText
|
57
|
+
def _(t)
|
58
|
+
t
|
59
|
+
end
|
60
|
+
end
|
44
61
|
end
|
62
|
+
|
63
|
+
begin
|
64
|
+
require 'rbgsl'
|
65
|
+
HAS_GSL=true
|
66
|
+
rescue LoadError
|
67
|
+
HAS_GSL=false
|
68
|
+
end
|
45
69
|
begin
|
46
70
|
require 'alglib'
|
47
71
|
HAS_ALGIB=true
|
@@ -66,7 +90,7 @@ end
|
|
66
90
|
# * Dataset: An union of vectors.
|
67
91
|
#
|
68
92
|
module Statsample
|
69
|
-
VERSION = '0.3.
|
93
|
+
VERSION = '0.3.3'
|
70
94
|
SPLIT_TOKEN = ","
|
71
95
|
autoload(:Database, 'statsample/converters')
|
72
96
|
autoload(:Anova, 'statsample/anova')
|
@@ -82,6 +106,10 @@ module Statsample
|
|
82
106
|
autoload(:Reliability, 'statsample/reliability')
|
83
107
|
autoload(:Bivariate, 'statsample/bivariate')
|
84
108
|
autoload(:Multivariate, 'statsample/multivariate')
|
109
|
+
autoload(:Multiset, 'statsample/multiset')
|
110
|
+
autoload(:StratifiedSample, 'statsample/multiset')
|
111
|
+
|
112
|
+
|
85
113
|
autoload(:Regression, 'statsample/regression')
|
86
114
|
autoload(:Test, 'statsample/test')
|
87
115
|
def self.load(filename)
|
data/po/es/statsample.po
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
msgid ""
|
2
|
+
msgstr ""
|
3
|
+
"Project-Id-Version: statsample 0.3.3\n"
|
4
|
+
"POT-Creation-Date: 2009-08-10 11:59-0400\n"
|
5
|
+
"PO-Revision-Date: 2009-08-04 15:57-0300\n"
|
6
|
+
"Last-Translator: Claudio Bustos <clbustos@gmail.com>\n"
|
7
|
+
"Language-Team: Desarrollador\n"
|
8
|
+
"MIME-Version: 1.0\n"
|
9
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
10
|
+
"Content-Transfer-Encoding: 8bit\n"
|
11
|
+
"X-Poedit-Language: Spanish\n"
|
12
|
+
"X-Poedit-SourceCharset: utf-8\n"
|
13
|
+
|
14
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:32
|
15
|
+
msgid "Bootstrap %d of %d"
|
16
|
+
msgstr "Bootstrap: %d de %d"
|
17
|
+
|
18
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:71
|
19
|
+
msgid "Summary for Bootstrap Dominance Analysis of %s on %s\n"
|
20
|
+
msgstr "Resultados del Análisis de Dominancia Bootstrap de %s en %s\n"
|
21
|
+
|
22
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:72
|
23
|
+
msgid "Sample size: %d\n"
|
24
|
+
msgstr "Tamaño de muestra: %d\n"
|
25
|
+
|
26
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
27
|
+
msgid "pairs"
|
28
|
+
msgstr "pares"
|
29
|
+
|
30
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
31
|
+
msgid "SE(Dij)"
|
32
|
+
msgstr "EE(Dij)"
|
33
|
+
|
34
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
35
|
+
msgid "Reproducibility"
|
36
|
+
msgstr "Reproducibilidad"
|
37
|
+
|
38
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:79
|
39
|
+
msgid "Complete dominance"
|
40
|
+
msgstr "Dominancia Completa"
|
41
|
+
|
42
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:87
|
43
|
+
msgid "Conditional dominance"
|
44
|
+
msgstr "Dominancia Condicional"
|
45
|
+
|
46
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:96
|
47
|
+
msgid "General Dominance"
|
48
|
+
msgstr "Dominancia General"
|
49
|
+
|
50
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:104
|
51
|
+
msgid "General averages"
|
52
|
+
msgstr "Promedios generales"
|
53
|
+
|
54
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
55
|
+
msgid "var"
|
56
|
+
msgstr "var"
|
57
|
+
|
58
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
59
|
+
msgid "mean"
|
60
|
+
msgstr "promedio"
|
61
|
+
|
62
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
63
|
+
msgid "se"
|
64
|
+
msgstr "de"
|
65
|
+
|
66
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
67
|
+
msgid "p.5"
|
68
|
+
msgstr "p.5"
|
69
|
+
|
70
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
71
|
+
msgid "p.95"
|
72
|
+
msgstr "p.95"
|
73
|
+
|
74
|
+
#: lib/statsample/crosstab.rb:93
|
75
|
+
msgid "Rows: %s\n"
|
76
|
+
msgstr ""
|
77
|
+
|
78
|
+
#: lib/statsample/crosstab.rb:94
|
79
|
+
msgid "Columns: %s\n"
|
80
|
+
msgstr ""
|
81
|
+
|
82
|
+
#: lib/statsample/crosstab.rb:96 lib/statsample/crosstab.rb:111
|
83
|
+
msgid "Total"
|
84
|
+
msgstr ""
|
85
|
+
|
86
|
+
#: lib/statsample/dominanceanalysis.rb:169
|
87
|
+
msgid "Summary for Dominance Analysis of %s on %s\n"
|
88
|
+
msgstr "Resultado del Análisis de Dominancia de %s en %s\n"
|
89
|
+
|
90
|
+
#: lib/statsample/dominanceanalysis.rb:172
|
91
|
+
msgid "Model 0"
|
92
|
+
msgstr "Modelo 0"
|
93
|
+
|
94
|
+
#: lib/statsample/dominanceanalysis.rb:186
|
95
|
+
msgid "k=%d Average"
|
96
|
+
msgstr "k=%d Promedio"
|
97
|
+
|
98
|
+
#: lib/statsample/dominanceanalysis.rb:199
|
99
|
+
msgid "Overall averages"
|
100
|
+
msgstr "Promedios generales"
|
101
|
+
|
102
|
+
#: lib/statsample/dominanceanalysis.rb:206
|
103
|
+
msgid "Pairwise"
|
104
|
+
msgstr "De a pares"
|
105
|
+
|
106
|
+
#: lib/statsample/dominanceanalysis.rb:210
|
107
|
+
msgid "Pairs"
|
108
|
+
msgstr "Pares"
|
data/po/statsample.pot
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# Statsample po template.
|
2
|
+
# Copyright (C) 2009-2009 Claudio Bustos
|
3
|
+
# This file is distributed under the same license as the Statsample package.
|
4
|
+
# Claudio Bustos <clbustos_AT_gmail.com>
|
5
|
+
#
|
6
|
+
#, fuzzy
|
7
|
+
msgid ""
|
8
|
+
msgstr ""
|
9
|
+
"Project-Id-Version: statsample 0.3.3\n"
|
10
|
+
"POT-Creation-Date: 2009-08-10 11:59-0400\n"
|
11
|
+
"PO-Revision-Date: 2009-08-04 15:36-0400\n"
|
12
|
+
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
13
|
+
"Language-Team: LANGUAGE <LL@li.org>\n"
|
14
|
+
"MIME-Version: 1.0\n"
|
15
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
16
|
+
"Content-Transfer-Encoding: 8bit\n"
|
17
|
+
"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"
|
18
|
+
|
19
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:32
|
20
|
+
msgid "Bootstrap %d of %d"
|
21
|
+
msgstr ""
|
22
|
+
|
23
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:71
|
24
|
+
msgid "Summary for Bootstrap Dominance Analysis of %s on %s\n"
|
25
|
+
msgstr ""
|
26
|
+
|
27
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:72
|
28
|
+
msgid "Sample size: %d\n"
|
29
|
+
msgstr ""
|
30
|
+
|
31
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
32
|
+
msgid "pairs"
|
33
|
+
msgstr ""
|
34
|
+
|
35
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
36
|
+
msgid "SE(Dij)"
|
37
|
+
msgstr ""
|
38
|
+
|
39
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
40
|
+
msgid "Reproducibility"
|
41
|
+
msgstr ""
|
42
|
+
|
43
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:79
|
44
|
+
msgid "Complete dominance"
|
45
|
+
msgstr ""
|
46
|
+
|
47
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:87
|
48
|
+
msgid "Conditional dominance"
|
49
|
+
msgstr ""
|
50
|
+
|
51
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:96
|
52
|
+
msgid "General Dominance"
|
53
|
+
msgstr ""
|
54
|
+
|
55
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:104
|
56
|
+
msgid "General averages"
|
57
|
+
msgstr ""
|
58
|
+
|
59
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
60
|
+
msgid "var"
|
61
|
+
msgstr ""
|
62
|
+
|
63
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
64
|
+
msgid "mean"
|
65
|
+
msgstr ""
|
66
|
+
|
67
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
68
|
+
msgid "se"
|
69
|
+
msgstr ""
|
70
|
+
|
71
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
72
|
+
msgid "p.5"
|
73
|
+
msgstr ""
|
74
|
+
|
75
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
76
|
+
msgid "p.95"
|
77
|
+
msgstr ""
|
78
|
+
|
79
|
+
#: lib/statsample/crosstab.rb:93
|
80
|
+
msgid "Rows: %s\n"
|
81
|
+
msgstr ""
|
82
|
+
|
83
|
+
#: lib/statsample/crosstab.rb:94
|
84
|
+
msgid "Columns: %s\n"
|
85
|
+
msgstr ""
|
86
|
+
|
87
|
+
#: lib/statsample/crosstab.rb:96 lib/statsample/crosstab.rb:111
|
88
|
+
msgid "Total"
|
89
|
+
msgstr ""
|
90
|
+
|
91
|
+
#: lib/statsample/dominanceanalysis.rb:169
|
92
|
+
msgid "Summary for Dominance Analysis of %s on %s\n"
|
93
|
+
msgstr ""
|
94
|
+
|
95
|
+
#: lib/statsample/dominanceanalysis.rb:172
|
96
|
+
msgid "Model 0"
|
97
|
+
msgstr ""
|
98
|
+
|
99
|
+
#: lib/statsample/dominanceanalysis.rb:186
|
100
|
+
msgid "k=%d Average"
|
101
|
+
msgstr ""
|
102
|
+
|
103
|
+
#: lib/statsample/dominanceanalysis.rb:199
|
104
|
+
msgid "Overall averages"
|
105
|
+
msgstr ""
|
106
|
+
|
107
|
+
#: lib/statsample/dominanceanalysis.rb:206
|
108
|
+
msgid "Pairwise"
|
109
|
+
msgstr ""
|
110
|
+
|
111
|
+
#: lib/statsample/dominanceanalysis.rb:210
|
112
|
+
msgid "Pairs"
|
113
|
+
msgstr ""
|
data/test/test_anova.rb
CHANGED
data/test/test_codification.rb
CHANGED
data/test/test_crosstab.rb
CHANGED
data/test/test_csv.csv
CHANGED
data/test/test_csv.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
2
3
|
require 'tmpdir'
|
3
4
|
require 'test/unit'
|
4
5
|
|
@@ -8,8 +9,20 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
|
|
8
9
|
super
|
9
10
|
end
|
10
11
|
def test_read
|
11
|
-
|
12
|
+
assert_equal(6,@ds.cases)
|
12
13
|
assert_equal(%w{id name age city a1},@ds.fields)
|
14
|
+
id=[1,2,3,4,5,6].to_vector(:scale)
|
15
|
+
name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
|
16
|
+
age=[20,23,25,27,5.5,nil].to_vector(:scale)
|
17
|
+
city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
|
18
|
+
a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
|
19
|
+
ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
|
20
|
+
ds_exp.fields.each{|f|
|
21
|
+
assert_equal(ds_exp[f],@ds[f])
|
22
|
+
}
|
23
|
+
assert_equal(ds_exp,@ds)
|
24
|
+
|
25
|
+
|
13
26
|
end
|
14
27
|
def test_nil
|
15
28
|
assert_equal(nil,@ds['age'][5])
|
data/test/test_dataset.rb
CHANGED
data/test/test_ggobi.rb
CHANGED
data/test/test_multiset.rb
CHANGED
data/test/test_regression.rb
CHANGED
data/test/test_reliability.rb
CHANGED
data/test/test_resample.rb
CHANGED
data/test/test_srs.rb
CHANGED
data/test/test_statistics.rb
CHANGED
@@ -1,10 +1,23 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
2
3
|
require 'test/unit'
|
3
4
|
class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
4
5
|
|
5
6
|
def initialize(*args)
|
6
7
|
super
|
7
8
|
end
|
9
|
+
def test_is_number
|
10
|
+
assert("10".is_number?)
|
11
|
+
assert("-10".is_number?)
|
12
|
+
assert("0.1".is_number?)
|
13
|
+
assert("-0.1".is_number?)
|
14
|
+
assert("10e3".is_number?)
|
15
|
+
assert("10e-3".is_number?)
|
16
|
+
assert(!"1212-1212-1".is_number?)
|
17
|
+
assert(!"a10".is_number?)
|
18
|
+
assert(!"".is_number?)
|
19
|
+
|
20
|
+
end
|
8
21
|
def test_chi_square
|
9
22
|
assert_raise TypeError do
|
10
23
|
Statsample::Test.chi_square(1,1)
|
data/test/test_stratified.rb
CHANGED
data/test/test_svg_graph.rb
CHANGED
data/test/test_vector.rb
CHANGED
data/test/test_xls.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
2
3
|
require 'test/unit'
|
3
4
|
require 'tmpdir'
|
4
5
|
begin
|
@@ -15,6 +16,17 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
|
|
15
16
|
def test_read
|
16
17
|
assert_equal(6,@ds.cases)
|
17
18
|
assert_equal(%w{id name age city a1},@ds.fields)
|
19
|
+
id=[1,2,3,4,5,6].to_vector(:scale)
|
20
|
+
name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
|
21
|
+
age=[20,23,25,nil,5.5,nil].to_vector(:scale)
|
22
|
+
city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
|
23
|
+
a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
|
24
|
+
ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
|
25
|
+
ds_exp.fields.each{|f|
|
26
|
+
assert_equal(ds_exp[f],@ds[f])
|
27
|
+
}
|
28
|
+
assert_equal(ds_exp,@ds)
|
29
|
+
|
18
30
|
end
|
19
31
|
def test_nil
|
20
32
|
assert_equal(nil,@ds['age'][5])
|
data/test/test_xls.xls
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-11 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.0.0
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: hoe
|
@@ -40,9 +40,9 @@ dependencies:
|
|
40
40
|
requirements:
|
41
41
|
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 2.3.
|
43
|
+
version: 2.3.3
|
44
44
|
version:
|
45
|
-
description:
|
45
|
+
description: A suite for your basic and advanced statistics needs. Descriptive statistics, multiple regression, dominance analysis, scale's reliability analysis, bivariate statistics and other procedures.
|
46
46
|
email:
|
47
47
|
- clbustos@gmail.com
|
48
48
|
executables:
|
@@ -60,6 +60,7 @@ files:
|
|
60
60
|
- Manifest.txt
|
61
61
|
- README.txt
|
62
62
|
- bin/statsample
|
63
|
+
- data/locale/es/LC_MESSAGES/statsample.mo
|
63
64
|
- demo/benchmark.rb
|
64
65
|
- demo/chi-square.rb
|
65
66
|
- demo/crosstab.rb
|
@@ -104,6 +105,8 @@ files:
|
|
104
105
|
- lib/statsample/srs.rb
|
105
106
|
- lib/statsample/test.rb
|
106
107
|
- lib/statsample/vector.rb
|
108
|
+
- po/es/statsample.po
|
109
|
+
- po/statsample.pot
|
107
110
|
- setup.rb
|
108
111
|
- test/_test_chart.rb
|
109
112
|
- test/test_anova.rb
|
@@ -125,7 +128,7 @@ files:
|
|
125
128
|
- test/test_xls.rb
|
126
129
|
- test/test_xls.xls
|
127
130
|
has_rdoc: true
|
128
|
-
homepage: http://rubyforge.org/
|
131
|
+
homepage: http://ruby-statsample.rubyforge.org/
|
129
132
|
licenses: []
|
130
133
|
|
131
134
|
post_install_message:
|
@@ -152,7 +155,7 @@ rubyforge_project: ruby-statsample
|
|
152
155
|
rubygems_version: 1.3.5
|
153
156
|
signing_key:
|
154
157
|
specification_version: 3
|
155
|
-
summary:
|
158
|
+
summary: A suite for your basic and advanced statistics needs
|
156
159
|
test_files:
|
157
160
|
- test/test_anova.rb
|
158
161
|
- test/test_codification.rb
|