statsample 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +3 -0
- data/README.txt +7 -3
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/regression.rb +12 -3
- data/lib/statsample/converters.rb +48 -26
- data/lib/statsample/crosstab.rb +6 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +12 -10
- data/lib/statsample/dominanceanalysis.rb +8 -6
- data/lib/statsample/regression/multiple/alglibengine.rb +2 -0
- data/lib/statsample/regression/multiple/gslengine.rb +8 -4
- data/lib/statsample/regression/multiple/rubyengine.rb +2 -1
- data/lib/statsample/regression/multiple.rb +6 -13
- data/lib/statsample/vector.rb +8 -0
- data/lib/statsample.rb +34 -6
- data/po/es/statsample.po +108 -0
- data/po/statsample.pot +113 -0
- data/test/test_anova.rb +2 -1
- data/test/test_codification.rb +2 -1
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.csv +1 -1
- data/test/test_csv.rb +15 -2
- data/test/test_dataset.rb +2 -1
- data/test/test_ggobi.rb +2 -2
- data/test/test_multiset.rb +2 -2
- data/test/test_regression.rb +2 -1
- data/test/test_reliability.rb +2 -1
- data/test/test_resample.rb +2 -1
- data/test/test_srs.rb +2 -1
- data/test/test_statistics.rb +14 -1
- data/test/test_stratified.rb +2 -2
- data/test/test_svg_graph.rb +2 -1
- data/test/test_vector.rb +2 -1
- data/test/test_xls.rb +13 -1
- data/test/test_xls.xls +0 -0
- metadata +10 -7
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
=== 0.3.3 / 2009-08-11
|
2
|
+
* Added i18n support. For now, only a Spanish translation is available
|
3
|
+
* Bug fix: Tests now load libraries from the ../lib path
|
4
|
+
* Excel and CSV importers automatically change the type of a vector to Scale when all of its data are numbers or nil values
|
5
|
+
*
|
6
|
+
|
1
7
|
=== 0.3.2 / 2009-08-04
|
2
8
|
|
3
9
|
* Added Regression::Multiple::GslEngine
|
data/Manifest.txt
CHANGED
@@ -3,6 +3,7 @@ LICENSE.txt
|
|
3
3
|
Manifest.txt
|
4
4
|
README.txt
|
5
5
|
bin/statsample
|
6
|
+
data/locale/es/LC_MESSAGES/statsample.mo
|
6
7
|
demo/benchmark.rb
|
7
8
|
demo/chi-square.rb
|
8
9
|
demo/crosstab.rb
|
@@ -47,6 +48,8 @@ lib/statsample/resample.rb
|
|
47
48
|
lib/statsample/srs.rb
|
48
49
|
lib/statsample/test.rb
|
49
50
|
lib/statsample/vector.rb
|
51
|
+
po/es/statsample.po
|
52
|
+
po/statsample.pot
|
50
53
|
setup.rb
|
51
54
|
test/_test_chart.rb
|
52
55
|
test/test_anova.rb
|
data/README.txt
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
= Statsample
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
http://ruby-statsample.rubyforge.org/
|
4
|
+
|
5
5
|
|
6
6
|
== DESCRIPTION:
|
7
7
|
|
8
|
-
|
8
|
+
A suite for your basic and advanced statistics needs. Descriptive statistics, multiple regression, dominance analysis, scale's reliability analysis, bivariate statistics and other procedures.
|
9
9
|
|
10
10
|
== FEATURES:
|
11
11
|
|
@@ -63,6 +63,10 @@ Optional:
|
|
63
63
|
* Plotting: gnuplot and rbgnuplot, SVG::Graph
|
64
64
|
* Advanced Statistical: gsl and rb-gsl (http://rb-gsl.rubyforge.org/)
|
65
65
|
|
66
|
+
== DOWNLOAD
|
67
|
+
* Gems and bug reports: http://rubyforge.org/projects/ruby-statsample/
|
68
|
+
* SVN and Wiki: http://code.google.com/p/ruby-statsample/
|
69
|
+
|
66
70
|
== INSTALL:
|
67
71
|
|
68
72
|
sudo gem install ruby-statsample
|
Binary file
|
data/demo/regression.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'benchmark'
|
2
3
|
tests=300
|
3
4
|
include Statsample
|
4
5
|
r = GSL::Rng.alloc(GSL::Rng::TAUS,Time.now.to_i)
|
@@ -24,9 +25,17 @@ if !File.exists? "regression.dab"
|
|
24
25
|
else
|
25
26
|
da=Statsample.load("regression.dab")
|
26
27
|
end
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
times=1
|
29
|
+
Benchmark.bm(7) do |x|
|
30
|
+
x.report("GslEngine:") {
|
31
|
+
da.lr_class=Regression::Multiple::GslEngine
|
32
|
+
da.bootstrap(times)
|
33
|
+
}
|
34
|
+
x.report("AlglibEngine:") {
|
35
|
+
da.lr_class=Regression::Multiple::AlglibEngine
|
36
|
+
da.bootstrap(times)
|
37
|
+
}
|
38
|
+
end
|
30
39
|
|
31
40
|
puts da.summary
|
32
41
|
da.save("regression.dab")
|
@@ -78,7 +78,46 @@ module Statsample
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
end
|
81
|
-
|
81
|
+
class SpreadsheetBase
|
82
|
+
class << self
|
83
|
+
def extract_fields(row)
|
84
|
+
fields=row.to_a.collect{|c| c.downcase}
|
85
|
+
if fields.size!=fields.uniq.size
|
86
|
+
repeated=fields.inject({}) {|a,v|
|
87
|
+
(a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v|k}.join(",")
|
88
|
+
raise "There are some repeated fields on the header:#{repeated}. Please, fix"
|
89
|
+
end
|
90
|
+
fields
|
91
|
+
end
|
92
|
+
|
93
|
+
def process_row(row,empty)
|
94
|
+
row.to_a.collect do |c|
|
95
|
+
if empty.include?(c)
|
96
|
+
nil
|
97
|
+
else
|
98
|
+
if c.is_a? String and c.is_number?
|
99
|
+
if c=~/^\d+$/
|
100
|
+
c.to_i
|
101
|
+
else
|
102
|
+
c.gsub(",",".").to_f
|
103
|
+
end
|
104
|
+
else
|
105
|
+
c
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
def convert_to_scale(ds,fields)
|
111
|
+
fields.each do |f|
|
112
|
+
if ds[f].can_be_scale?
|
113
|
+
ds[f].type=:scale
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
end
|
119
|
+
end
|
120
|
+
class Excel < SpreadsheetBase
|
82
121
|
class << self
|
83
122
|
def write(dataset,filename)
|
84
123
|
require 'spreadsheet'
|
@@ -101,7 +140,6 @@ module Statsample
|
|
101
140
|
#
|
102
141
|
def read(filename, worksheet_id=0, ignore_lines=0, empty=[''])
|
103
142
|
require 'spreadsheet'
|
104
|
-
|
105
143
|
first_row=true
|
106
144
|
fields=[]
|
107
145
|
fields_data={}
|
@@ -121,35 +159,28 @@ module Statsample
|
|
121
159
|
if c.is_a? Spreadsheet::Formula
|
122
160
|
nil
|
123
161
|
else
|
124
|
-
c
|
162
|
+
c
|
125
163
|
end
|
126
164
|
}
|
127
165
|
if first_row
|
128
|
-
fields=row
|
129
|
-
if fields.size!=fields.uniq.size
|
130
|
-
repeated=fields.inject({}) {|a,v|
|
131
|
-
(a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v|k}.join(",")
|
132
|
-
raise "There are some repeated fields on the header:#{repeated}. Please, fix"
|
133
|
-
end
|
166
|
+
fields=extract_fields(row)
|
134
167
|
ds=Statsample::Dataset.new(fields)
|
135
168
|
first_row=false
|
136
169
|
else
|
137
|
-
rowa=row
|
138
|
-
|
139
|
-
empty.include?(c) ? nil: c
|
140
|
-
}
|
170
|
+
rowa=process_row(row,empty)
|
141
171
|
(fields.size - rowa.size).times {|i|
|
142
172
|
rowa << nil
|
143
173
|
}
|
144
174
|
ds.add_case(rowa,false)
|
145
175
|
end
|
146
176
|
end
|
177
|
+
convert_to_scale(ds,fields)
|
147
178
|
ds.update_valid_data
|
148
179
|
ds
|
149
180
|
end
|
150
181
|
end
|
151
182
|
end
|
152
|
-
|
183
|
+
class CSV < SpreadsheetBase
|
153
184
|
class << self
|
154
185
|
# Returns a Dataset based on a csv file
|
155
186
|
#
|
@@ -157,7 +188,6 @@ module Statsample
|
|
157
188
|
# ds=Statsample::CSV.read("test_csv.csv")
|
158
189
|
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
159
190
|
require 'csv'
|
160
|
-
|
161
191
|
first_row=true
|
162
192
|
fields=[]
|
163
193
|
fields_data={}
|
@@ -173,23 +203,15 @@ module Statsample
|
|
173
203
|
c.to_s
|
174
204
|
}
|
175
205
|
if first_row
|
176
|
-
fields=row
|
177
|
-
if fields.size!=fields.uniq.size
|
178
|
-
repeated=fields.inject({}) {|a,v|
|
179
|
-
(a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v|k}.join(",")
|
180
|
-
|
181
|
-
raise "There are some repeated fields on the header:#{repeated}. Please, fix"
|
182
|
-
end
|
206
|
+
fields=extract_fields(row)
|
183
207
|
ds=Statsample::Dataset.new(fields)
|
184
208
|
first_row=false
|
185
209
|
else
|
186
|
-
rowa=row
|
187
|
-
empty.include?(c) ? nil: c
|
188
|
-
}
|
189
|
-
|
210
|
+
rowa=process_row(row,empty)
|
190
211
|
ds.add_case(rowa,false)
|
191
212
|
end
|
192
213
|
end
|
214
|
+
convert_to_scale(ds,fields)
|
193
215
|
ds.update_valid_data
|
194
216
|
ds
|
195
217
|
end
|
data/lib/statsample/crosstab.rb
CHANGED
@@ -4,6 +4,8 @@ module Statsample
|
|
4
4
|
# The first vector will be at rows and the second will be the columns
|
5
5
|
#
|
6
6
|
class Crosstab
|
7
|
+
include GetText
|
8
|
+
bindtextdomain("statsample")
|
7
9
|
attr_reader :v_rows, :v_cols
|
8
10
|
attr_accessor :row_label, :column_label
|
9
11
|
def initialize(v1,v2)
|
@@ -88,10 +90,10 @@ module Statsample
|
|
88
90
|
total=0
|
89
91
|
total_cols=cn.inject({}) {|a,x| a[x]=0;a}
|
90
92
|
out.add "Chi Square: #{chi_square}\n"
|
91
|
-
out.add
|
92
|
-
out.add
|
93
|
+
out.add(_("Rows: %s\n") % @row_label) unless @row_label.nil?
|
94
|
+
out.add(_("Columns: %s\n") % @column_label) unless @column_label.nil?
|
93
95
|
|
94
|
-
t=Statsample::ReportTable.new([""]+cols_names+["Total"])
|
96
|
+
t=Statsample::ReportTable.new([""]+cols_names+[_("Total")])
|
95
97
|
rn.each{|row|
|
96
98
|
total_row=0
|
97
99
|
t_row=[@v_rows.labeling(row)]
|
@@ -106,7 +108,7 @@ module Statsample
|
|
106
108
|
t.add_row(t_row)
|
107
109
|
}
|
108
110
|
t.add_horizontal_line
|
109
|
-
t_row=["Total"]
|
111
|
+
t_row=[_("Total")]
|
110
112
|
cn.each{|v|
|
111
113
|
t_row.push(total_cols[v])
|
112
114
|
}
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module Statsample
|
2
2
|
class DominanceAnalysis
|
3
3
|
class Bootstrap
|
4
|
+
include GetText
|
4
5
|
include Writable
|
6
|
+
bindtextdomain("statsample")
|
5
7
|
attr_reader :samples_td,:samples_cd,:samples_gd,:samples_ga, :fields
|
6
8
|
attr_writer :lr_class
|
7
9
|
attr_accessor :ds
|
@@ -24,10 +26,10 @@ class DominanceAnalysis
|
|
24
26
|
end
|
25
27
|
@da
|
26
28
|
end
|
27
|
-
def bootstrap(number_samples,n=nil)
|
29
|
+
def bootstrap(number_samples,n=nil,report=false)
|
28
30
|
number_samples.times{ |t|
|
29
31
|
@n_samples+=1
|
30
|
-
puts "Bootstrap
|
32
|
+
puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
|
31
33
|
ds_boot=@ds.bootstrap(n)
|
32
34
|
da_1=DominanceAnalysis.new(ds_boot,@y_var,@lr_class)
|
33
35
|
da_1.total_dominance.each{|k,v|
|
@@ -66,15 +68,15 @@ class DominanceAnalysis
|
|
66
68
|
alfa=0.95
|
67
69
|
t=GSL::Cdf.tdist_Pinv(1-((1-alfa) / 2),@n_samples - 1)
|
68
70
|
out.extend report_type
|
69
|
-
out.add "Summary for Bootstrap Dominance Analysis of "
|
70
|
-
out.add "
|
71
|
+
out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
|
72
|
+
out.add _("Sample size: %d\n") % @n_samples
|
71
73
|
out.add "t:#{t}\n"
|
72
74
|
out.add "Linear Regression Engine: #{@lr_class.name}"
|
73
75
|
out.nl
|
74
76
|
table=ReportTable.new
|
75
|
-
header=["pairs","sD","Dij","SE(Dij)","Pij","Pji","Pno","
|
77
|
+
header=[_("pairs"),"sD","Dij",_("SE(Dij)"),"Pij","Pji","Pno",_("Reproducibility")]
|
76
78
|
table.header=header
|
77
|
-
table.add_row(["Complete dominance"])
|
79
|
+
table.add_row([_("Complete dominance")])
|
78
80
|
table.add_horizontal_line
|
79
81
|
@pairs.each{|pair|
|
80
82
|
std=@samples_td[pair].to_vector(:scale)
|
@@ -82,7 +84,7 @@ class DominanceAnalysis
|
|
82
84
|
table.add_row(summary_pairs(pair,std,ttd))
|
83
85
|
}
|
84
86
|
table.add_horizontal_line
|
85
|
-
table.add_row(["Conditional dominance"])
|
87
|
+
table.add_row([_("Conditional dominance")])
|
86
88
|
table.add_horizontal_line
|
87
89
|
@pairs.each{|pair|
|
88
90
|
std=@samples_cd[pair].to_vector(:scale)
|
@@ -91,7 +93,7 @@ class DominanceAnalysis
|
|
91
93
|
|
92
94
|
}
|
93
95
|
table.add_horizontal_line
|
94
|
-
table.add_row(["General Dominance"])
|
96
|
+
table.add_row([_("General Dominance")])
|
95
97
|
table.add_horizontal_line
|
96
98
|
@pairs.each{|pair|
|
97
99
|
std=@samples_gd[pair].to_vector(:scale)
|
@@ -99,9 +101,9 @@ class DominanceAnalysis
|
|
99
101
|
table.add_row(summary_pairs(pair,std,ttd))
|
100
102
|
}
|
101
103
|
out.parse_table(table)
|
102
|
-
out.add("General averages")
|
104
|
+
out.add(_("General averages"))
|
103
105
|
table=Statsample::ReportTable.new
|
104
|
-
table.header=["var","mean","se","p.5","p.95"]
|
106
|
+
table.header=[_("var"),_("mean"),_("se"),_("p.5"),_("p.95")]
|
105
107
|
@fields.each{|f|
|
106
108
|
v=@samples_ga[f].to_vector(:scale)
|
107
109
|
row=[@ds.vector_label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'statsample/dominanceanalysis/bootstrap'
|
2
2
|
module Statsample
|
3
3
|
class DominanceAnalysis
|
4
|
+
include GetText
|
5
|
+
bindtextdomain("statsample")
|
4
6
|
def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
|
5
7
|
@y_var=y_var
|
6
8
|
@dy=ds[@y_var]
|
@@ -164,10 +166,10 @@ module Statsample
|
|
164
166
|
def summary(report_type=ConsoleSummary)
|
165
167
|
out=""
|
166
168
|
out.extend report_type
|
167
|
-
out << "Summary for Dominance Analysis of "
|
169
|
+
out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
|
168
170
|
t=Statsample::ReportTable.new
|
169
171
|
t.header=["","r2","sign"]+@fields
|
170
|
-
row=["Model 0","",""]+@fields.collect{|f|
|
172
|
+
row=[_("Model 0"),"",""]+@fields.collect{|f|
|
171
173
|
sprintf("%0.3f",md(f).r2)
|
172
174
|
}
|
173
175
|
t.add_row(row)
|
@@ -181,7 +183,7 @@ module Statsample
|
|
181
183
|
a=average_k(i)
|
182
184
|
if !a.nil?
|
183
185
|
t.add_horizontal_line
|
184
|
-
row=["k
|
186
|
+
row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
|
185
187
|
sprintf("%0.3f",a[f])
|
186
188
|
}
|
187
189
|
t.add_row(row)
|
@@ -194,18 +196,18 @@ module Statsample
|
|
194
196
|
g=general_averages
|
195
197
|
t.add_horizontal_line
|
196
198
|
|
197
|
-
row=["Overall averages","",""]+@fields.collect{|f|
|
199
|
+
row=[_("Overall averages"),"",""]+@fields.collect{|f|
|
198
200
|
sprintf("%0.3f",g[f])
|
199
201
|
}
|
200
202
|
t.add_row(row)
|
201
203
|
out.parse_table(t)
|
202
204
|
|
203
205
|
out.nl
|
204
|
-
out << "Pairwise\n"
|
206
|
+
out << _("Pairwise")+"\n"
|
205
207
|
td=total_dominance
|
206
208
|
cd=conditional_dominance
|
207
209
|
gd=general_dominance
|
208
|
-
t=Statsample::ReportTable.new(["Pairs","T","C","G"])
|
210
|
+
t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
|
209
211
|
pairs.each{|p|
|
210
212
|
name=p.join(" - ")
|
211
213
|
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
@@ -4,6 +4,8 @@ module Regression
|
|
4
4
|
module Multiple
|
5
5
|
# Class for Multiple Regression Analysis
|
6
6
|
# Requires Alglib gem and uses a listwise approach.
|
7
|
+
# Faster than GslEngine on massive prediction use, because process is c-based.
|
8
|
+
# Prefer GslEngine if you need good memory use.
|
7
9
|
# If you need pairwise, use RubyEngine
|
8
10
|
# Example:
|
9
11
|
#
|
@@ -4,6 +4,8 @@ module Regression
|
|
4
4
|
module Multiple
|
5
5
|
# Class for Multiple Regression Analysis
|
6
6
|
# Requires rbgsl and uses a listwise approach.
|
7
|
+
# Slower on prediction of values than Alglib, because predict is ruby based.
|
8
|
+
# Better memory management on multiple (+1000) series of regression.
|
7
9
|
# If you need pairwise, use RubyEngine
|
8
10
|
# Example:
|
9
11
|
#
|
@@ -42,9 +44,11 @@ class GslEngine < BaseEngine
|
|
42
44
|
}
|
43
45
|
@dep_columns=columns.dup
|
44
46
|
@lr_s=nil
|
45
|
-
|
46
|
-
@constant
|
47
|
-
@
|
47
|
+
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
48
|
+
@constant=c[constant_col]
|
49
|
+
@coeffs_a=c.to_a.slice(0...constant_col)
|
50
|
+
@coeffs=assign_names(@coeffs_a)
|
51
|
+
c=nil
|
48
52
|
end
|
49
53
|
|
50
54
|
def _dump(i)
|
@@ -56,7 +60,7 @@ class GslEngine < BaseEngine
|
|
56
60
|
end
|
57
61
|
|
58
62
|
def coeffs
|
59
|
-
|
63
|
+
@coeffs
|
60
64
|
end
|
61
65
|
# Coefficients using a constant
|
62
66
|
# Based on http://www.xycoon.com/ols1.htm
|
@@ -2,7 +2,8 @@ module Statsample
|
|
2
2
|
module Regression
|
3
3
|
module Multiple
|
4
4
|
# Pure Ruby Class for Multiple Regression Analysis.
|
5
|
-
# Slower than AlglibEngine, but is pure ruby and
|
5
|
+
# Slower than AlglibEngine, but is pure ruby and can use a pairwise approach for missing values.
|
6
|
+
# Coefficient calculation uses the correlation matrix between the vectors
|
6
7
|
# If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
|
7
8
|
#
|
8
9
|
# Example:
|
@@ -15,14 +15,14 @@ module Regression
|
|
15
15
|
|
16
16
|
|
17
17
|
module Multiple
|
18
|
-
# Creates an object for listwise regression.
|
19
|
-
#
|
18
|
+
# Creates an object for listwise regression.
|
19
|
+
# Alglib is faster, so it is preferred over GSL
|
20
20
|
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
21
21
|
def self.listwise(ds,y_var)
|
22
|
-
if
|
23
|
-
GslEngine.new(ds,y_var)
|
24
|
-
elsif HAS_ALGIB
|
22
|
+
if HAS_ALGIB
|
25
23
|
AlglibEngine.new(ds,y_var)
|
24
|
+
elsif HAS_GSL
|
25
|
+
GslEngine.new(ds,y_var)
|
26
26
|
else
|
27
27
|
ds2=ds.dup_only_valid
|
28
28
|
RubyEngine.new(ds2,y_var)
|
@@ -132,14 +132,7 @@ module Multiple
|
|
132
132
|
ds.each{|k,v|
|
133
133
|
ds[k]=v.to_vector(:scale)
|
134
134
|
}
|
135
|
-
|
136
|
-
lr_class=AlglibEngine
|
137
|
-
ds=ds.to_dataset
|
138
|
-
else
|
139
|
-
lr_class=RubyEngine
|
140
|
-
ds=ds.to_dataset.dup_only_valid
|
141
|
-
end
|
142
|
-
lr=lr_class.new(ds,var)
|
135
|
+
lr=Multiple.listwise(ds.to_dataset,var)
|
143
136
|
1-lr.r2
|
144
137
|
end
|
145
138
|
# Tolerances for each coefficient
|
data/lib/statsample/vector.rb
CHANGED
@@ -393,6 +393,14 @@ class Vector < DelegateClass(Array)
|
|
393
393
|
return "INTEGER"
|
394
394
|
end
|
395
395
|
end
|
396
|
+
# Return true if all data is Numeric or nil
|
397
|
+
def can_be_scale?
|
398
|
+
if @data.find {|v| !v.nil? and !v.is_a? Numeric}
|
399
|
+
false
|
400
|
+
else
|
401
|
+
true
|
402
|
+
end
|
403
|
+
end
|
396
404
|
def summary(out="")
|
397
405
|
@delegate.summary(@labels,out)
|
398
406
|
end
|
data/lib/statsample.rb
CHANGED
@@ -29,19 +29,43 @@ class Numeric
|
|
29
29
|
def square ; self * self ; end
|
30
30
|
end
|
31
31
|
|
32
|
+
class String
|
33
|
+
def is_number?
|
34
|
+
if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
|
35
|
+
true
|
36
|
+
else
|
37
|
+
false
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
32
41
|
|
33
42
|
def create_test(*args,&proc)
|
34
43
|
description=args.shift
|
35
44
|
fields=args
|
36
45
|
[description, fields, Proc.new]
|
37
46
|
end
|
38
|
-
|
47
|
+
# Test extensions
|
39
48
|
begin
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
49
|
+
require 'gettext'
|
50
|
+
rescue LoadError
|
51
|
+
def bindtextdomain(d)
|
52
|
+
d
|
53
|
+
end
|
54
|
+
|
55
|
+
# Bored module
|
56
|
+
module GetText
|
57
|
+
def _(t)
|
58
|
+
t
|
59
|
+
end
|
60
|
+
end
|
44
61
|
end
|
62
|
+
|
63
|
+
begin
|
64
|
+
require 'rbgsl'
|
65
|
+
HAS_GSL=true
|
66
|
+
rescue LoadError
|
67
|
+
HAS_GSL=false
|
68
|
+
end
|
45
69
|
begin
|
46
70
|
require 'alglib'
|
47
71
|
HAS_ALGIB=true
|
@@ -66,7 +90,7 @@ end
|
|
66
90
|
# * Dataset: An union of vectors.
|
67
91
|
#
|
68
92
|
module Statsample
|
69
|
-
VERSION = '0.3.
|
93
|
+
VERSION = '0.3.3'
|
70
94
|
SPLIT_TOKEN = ","
|
71
95
|
autoload(:Database, 'statsample/converters')
|
72
96
|
autoload(:Anova, 'statsample/anova')
|
@@ -82,6 +106,10 @@ module Statsample
|
|
82
106
|
autoload(:Reliability, 'statsample/reliability')
|
83
107
|
autoload(:Bivariate, 'statsample/bivariate')
|
84
108
|
autoload(:Multivariate, 'statsample/multivariate')
|
109
|
+
autoload(:Multiset, 'statsample/multiset')
|
110
|
+
autoload(:StratifiedSample, 'statsample/multiset')
|
111
|
+
|
112
|
+
|
85
113
|
autoload(:Regression, 'statsample/regression')
|
86
114
|
autoload(:Test, 'statsample/test')
|
87
115
|
def self.load(filename)
|
data/po/es/statsample.po
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
msgid ""
|
2
|
+
msgstr ""
|
3
|
+
"Project-Id-Version: statsample 0.3.3\n"
|
4
|
+
"POT-Creation-Date: 2009-08-10 11:59-0400\n"
|
5
|
+
"PO-Revision-Date: 2009-08-04 15:57-0300\n"
|
6
|
+
"Last-Translator: Claudio Bustos <clbustos@gmail.com>\n"
|
7
|
+
"Language-Team: Desarrollador\n"
|
8
|
+
"MIME-Version: 1.0\n"
|
9
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
10
|
+
"Content-Transfer-Encoding: 8bit\n"
|
11
|
+
"X-Poedit-Language: Spanish\n"
|
12
|
+
"X-Poedit-SourceCharset: utf-8\n"
|
13
|
+
|
14
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:32
|
15
|
+
msgid "Bootstrap %d of %d"
|
16
|
+
msgstr "Bootstrap: %d de %d"
|
17
|
+
|
18
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:71
|
19
|
+
msgid "Summary for Bootstrap Dominance Analysis of %s on %s\n"
|
20
|
+
msgstr "Resultados del Análisis de Dominancia Bootstrap de %s en %s\n"
|
21
|
+
|
22
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:72
|
23
|
+
msgid "Sample size: %d\n"
|
24
|
+
msgstr "Tamaño de muestra: %d\n"
|
25
|
+
|
26
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
27
|
+
msgid "pairs"
|
28
|
+
msgstr "pares"
|
29
|
+
|
30
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
31
|
+
msgid "SE(Dij)"
|
32
|
+
msgstr "EE(Dij)"
|
33
|
+
|
34
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
35
|
+
msgid "Reproducibility"
|
36
|
+
msgstr "Reproducibilidad"
|
37
|
+
|
38
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:79
|
39
|
+
msgid "Complete dominance"
|
40
|
+
msgstr "Dominancia Completa"
|
41
|
+
|
42
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:87
|
43
|
+
msgid "Conditional dominance"
|
44
|
+
msgstr "Dominancia Condicional"
|
45
|
+
|
46
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:96
|
47
|
+
msgid "General Dominance"
|
48
|
+
msgstr "Dominancia General"
|
49
|
+
|
50
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:104
|
51
|
+
msgid "General averages"
|
52
|
+
msgstr "Promedios generales"
|
53
|
+
|
54
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
55
|
+
msgid "var"
|
56
|
+
msgstr "var"
|
57
|
+
|
58
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
59
|
+
msgid "mean"
|
60
|
+
msgstr "promedio"
|
61
|
+
|
62
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
63
|
+
msgid "se"
|
64
|
+
msgstr "de"
|
65
|
+
|
66
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
67
|
+
msgid "p.5"
|
68
|
+
msgstr "p.5"
|
69
|
+
|
70
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
71
|
+
msgid "p.95"
|
72
|
+
msgstr "p.95"
|
73
|
+
|
74
|
+
#: lib/statsample/crosstab.rb:93
|
75
|
+
msgid "Rows: %s\n"
|
76
|
+
msgstr ""
|
77
|
+
|
78
|
+
#: lib/statsample/crosstab.rb:94
|
79
|
+
msgid "Columns: %s\n"
|
80
|
+
msgstr ""
|
81
|
+
|
82
|
+
#: lib/statsample/crosstab.rb:96 lib/statsample/crosstab.rb:111
|
83
|
+
msgid "Total"
|
84
|
+
msgstr ""
|
85
|
+
|
86
|
+
#: lib/statsample/dominanceanalysis.rb:169
|
87
|
+
msgid "Summary for Dominance Analysis of %s on %s\n"
|
88
|
+
msgstr "Resultado del Análisis de Dominancia de %s en %s\n"
|
89
|
+
|
90
|
+
#: lib/statsample/dominanceanalysis.rb:172
|
91
|
+
msgid "Model 0"
|
92
|
+
msgstr "Modelo 0"
|
93
|
+
|
94
|
+
#: lib/statsample/dominanceanalysis.rb:186
|
95
|
+
msgid "k=%d Average"
|
96
|
+
msgstr "k=%d Promedio"
|
97
|
+
|
98
|
+
#: lib/statsample/dominanceanalysis.rb:199
|
99
|
+
msgid "Overall averages"
|
100
|
+
msgstr "Promedios generales"
|
101
|
+
|
102
|
+
#: lib/statsample/dominanceanalysis.rb:206
|
103
|
+
msgid "Pairwise"
|
104
|
+
msgstr "De a pares"
|
105
|
+
|
106
|
+
#: lib/statsample/dominanceanalysis.rb:210
|
107
|
+
msgid "Pairs"
|
108
|
+
msgstr "Pares"
|
data/po/statsample.pot
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# Statsample po template.
|
2
|
+
# Copyright (C) 2009-2009 Claudio Bustos
|
3
|
+
# This file is distributed under the same license as the Statsample package.
|
4
|
+
# Claudio Bustos <clbustos_AT_gmail.com>
|
5
|
+
#
|
6
|
+
#, fuzzy
|
7
|
+
msgid ""
|
8
|
+
msgstr ""
|
9
|
+
"Project-Id-Version: statsample 0.3.3\n"
|
10
|
+
"POT-Creation-Date: 2009-08-10 11:59-0400\n"
|
11
|
+
"PO-Revision-Date: 2009-08-04 15:36-0400\n"
|
12
|
+
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
13
|
+
"Language-Team: LANGUAGE <LL@li.org>\n"
|
14
|
+
"MIME-Version: 1.0\n"
|
15
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
16
|
+
"Content-Transfer-Encoding: 8bit\n"
|
17
|
+
"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"
|
18
|
+
|
19
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:32
|
20
|
+
msgid "Bootstrap %d of %d"
|
21
|
+
msgstr ""
|
22
|
+
|
23
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:71
|
24
|
+
msgid "Summary for Bootstrap Dominance Analysis of %s on %s\n"
|
25
|
+
msgstr ""
|
26
|
+
|
27
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:72
|
28
|
+
msgid "Sample size: %d\n"
|
29
|
+
msgstr ""
|
30
|
+
|
31
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
32
|
+
msgid "pairs"
|
33
|
+
msgstr ""
|
34
|
+
|
35
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
36
|
+
msgid "SE(Dij)"
|
37
|
+
msgstr ""
|
38
|
+
|
39
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:77
|
40
|
+
msgid "Reproducibility"
|
41
|
+
msgstr ""
|
42
|
+
|
43
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:79
|
44
|
+
msgid "Complete dominance"
|
45
|
+
msgstr ""
|
46
|
+
|
47
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:87
|
48
|
+
msgid "Conditional dominance"
|
49
|
+
msgstr ""
|
50
|
+
|
51
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:96
|
52
|
+
msgid "General Dominance"
|
53
|
+
msgstr ""
|
54
|
+
|
55
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:104
|
56
|
+
msgid "General averages"
|
57
|
+
msgstr ""
|
58
|
+
|
59
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
60
|
+
msgid "var"
|
61
|
+
msgstr ""
|
62
|
+
|
63
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
64
|
+
msgid "mean"
|
65
|
+
msgstr ""
|
66
|
+
|
67
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
68
|
+
msgid "se"
|
69
|
+
msgstr ""
|
70
|
+
|
71
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
72
|
+
msgid "p.5"
|
73
|
+
msgstr ""
|
74
|
+
|
75
|
+
#: lib/statsample/dominanceanalysis/bootstrap.rb:106
|
76
|
+
msgid "p.95"
|
77
|
+
msgstr ""
|
78
|
+
|
79
|
+
#: lib/statsample/crosstab.rb:93
|
80
|
+
msgid "Rows: %s\n"
|
81
|
+
msgstr ""
|
82
|
+
|
83
|
+
#: lib/statsample/crosstab.rb:94
|
84
|
+
msgid "Columns: %s\n"
|
85
|
+
msgstr ""
|
86
|
+
|
87
|
+
#: lib/statsample/crosstab.rb:96 lib/statsample/crosstab.rb:111
|
88
|
+
msgid "Total"
|
89
|
+
msgstr ""
|
90
|
+
|
91
|
+
#: lib/statsample/dominanceanalysis.rb:169
|
92
|
+
msgid "Summary for Dominance Analysis of %s on %s\n"
|
93
|
+
msgstr ""
|
94
|
+
|
95
|
+
#: lib/statsample/dominanceanalysis.rb:172
|
96
|
+
msgid "Model 0"
|
97
|
+
msgstr ""
|
98
|
+
|
99
|
+
#: lib/statsample/dominanceanalysis.rb:186
|
100
|
+
msgid "k=%d Average"
|
101
|
+
msgstr ""
|
102
|
+
|
103
|
+
#: lib/statsample/dominanceanalysis.rb:199
|
104
|
+
msgid "Overall averages"
|
105
|
+
msgstr ""
|
106
|
+
|
107
|
+
#: lib/statsample/dominanceanalysis.rb:206
|
108
|
+
msgid "Pairwise"
|
109
|
+
msgstr ""
|
110
|
+
|
111
|
+
#: lib/statsample/dominanceanalysis.rb:210
|
112
|
+
msgid "Pairs"
|
113
|
+
msgstr ""
|
data/test/test_anova.rb
CHANGED
data/test/test_codification.rb
CHANGED
data/test/test_crosstab.rb
CHANGED
data/test/test_csv.csv
CHANGED
data/test/test_csv.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
2
3
|
require 'tmpdir'
|
3
4
|
require 'test/unit'
|
4
5
|
|
@@ -8,8 +9,20 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
|
|
8
9
|
super
|
9
10
|
end
|
10
11
|
def test_read
|
11
|
-
|
12
|
+
assert_equal(6,@ds.cases)
|
12
13
|
assert_equal(%w{id name age city a1},@ds.fields)
|
14
|
+
id=[1,2,3,4,5,6].to_vector(:scale)
|
15
|
+
name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
|
16
|
+
age=[20,23,25,27,5.5,nil].to_vector(:scale)
|
17
|
+
city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
|
18
|
+
a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
|
19
|
+
ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
|
20
|
+
ds_exp.fields.each{|f|
|
21
|
+
assert_equal(ds_exp[f],@ds[f])
|
22
|
+
}
|
23
|
+
assert_equal(ds_exp,@ds)
|
24
|
+
|
25
|
+
|
13
26
|
end
|
14
27
|
def test_nil
|
15
28
|
assert_equal(nil,@ds['age'][5])
|
data/test/test_dataset.rb
CHANGED
data/test/test_ggobi.rb
CHANGED
data/test/test_multiset.rb
CHANGED
data/test/test_regression.rb
CHANGED
data/test/test_reliability.rb
CHANGED
data/test/test_resample.rb
CHANGED
data/test/test_srs.rb
CHANGED
data/test/test_statistics.rb
CHANGED
@@ -1,10 +1,23 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
2
3
|
require 'test/unit'
|
3
4
|
class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
4
5
|
|
5
6
|
def initialize(*args)
|
6
7
|
super
|
7
8
|
end
|
9
|
+
def test_is_number
|
10
|
+
assert("10".is_number?)
|
11
|
+
assert("-10".is_number?)
|
12
|
+
assert("0.1".is_number?)
|
13
|
+
assert("-0.1".is_number?)
|
14
|
+
assert("10e3".is_number?)
|
15
|
+
assert("10e-3".is_number?)
|
16
|
+
assert(!"1212-1212-1".is_number?)
|
17
|
+
assert(!"a10".is_number?)
|
18
|
+
assert(!"".is_number?)
|
19
|
+
|
20
|
+
end
|
8
21
|
def test_chi_square
|
9
22
|
assert_raise TypeError do
|
10
23
|
Statsample::Test.chi_square(1,1)
|
data/test/test_stratified.rb
CHANGED
data/test/test_svg_graph.rb
CHANGED
data/test/test_vector.rb
CHANGED
data/test/test_xls.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
2
3
|
require 'test/unit'
|
3
4
|
require 'tmpdir'
|
4
5
|
begin
|
@@ -15,6 +16,17 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
|
|
15
16
|
def test_read
|
16
17
|
assert_equal(6,@ds.cases)
|
17
18
|
assert_equal(%w{id name age city a1},@ds.fields)
|
19
|
+
id=[1,2,3,4,5,6].to_vector(:scale)
|
20
|
+
name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
|
21
|
+
age=[20,23,25,nil,5.5,nil].to_vector(:scale)
|
22
|
+
city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
|
23
|
+
a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
|
24
|
+
ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
|
25
|
+
ds_exp.fields.each{|f|
|
26
|
+
assert_equal(ds_exp[f],@ds[f])
|
27
|
+
}
|
28
|
+
assert_equal(ds_exp,@ds)
|
29
|
+
|
18
30
|
end
|
19
31
|
def test_nil
|
20
32
|
assert_equal(nil,@ds['age'][5])
|
data/test/test_xls.xls
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-11 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.0.0
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: hoe
|
@@ -40,9 +40,9 @@ dependencies:
|
|
40
40
|
requirements:
|
41
41
|
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 2.3.
|
43
|
+
version: 2.3.3
|
44
44
|
version:
|
45
|
-
description:
|
45
|
+
description: A suite for your basic and advanced statistics needs. Descriptive statistics, multiple regression, dominance analysis, scale's reliability analysis, bivariate statistics and other procedures.
|
46
46
|
email:
|
47
47
|
- clbustos@gmail.com
|
48
48
|
executables:
|
@@ -60,6 +60,7 @@ files:
|
|
60
60
|
- Manifest.txt
|
61
61
|
- README.txt
|
62
62
|
- bin/statsample
|
63
|
+
- data/locale/es/LC_MESSAGES/statsample.mo
|
63
64
|
- demo/benchmark.rb
|
64
65
|
- demo/chi-square.rb
|
65
66
|
- demo/crosstab.rb
|
@@ -104,6 +105,8 @@ files:
|
|
104
105
|
- lib/statsample/srs.rb
|
105
106
|
- lib/statsample/test.rb
|
106
107
|
- lib/statsample/vector.rb
|
108
|
+
- po/es/statsample.po
|
109
|
+
- po/statsample.pot
|
107
110
|
- setup.rb
|
108
111
|
- test/_test_chart.rb
|
109
112
|
- test/test_anova.rb
|
@@ -125,7 +128,7 @@ files:
|
|
125
128
|
- test/test_xls.rb
|
126
129
|
- test/test_xls.xls
|
127
130
|
has_rdoc: true
|
128
|
-
homepage: http://rubyforge.org/
|
131
|
+
homepage: http://ruby-statsample.rubyforge.org/
|
129
132
|
licenses: []
|
130
133
|
|
131
134
|
post_install_message:
|
@@ -152,7 +155,7 @@ rubyforge_project: ruby-statsample
|
|
152
155
|
rubygems_version: 1.3.5
|
153
156
|
signing_key:
|
154
157
|
specification_version: 3
|
155
|
-
summary:
|
158
|
+
summary: A suite for your basic and advanced statistics needs
|
156
159
|
test_files:
|
157
160
|
- test/test_anova.rb
|
158
161
|
- test/test_codification.rb
|