statsample 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +3 -1
- data/lib/statsample.rb +175 -179
- data/lib/statsample/codification.rb +1 -1
- data/lib/statsample/converter/csv18.rb +56 -0
- data/lib/statsample/converter/csv19.rb +60 -0
- data/lib/statsample/converters.rb +26 -75
- data/lib/statsample/dataset.rb +38 -29
- data/lib/statsample/dominanceanalysis.rb +6 -6
- data/lib/statsample/graph/gdchart.rb +2 -1
- data/lib/statsample/graph/svggraph.rb +10 -9
- data/lib/statsample/multiset.rb +3 -3
- data/lib/statsample/regression/multiple.rb +43 -271
- data/lib/statsample/regression/multiple/baseengine.rb +235 -0
- data/lib/statsample/regression/multiple/gslengine.rb +2 -2
- data/lib/statsample/vector.rb +754 -736
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +22 -3
- data/test/test_distribution.rb +4 -3
- data/test/test_ggobi.rb +2 -2
- data/test/test_regression.rb +11 -2
- data/test/test_svg_graph.rb +0 -1
- data/test/test_vector.rb +50 -5
- data/test/test_xls.rb +2 -4
- metadata +5 -3
- data/test/_test_chart.rb +0 -58
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -34,6 +34,8 @@ lib/statsample/anova.rb
|
|
34
34
|
lib/statsample/bivariate.rb
|
35
35
|
lib/statsample/codification.rb
|
36
36
|
lib/statsample/combination.rb
|
37
|
+
lib/statsample/converter/csv18.rb
|
38
|
+
lib/statsample/converter/csv19.rb
|
37
39
|
lib/statsample/converters.rb
|
38
40
|
lib/statsample/crosstab.rb
|
39
41
|
lib/statsample/dataset.rb
|
@@ -56,6 +58,7 @@ lib/statsample/regression/binomial/logit.rb
|
|
56
58
|
lib/statsample/regression/binomial/probit.rb
|
57
59
|
lib/statsample/regression/multiple.rb
|
58
60
|
lib/statsample/regression/multiple/alglibengine.rb
|
61
|
+
lib/statsample/regression/multiple/baseengine.rb
|
59
62
|
lib/statsample/regression/multiple/gslengine.rb
|
60
63
|
lib/statsample/regression/multiple/rubyengine.rb
|
61
64
|
lib/statsample/regression/simple.rb
|
@@ -67,7 +70,6 @@ lib/statsample/vector.rb
|
|
67
70
|
po/es/statsample.po
|
68
71
|
po/statsample.pot
|
69
72
|
setup.rb
|
70
|
-
test/_test_chart.rb
|
71
73
|
test/test_anova.rb
|
72
74
|
test/test_codification.rb
|
73
75
|
test/test_combination.rb
|
data/lib/statsample.rb
CHANGED
@@ -29,49 +29,49 @@ class Numeric
|
|
29
29
|
end
|
30
30
|
|
31
31
|
class String
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
32
|
+
def is_number?
|
33
|
+
if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
|
34
|
+
true
|
35
|
+
else
|
36
|
+
false
|
38
37
|
end
|
38
|
+
end
|
39
39
|
end
|
40
40
|
|
41
41
|
def create_test(*args,&proc)
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
description=args.shift
|
43
|
+
fields=args
|
44
|
+
[description, fields, Proc.new]
|
45
45
|
end
|
46
46
|
#--
|
47
47
|
# Test extensions
|
48
48
|
begin
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
49
|
+
require 'gettext'
|
50
|
+
rescue LoadError
|
51
|
+
def bindtextdomain(d) #:nodoc:
|
52
|
+
d
|
53
|
+
end
|
54
|
+
|
55
|
+
# Bored module
|
56
|
+
module GetText #:nodoc:
|
57
|
+
def _(t)
|
58
|
+
t
|
53
59
|
end
|
54
|
-
|
55
|
-
# Bored module
|
56
|
-
module GetText #:nodoc:
|
57
|
-
def _(t)
|
58
|
-
t
|
59
|
-
end
|
60
|
-
end
|
60
|
+
end
|
61
61
|
end
|
62
62
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
63
|
+
begin
|
64
|
+
require 'rbgsl'
|
65
|
+
HAS_GSL=true
|
66
|
+
rescue LoadError
|
67
|
+
HAS_GSL=false
|
68
|
+
end
|
69
|
+
begin
|
70
|
+
require 'alglib'
|
71
|
+
HAS_ALGIB=true
|
72
|
+
rescue LoadError
|
73
|
+
HAS_ALGIB=false
|
74
|
+
end
|
75
75
|
# ++
|
76
76
|
# Modules for statistical analysis
|
77
77
|
# See first:
|
@@ -80,77 +80,74 @@ end
|
|
80
80
|
# * Dataset: An union of vectors.
|
81
81
|
#
|
82
82
|
module Statsample
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
o
|
114
|
-
else
|
115
|
-
false
|
116
|
-
end
|
83
|
+
VERSION = '0.5.0'
|
84
|
+
SPLIT_TOKEN = ","
|
85
|
+
autoload(:Database, 'statsample/converters')
|
86
|
+
autoload(:Anova, 'statsample/anova')
|
87
|
+
autoload(:Combination, 'statsample/combination')
|
88
|
+
autoload(:CSV, 'statsample/converters')
|
89
|
+
autoload(:PlainText, 'statsample/converters')
|
90
|
+
autoload(:Excel, 'statsample/converters')
|
91
|
+
autoload(:GGobi, 'statsample/converters')
|
92
|
+
autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
|
93
|
+
autoload(:HtmlReport, 'statsample/htmlreport')
|
94
|
+
autoload(:Mx, 'statsample/converters')
|
95
|
+
autoload(:Resample, 'statsample/resample')
|
96
|
+
autoload(:SRS, 'statsample/srs')
|
97
|
+
autoload(:Codification, 'statsample/codification')
|
98
|
+
autoload(:Reliability, 'statsample/reliability')
|
99
|
+
autoload(:Bivariate, 'statsample/bivariate')
|
100
|
+
autoload(:Multivariate, 'statsample/multivariate')
|
101
|
+
autoload(:Multiset, 'statsample/multiset')
|
102
|
+
autoload(:StratifiedSample, 'statsample/multiset')
|
103
|
+
autoload(:MLE, 'statsample/mle')
|
104
|
+
autoload(:Regression, 'statsample/regression')
|
105
|
+
autoload(:Test, 'statsample/test')
|
106
|
+
def self.load(filename)
|
107
|
+
if File.exists? filename
|
108
|
+
o=false
|
109
|
+
File.open(filename,"r") {|fp| o=Marshal.load(fp) }
|
110
|
+
o
|
111
|
+
else
|
112
|
+
false
|
117
113
|
end
|
114
|
+
end
|
118
115
|
|
119
116
|
module Util
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
131
|
-
end
|
132
|
-
module Writable
|
133
|
-
def save(filename)
|
134
|
-
fp=File.open(filename,"w")
|
135
|
-
Marshal.dump(self,fp)
|
136
|
-
fp.close
|
137
|
-
end
|
117
|
+
# Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
|
118
|
+
def normal_order_statistic_medians(i,n)
|
119
|
+
if i==1
|
120
|
+
u= 1.0 - normal_order_statistic_medians(n,n)
|
121
|
+
elsif i==n
|
122
|
+
u=0.5**(1 / n.to_f)
|
123
|
+
else
|
124
|
+
u= (i - 0.3175) / (n + 0.365)
|
125
|
+
end
|
126
|
+
u
|
138
127
|
end
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
128
|
+
end
|
129
|
+
module Writable
|
130
|
+
def save(filename)
|
131
|
+
fp=File.open(filename,"w")
|
132
|
+
Marshal.dump(self,fp)
|
133
|
+
fp.close
|
134
|
+
end
|
135
|
+
end
|
136
|
+
module HtmlSummary
|
137
|
+
def add_line(n=nil)
|
138
|
+
self << "<hr />"
|
139
|
+
end
|
140
|
+
def nl
|
141
|
+
self << "<br />"
|
142
|
+
end
|
143
|
+
def add(text)
|
144
|
+
self << ("<p>"+text.gsub("\n","<br />")+"</p>")
|
145
|
+
end
|
146
|
+
def parse_table(table)
|
147
|
+
self << table.parse_html
|
148
|
+
end
|
149
|
+
end
|
150
|
+
module ConsoleSummary
|
154
151
|
def add_line(n=80)
|
155
152
|
self << "-"*n+"\n"
|
156
153
|
end
|
@@ -163,98 +160,97 @@ module Statsample
|
|
163
160
|
def parse_table(table)
|
164
161
|
self << table.parse_console
|
165
162
|
end
|
163
|
+
end
|
164
|
+
class ReportTable
|
165
|
+
attr_reader :header
|
166
|
+
def initialize(h=[])
|
167
|
+
@rows=[]
|
168
|
+
@max_cols=[]
|
169
|
+
self.header=(h)
|
166
170
|
end
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
@
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
out << parse_console_row(
|
209
|
-
out << parse_console_hr
|
210
|
-
|
211
|
-
@rows.each{|row|
|
212
|
-
if row==:hr
|
213
|
-
out << parse_console_hr
|
214
|
-
else
|
215
|
-
out << parse_console_row(row)
|
216
|
-
end
|
217
|
-
}
|
218
|
-
out << parse_console_hr
|
219
|
-
|
220
|
-
out
|
221
|
-
end
|
222
|
-
def parse_html
|
223
|
-
out="<table>\n"
|
224
|
-
if header.size>0
|
225
|
-
out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
|
171
|
+
def add_row(row)
|
172
|
+
row.each_index{|i|
|
173
|
+
@max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
|
174
|
+
}
|
175
|
+
@rows.push(row)
|
176
|
+
end
|
177
|
+
def add_horizontal_line
|
178
|
+
@rows.push(:hr)
|
179
|
+
end
|
180
|
+
def header=(h)
|
181
|
+
h.each_index{|i|
|
182
|
+
@max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
|
183
|
+
}
|
184
|
+
@header=h
|
185
|
+
end
|
186
|
+
def parse_console_row(row)
|
187
|
+
out="| "
|
188
|
+
@max_cols.each_index{|i|
|
189
|
+
if row[i].nil?
|
190
|
+
out << " "*(@max_cols[i]+2)+"|"
|
191
|
+
else
|
192
|
+
t=row[i].to_s
|
193
|
+
out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
|
194
|
+
end
|
195
|
+
}
|
196
|
+
out << "\n"
|
197
|
+
out
|
198
|
+
end
|
199
|
+
def parse_console_hr
|
200
|
+
"-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
|
201
|
+
end
|
202
|
+
def parse_console
|
203
|
+
out="\n"
|
204
|
+
out << parse_console_hr
|
205
|
+
out << parse_console_row(header)
|
206
|
+
out << parse_console_hr
|
207
|
+
|
208
|
+
@rows.each{|row|
|
209
|
+
if row==:hr
|
210
|
+
out << parse_console_hr
|
211
|
+
else
|
212
|
+
out << parse_console_row(row)
|
226
213
|
end
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
row_with_line=false
|
237
|
-
end
|
238
|
-
}
|
239
|
-
out << "</tbody></table>\n"
|
240
|
-
out
|
214
|
+
}
|
215
|
+
out << parse_console_hr
|
216
|
+
|
217
|
+
out
|
218
|
+
end
|
219
|
+
def parse_html
|
220
|
+
out="<table>\n"
|
221
|
+
if header.size>0
|
222
|
+
out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
|
241
223
|
end
|
224
|
+
out << "<tbody>\n"
|
225
|
+
row_with_line=false
|
226
|
+
@rows.each{|row|
|
227
|
+
if row==:hr
|
228
|
+
row_with_line=true
|
229
|
+
else
|
230
|
+
out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
|
231
|
+
out << row.join("</td><td>") +"</td>"
|
232
|
+
out << "</tr>\n"
|
233
|
+
row_with_line=false
|
234
|
+
end
|
235
|
+
}
|
236
|
+
out << "</tbody></table>\n"
|
237
|
+
out
|
242
238
|
end
|
239
|
+
end
|
243
240
|
|
244
|
-
|
245
|
-
|
246
|
-
|
241
|
+
module STATSAMPLE__ #:nodoc:
|
242
|
+
end
|
247
243
|
end
|
248
244
|
|
249
245
|
|
250
246
|
|
251
247
|
#--
|
252
248
|
begin
|
253
|
-
|
249
|
+
require 'statsamplert'
|
254
250
|
rescue LoadError
|
255
|
-
|
256
|
-
|
257
|
-
|
251
|
+
module Statsample
|
252
|
+
OPTIMIZED=false
|
253
|
+
end
|
258
254
|
end
|
259
255
|
|
260
256
|
require 'statsample/vector'
|
@@ -43,7 +43,7 @@ module Statsample
|
|
43
43
|
raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
|
44
44
|
v=dataset[v_name]
|
45
45
|
split_data=v.splitted(sep)
|
46
|
-
factors=split_data.flatten.uniq.compact.sort.inject({}) {|
|
46
|
+
factors=split_data.flatten.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac}
|
47
47
|
h[v_name]=factors
|
48
48
|
h
|
49
49
|
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Statsample
|
2
|
+
class CSV < SpreadsheetBase
|
3
|
+
class << self
|
4
|
+
# Returns a Dataset based on a csv file
|
5
|
+
#
|
6
|
+
# USE:
|
7
|
+
# ds=Statsample::CSV.read("test_csv.csv")
|
8
|
+
def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
|
9
|
+
require 'csv'
|
10
|
+
first_row=true
|
11
|
+
fields=[]
|
12
|
+
fields_data={}
|
13
|
+
ds=nil
|
14
|
+
line_number=0
|
15
|
+
|
16
|
+
::CSV.open(filename,'r',fs,rs) do |row|
|
17
|
+
line_number+=1
|
18
|
+
if(line_number<=ignore_lines)
|
19
|
+
#puts "Skip line"
|
20
|
+
next
|
21
|
+
end
|
22
|
+
row.collect!{|c|
|
23
|
+
c.to_s
|
24
|
+
}
|
25
|
+
if first_row
|
26
|
+
fields=extract_fields(row)
|
27
|
+
ds=Statsample::Dataset.new(fields)
|
28
|
+
first_row=false
|
29
|
+
else
|
30
|
+
rowa=process_row(row,empty)
|
31
|
+
ds.add_case(rowa,false)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
convert_to_scale(ds,fields)
|
35
|
+
ds.update_valid_data
|
36
|
+
ds
|
37
|
+
end
|
38
|
+
# Save a Dataset on a csv file
|
39
|
+
#
|
40
|
+
# USE:
|
41
|
+
# Statsample::CSV.write(ds,"test_csv.csv")
|
42
|
+
def write(dataset,filename, convert_comma=false,*opts)
|
43
|
+
require 'csv'
|
44
|
+
writer=::CSV.open(filename,'w',*opts)
|
45
|
+
writer << dataset.fields
|
46
|
+
dataset.each_array{|row|
|
47
|
+
if(convert_comma)
|
48
|
+
row.collect!{|v| v.to_s.gsub(".",",")}
|
49
|
+
end
|
50
|
+
writer << row
|
51
|
+
}
|
52
|
+
writer.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|