statsample 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +3 -1
- data/lib/statsample.rb +175 -179
- data/lib/statsample/codification.rb +1 -1
- data/lib/statsample/converter/csv18.rb +56 -0
- data/lib/statsample/converter/csv19.rb +60 -0
- data/lib/statsample/converters.rb +26 -75
- data/lib/statsample/dataset.rb +38 -29
- data/lib/statsample/dominanceanalysis.rb +6 -6
- data/lib/statsample/graph/gdchart.rb +2 -1
- data/lib/statsample/graph/svggraph.rb +10 -9
- data/lib/statsample/multiset.rb +3 -3
- data/lib/statsample/regression/multiple.rb +43 -271
- data/lib/statsample/regression/multiple/baseengine.rb +235 -0
- data/lib/statsample/regression/multiple/gslengine.rb +2 -2
- data/lib/statsample/vector.rb +754 -736
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +22 -3
- data/test/test_distribution.rb +4 -3
- data/test/test_ggobi.rb +2 -2
- data/test/test_regression.rb +11 -2
- data/test/test_svg_graph.rb +0 -1
- data/test/test_vector.rb +50 -5
- data/test/test_xls.rb +2 -4
- metadata +5 -3
- data/test/_test_chart.rb +0 -58
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -34,6 +34,8 @@ lib/statsample/anova.rb
|
|
34
34
|
lib/statsample/bivariate.rb
|
35
35
|
lib/statsample/codification.rb
|
36
36
|
lib/statsample/combination.rb
|
37
|
+
lib/statsample/converter/csv18.rb
|
38
|
+
lib/statsample/converter/csv19.rb
|
37
39
|
lib/statsample/converters.rb
|
38
40
|
lib/statsample/crosstab.rb
|
39
41
|
lib/statsample/dataset.rb
|
@@ -56,6 +58,7 @@ lib/statsample/regression/binomial/logit.rb
|
|
56
58
|
lib/statsample/regression/binomial/probit.rb
|
57
59
|
lib/statsample/regression/multiple.rb
|
58
60
|
lib/statsample/regression/multiple/alglibengine.rb
|
61
|
+
lib/statsample/regression/multiple/baseengine.rb
|
59
62
|
lib/statsample/regression/multiple/gslengine.rb
|
60
63
|
lib/statsample/regression/multiple/rubyengine.rb
|
61
64
|
lib/statsample/regression/simple.rb
|
@@ -67,7 +70,6 @@ lib/statsample/vector.rb
|
|
67
70
|
po/es/statsample.po
|
68
71
|
po/statsample.pot
|
69
72
|
setup.rb
|
70
|
-
test/_test_chart.rb
|
71
73
|
test/test_anova.rb
|
72
74
|
test/test_codification.rb
|
73
75
|
test/test_combination.rb
|
data/lib/statsample.rb
CHANGED
@@ -29,49 +29,49 @@ class Numeric
|
|
29
29
|
end
|
30
30
|
|
31
31
|
class String
  # Tells whether the whole string is written as a number: an optional
  # leading minus sign, one or more digits, an optional "," or "." separator
  # followed by optional digits, and an optional exponent ("e" plus an
  # optionally negative integer).
  #
  # The pattern is anchored with \A and \z so every character of the string
  # has to take part in the match. With ^ and $ (which match at line
  # boundaries) a multi-line string such as "abc\n123" was accepted because
  # one of its lines looks numeric.
  #
  # Returns true or false.
  def is_number?
    !(self =~ /\A-?\d+[,.]?\d*(e-?\d+)?\z/).nil?
  end
end
|
40
40
|
|
41
41
|
# Packs the pieces of a test definition into a three-element array.
#
# * args: the first element is taken as the description, the remaining
#   elements are the fields.
# * proc: the block supplied by the caller (nil when no block is given).
#
# Returns [description, fields, block].
#
# The block is taken from the explicit &proc parameter. Capturing the
# caller's block with a block-less Proc.new raises ArgumentError on
# Ruby 3.0 and later.
def create_test(*args,&proc)
  description=args.shift
  fields=args
  [description, fields, proc]
end
|
46
46
|
#--
|
47
47
|
# Test extensions
|
48
48
|
begin
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
49
|
+
require 'gettext'
|
50
|
+
rescue LoadError
|
51
|
+
def bindtextdomain(d) #:nodoc:
|
52
|
+
d
|
53
|
+
end
|
54
|
+
|
55
|
+
# Bored module
|
56
|
+
module GetText #:nodoc:
|
57
|
+
def _(t)
|
58
|
+
t
|
53
59
|
end
|
54
|
-
|
55
|
-
# Bored module
|
56
|
-
module GetText #:nodoc:
|
57
|
-
def _(t)
|
58
|
-
t
|
59
|
-
end
|
60
|
-
end
|
60
|
+
end
|
61
61
|
end
|
62
62
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
63
|
+
begin
|
64
|
+
require 'rbgsl'
|
65
|
+
HAS_GSL=true
|
66
|
+
rescue LoadError
|
67
|
+
HAS_GSL=false
|
68
|
+
end
|
69
|
+
begin
|
70
|
+
require 'alglib'
|
71
|
+
HAS_ALGIB=true
|
72
|
+
rescue LoadError
|
73
|
+
HAS_ALGIB=false
|
74
|
+
end
|
75
75
|
# ++
|
76
76
|
# Modules for statistical analysis
|
77
77
|
# See first:
|
@@ -80,77 +80,74 @@ end
|
|
80
80
|
# * Dataset: A union of vectors.
|
81
81
|
#
|
82
82
|
module Statsample
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
o
|
114
|
-
else
|
115
|
-
false
|
116
|
-
end
|
83
|
+
VERSION = '0.5.0'
|
84
|
+
SPLIT_TOKEN = ","
|
85
|
+
autoload(:Database, 'statsample/converters')
|
86
|
+
autoload(:Anova, 'statsample/anova')
|
87
|
+
autoload(:Combination, 'statsample/combination')
|
88
|
+
autoload(:CSV, 'statsample/converters')
|
89
|
+
autoload(:PlainText, 'statsample/converters')
|
90
|
+
autoload(:Excel, 'statsample/converters')
|
91
|
+
autoload(:GGobi, 'statsample/converters')
|
92
|
+
autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
|
93
|
+
autoload(:HtmlReport, 'statsample/htmlreport')
|
94
|
+
autoload(:Mx, 'statsample/converters')
|
95
|
+
autoload(:Resample, 'statsample/resample')
|
96
|
+
autoload(:SRS, 'statsample/srs')
|
97
|
+
autoload(:Codification, 'statsample/codification')
|
98
|
+
autoload(:Reliability, 'statsample/reliability')
|
99
|
+
autoload(:Bivariate, 'statsample/bivariate')
|
100
|
+
autoload(:Multivariate, 'statsample/multivariate')
|
101
|
+
autoload(:Multiset, 'statsample/multiset')
|
102
|
+
autoload(:StratifiedSample, 'statsample/multiset')
|
103
|
+
autoload(:MLE, 'statsample/mle')
|
104
|
+
autoload(:Regression, 'statsample/regression')
|
105
|
+
autoload(:Test, 'statsample/test')
|
106
|
+
# Reads a Marshal-serialized object back from +filename+.
#
# Returns the deserialized object, or false when the file does not exist.
# Errors raised by Marshal.load (for example on corrupt data) propagate to
# the caller.
#
# SECURITY: Marshal.load can instantiate arbitrary objects. Only use this
# method on files that come from a trusted source.
def self.load(filename)
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  return false unless File.exist?(filename)
  # Marshal output is binary, so the file is opened in binary mode; the
  # block form closes the handle even if Marshal.load raises.
  File.open(filename,"rb") {|fp| Marshal.load(fp) }
end
|
118
115
|
|
119
116
|
module Util
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
131
|
-
end
|
132
|
-
module Writable
|
133
|
-
def save(filename)
|
134
|
-
fp=File.open(filename,"w")
|
135
|
-
Marshal.dump(self,fp)
|
136
|
-
fp.close
|
137
|
-
end
|
117
|
+
# Order statistic median for position +i+ (1-based) in a sample of size +n+,
# following the piecewise formula of the reference:
#
# * i == n     : 0.5 ** (1.0 / n)
# * i == 1     : 1 - value for i == n
# * otherwise  : (i - 0.3175) / (n + 0.365)
#
# Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
# NOTE(review): the reference names these values the *uniform* order
# statistic medians - confirm the method name matches the intended use.
#
# The i == n case is tested first. For n == 1 both special cases apply to
# the same position; testing i == 1 first made the method call itself with
# (1, 1) forever. Both formulas give 0.5 for that input.
#
# Returns a Float.
def normal_order_statistic_medians(i,n)
  if i==n
    0.5**(1 / n.to_f)
  elsif i==1
    1.0 - normal_order_statistic_medians(n,n)
  else
    (i - 0.3175) / (n + 0.365)
  end
end
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
128
|
+
end
|
129
|
+
# Mixin that lets an object write itself to disk in Marshal format.
module Writable
  # Serializes the receiver with Marshal.dump into +filename+. The file is
  # opened for writing, so existing content is replaced.
  #
  # The block form of File.open closes the handle even when Marshal.dump
  # raises (for example on an object that cannot be dumped), and binary
  # mode keeps the Marshal byte stream untouched on every platform.
  #
  # Returns nil.
  def save(filename)
    File.open(filename,"wb") {|fp| Marshal.dump(self,fp) }
    nil
  end
end
|
136
|
+
module HtmlSummary
|
137
|
+
def add_line(n=nil)
|
138
|
+
self << "<hr />"
|
139
|
+
end
|
140
|
+
def nl
|
141
|
+
self << "<br />"
|
142
|
+
end
|
143
|
+
def add(text)
|
144
|
+
self << ("<p>"+text.gsub("\n","<br />")+"</p>")
|
145
|
+
end
|
146
|
+
def parse_table(table)
|
147
|
+
self << table.parse_html
|
148
|
+
end
|
149
|
+
end
|
150
|
+
module ConsoleSummary
|
154
151
|
def add_line(n=80)
|
155
152
|
self << "-"*n+"\n"
|
156
153
|
end
|
@@ -163,98 +160,97 @@ module Statsample
|
|
163
160
|
def parse_table(table)
|
164
161
|
self << table.parse_console
|
165
162
|
end
|
163
|
+
end
|
164
|
+
# Simple table builder that can be rendered as console text or as HTML.
#
# Rows are arrays of cells; the symbol :hr stored among the rows marks a
# horizontal line. The widest cell seen for each column (header included)
# is tracked in @max_cols and used to align the console output.
class ReportTable
  attr_reader :header

  # h: array with the header cells (defaults to no header).
  def initialize(h=[])
    @rows=[]
    @max_cols=[]
    self.header=(h)
  end

  # Appends a row (array of cells) and widens the columns if needed.
  def add_row(row)
    track_widths(row)
    @rows.push(row)
  end

  # Appends a horizontal line marker.
  def add_horizontal_line
    @rows.push(:hr)
  end

  # Sets the header cells and widens the columns if needed.
  def header=(h)
    track_widths(h)
    @header=h
  end

  # Renders one row for the console: "| " followed, for every column, by the
  # cell padded to the column width and a closing "|". Missing cells are
  # rendered as blanks. Each column takes width+3 characters.
  def parse_console_row(row)
    out="| "
    @max_cols.each_index{|i|
      if row[i].nil?
        out << " "*(@max_cols[i]+2)+"|"
      else
        t=row[i].to_s
        out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
      end
    }
    out << "\n"
    out
  end

  # Renders a rule of dashes as long as a console row: 2 characters for the
  # leading "| " plus width+3 for every column.
  #
  # @max_cols holds Integers, so the width itself is summed. Calling #size
  # on it returns the number of bytes of the Integer, not the column width.
  def parse_console_hr
    "-"*(@max_cols.inject(0){|a,v| a+v+3}+2)+"\n"
  end

  # Renders the whole table for the console: rule, header, rule, every row
  # (or a rule for :hr markers) and a closing rule.
  def parse_console
    out="\n"
    out << parse_console_hr
    out << parse_console_row(header)
    out << parse_console_hr

    @rows.each{|row|
      if row==:hr
        out << parse_console_hr
      else
        out << parse_console_row(row)
      end
    }
    out << parse_console_hr

    out
  end

  # Renders the table as HTML. A :hr marker is not printed; instead the next
  # row gets class='line'.
  #
  # The header is emitted as a well-formed <thead> (every <th> closed, cells
  # wrapped in a <tr>) and <tbody> is opened exactly once.
  # NOTE(review): cell contents are inserted without HTML escaping.
  def parse_html
    out="<table>\n"
    if header.size>0
      out << "<thead><tr><th>"+header.join("</th><th>")+"</th></tr></thead>\n"
    end
    out << "<tbody>\n"
    row_with_line=false
    @rows.each{|row|
      if row==:hr
        row_with_line=true
      else
        out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
        out << row.join("</td><td>") +"</td>"
        out << "</tr>\n"
        row_with_line=false
      end
    }
    out << "</tbody></table>\n"
    out
  end

  private

  # Records, per column index, the longest string representation seen so far.
  def track_widths(cells)
    cells.each_index{|i|
      width=cells[i].to_s.size
      @max_cols[i]=width if @max_cols[i].nil? or width > @max_cols[i]
    }
  end
end
|
243
240
|
|
244
|
-
|
245
|
-
|
246
|
-
|
241
|
+
module STATSAMPLE__ #:nodoc:
|
242
|
+
end
|
247
243
|
end
|
248
244
|
|
249
245
|
|
250
246
|
|
251
247
|
#--
|
252
248
|
begin
|
253
|
-
|
249
|
+
require 'statsamplert'
|
254
250
|
rescue LoadError
|
255
|
-
|
256
|
-
|
257
|
-
|
251
|
+
module Statsample
|
252
|
+
OPTIMIZED=false
|
253
|
+
end
|
258
254
|
end
|
259
255
|
|
260
256
|
require 'statsample/vector'
|
@@ -43,7 +43,7 @@ module Statsample
|
|
43
43
|
raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
|
44
44
|
v=dataset[v_name]
|
45
45
|
split_data=v.splitted(sep)
|
46
|
-
factors=split_data.flatten.uniq.compact.sort.inject({}) {|
|
46
|
+
factors=split_data.flatten.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac}
|
47
47
|
h[v_name]=factors
|
48
48
|
h
|
49
49
|
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Statsample
  # CSV reader/writer built on the positional ::CSV.open(path, mode, fs, rs)
  # call form.
  # NOTE(review): extract_fields, process_row and convert_to_scale are not
  # defined here; presumably they come from SpreadsheetBase - confirm.
  class CSV < SpreadsheetBase
    class << self
      # Returns a Dataset based on a csv file
      #
      # * filename: path of the csv file.
      # * empty: passed to process_row together with each data row.
      # * ignore_lines: number of leading lines to skip before the header.
      # * fs, rs: passed through to ::CSV.open.
      #
      # The first row that is not skipped provides the field names; every
      # later row is added to the dataset as a case.
      #
      # USE:
      #     ds=Statsample::CSV.read("test_csv.csv")
      def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
        require 'csv'
        first_row=true
        fields=[]
        ds=nil
        line_number=0

        ::CSV.open(filename,'r',fs,rs) do |row|
          line_number+=1
          next if line_number<=ignore_lines
          row.collect!{|c| c.to_s }
          if first_row
            fields=extract_fields(row)
            ds=Statsample::Dataset.new(fields)
            first_row=false
          else
            rowa=process_row(row,empty)
            # NOTE(review): the false flag presumably postpones per-case
            # bookkeeping until update_valid_data below - confirm.
            ds.add_case(rowa,false)
          end
        end
        convert_to_scale(ds,fields)
        ds.update_valid_data
        ds
      end
      # Save a Dataset on a csv file
      #
      # * dataset: object answering to #fields and #each_array.
      # * filename: path of the csv file to write.
      # * convert_comma: when true, every "." in every value is replaced
      #   by ",".
      # * opts: extra arguments passed through to ::CSV.open.
      #
      # The block form of ::CSV.open is used so the writer is closed even
      # when writing a row raises.
      #
      # USE:
      #     Statsample::CSV.write(ds,"test_csv.csv")
      def write(dataset,filename, convert_comma=false,*opts)
        require 'csv'
        ::CSV.open(filename,'w',*opts) do |writer|
          writer << dataset.fields
          dataset.each_array{|row|
            if(convert_comma)
              row.collect!{|v| v.to_s.gsub(".",",")}
            end
            writer << row
          }
        end
      end
    end
  end
end
|