statsample 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.5.0 / 2009-09-26
2
+ * Vector now uses a Hash as a third argument
3
+ * Tested on Ruby 1.8.6, 1.8.7 and 1.9.1 with multiruby
4
+
1
5
  === 0.4.1 / 2009-09-12
2
6
  * More methods and usage documentation
3
7
  * Logit tests
data/Manifest.txt CHANGED
@@ -34,6 +34,8 @@ lib/statsample/anova.rb
34
34
  lib/statsample/bivariate.rb
35
35
  lib/statsample/codification.rb
36
36
  lib/statsample/combination.rb
37
+ lib/statsample/converter/csv18.rb
38
+ lib/statsample/converter/csv19.rb
37
39
  lib/statsample/converters.rb
38
40
  lib/statsample/crosstab.rb
39
41
  lib/statsample/dataset.rb
@@ -56,6 +58,7 @@ lib/statsample/regression/binomial/logit.rb
56
58
  lib/statsample/regression/binomial/probit.rb
57
59
  lib/statsample/regression/multiple.rb
58
60
  lib/statsample/regression/multiple/alglibengine.rb
61
+ lib/statsample/regression/multiple/baseengine.rb
59
62
  lib/statsample/regression/multiple/gslengine.rb
60
63
  lib/statsample/regression/multiple/rubyengine.rb
61
64
  lib/statsample/regression/simple.rb
@@ -67,7 +70,6 @@ lib/statsample/vector.rb
67
70
  po/es/statsample.po
68
71
  po/statsample.pot
69
72
  setup.rb
70
- test/_test_chart.rb
71
73
  test/test_anova.rb
72
74
  test/test_codification.rb
73
75
  test/test_combination.rb
data/lib/statsample.rb CHANGED
@@ -29,49 +29,49 @@ class Numeric
29
29
  end
30
30
 
31
31
  class String
32
- def is_number?
33
- if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
34
- true
35
- else
36
- false
37
- end
32
+ def is_number?
33
+ if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
34
+ true
35
+ else
36
+ false
38
37
  end
38
+ end
39
39
  end
40
40
 
41
41
  def create_test(*args,&proc)
42
- description=args.shift
43
- fields=args
44
- [description, fields, Proc.new]
42
+ description=args.shift
43
+ fields=args
44
+ [description, fields, Proc.new]
45
45
  end
46
46
  #--
47
47
  # Test extensions
48
48
  begin
49
- require 'gettext'
50
- rescue LoadError
51
- def bindtextdomain(d) #:nodoc:
52
- d
49
+ require 'gettext'
50
+ rescue LoadError
51
+ def bindtextdomain(d) #:nodoc:
52
+ d
53
+ end
54
+
55
+ # Bored module
56
+ module GetText #:nodoc:
57
+ def _(t)
58
+ t
53
59
  end
54
-
55
- # Bored module
56
- module GetText #:nodoc:
57
- def _(t)
58
- t
59
- end
60
- end
60
+ end
61
61
  end
62
62
 
63
- begin
64
- require 'rbgsl'
65
- HAS_GSL=true
66
- rescue LoadError
67
- HAS_GSL=false
68
- end
69
- begin
70
- require 'alglib'
71
- HAS_ALGIB=true
72
- rescue LoadError
73
- HAS_ALGIB=false
74
- end
63
+ begin
64
+ require 'rbgsl'
65
+ HAS_GSL=true
66
+ rescue LoadError
67
+ HAS_GSL=false
68
+ end
69
+ begin
70
+ require 'alglib'
71
+ HAS_ALGIB=true
72
+ rescue LoadError
73
+ HAS_ALGIB=false
74
+ end
75
75
  # ++
76
76
  # Modules for statistical analysis
77
77
  # See first:
@@ -80,77 +80,74 @@ end
80
80
  # * Dataset: A union of vectors.
81
81
  #
82
82
  module Statsample
83
-
84
- VERSION = '0.4.1'
85
- SPLIT_TOKEN = ","
86
- autoload(:Database, 'statsample/converters')
87
- autoload(:Anova, 'statsample/anova')
88
- autoload(:Combination, 'statsample/combination')
89
- autoload(:CSV, 'statsample/converters')
90
- autoload(:PlainText, 'statsample/converters')
91
- autoload(:Excel, 'statsample/converters')
92
- autoload(:GGobi, 'statsample/converters')
93
- autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
94
- autoload(:HtmlReport, 'statsample/htmlreport')
95
- autoload(:Mx, 'statsample/converters')
96
- autoload(:Resample, 'statsample/resample')
97
- autoload(:SRS, 'statsample/srs')
98
- autoload(:Codification, 'statsample/codification')
99
- autoload(:Reliability, 'statsample/reliability')
100
- autoload(:Bivariate, 'statsample/bivariate')
101
- autoload(:Multivariate, 'statsample/multivariate')
102
- autoload(:Multiset, 'statsample/multiset')
103
- autoload(:StratifiedSample, 'statsample/multiset')
104
- autoload(:MLE, 'statsample/mle')
105
- autoload(:Regression, 'statsample/regression')
106
- autoload(:Test, 'statsample/test')
107
- def self.load(filename)
108
- if File.exists? filename
109
- o=false
110
- File.open(filename,"r") {|fp|
111
- o=Marshal.load(fp)
112
- }
113
- o
114
- else
115
- false
116
- end
83
+ VERSION = '0.5.0'
84
+ SPLIT_TOKEN = ","
85
+ autoload(:Database, 'statsample/converters')
86
+ autoload(:Anova, 'statsample/anova')
87
+ autoload(:Combination, 'statsample/combination')
88
+ autoload(:CSV, 'statsample/converters')
89
+ autoload(:PlainText, 'statsample/converters')
90
+ autoload(:Excel, 'statsample/converters')
91
+ autoload(:GGobi, 'statsample/converters')
92
+ autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
93
+ autoload(:HtmlReport, 'statsample/htmlreport')
94
+ autoload(:Mx, 'statsample/converters')
95
+ autoload(:Resample, 'statsample/resample')
96
+ autoload(:SRS, 'statsample/srs')
97
+ autoload(:Codification, 'statsample/codification')
98
+ autoload(:Reliability, 'statsample/reliability')
99
+ autoload(:Bivariate, 'statsample/bivariate')
100
+ autoload(:Multivariate, 'statsample/multivariate')
101
+ autoload(:Multiset, 'statsample/multiset')
102
+ autoload(:StratifiedSample, 'statsample/multiset')
103
+ autoload(:MLE, 'statsample/mle')
104
+ autoload(:Regression, 'statsample/regression')
105
+ autoload(:Test, 'statsample/test')
106
+ def self.load(filename)
107
+ if File.exists? filename
108
+ o=false
109
+ File.open(filename,"r") {|fp| o=Marshal.load(fp) }
110
+ o
111
+ else
112
+ false
117
113
  end
114
+ end
118
115
 
119
116
  module Util
120
- # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
121
- def normal_order_statistic_medians(i,n)
122
- if i==1
123
- u= 1.0 - normal_order_statistic_medians(n,n)
124
- elsif i==n
125
- u=0.5**(1 / n.to_f)
126
- else
127
- u= (i - 0.3175) / (n + 0.365)
128
- end
129
- u
130
- end
131
- end
132
- module Writable
133
- def save(filename)
134
- fp=File.open(filename,"w")
135
- Marshal.dump(self,fp)
136
- fp.close
137
- end
117
+ # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
118
+ def normal_order_statistic_medians(i,n)
119
+ if i==1
120
+ u= 1.0 - normal_order_statistic_medians(n,n)
121
+ elsif i==n
122
+ u=0.5**(1 / n.to_f)
123
+ else
124
+ u= (i - 0.3175) / (n + 0.365)
125
+ end
126
+ u
138
127
  end
139
- module HtmlSummary
140
- def add_line(n=nil)
141
- self << "<hr />"
142
- end
143
- def nl
144
- self << "<br />"
145
- end
146
- def add(text)
147
- self << ("<p>"+text.gsub("\n","<br />")+"</p>")
148
- end
149
- def parse_table(table)
150
- self << table.parse_html
151
- end
152
- end
153
- module ConsoleSummary
128
+ end
129
+ module Writable
130
+ def save(filename)
131
+ fp=File.open(filename,"w")
132
+ Marshal.dump(self,fp)
133
+ fp.close
134
+ end
135
+ end
136
+ module HtmlSummary
137
+ def add_line(n=nil)
138
+ self << "<hr />"
139
+ end
140
+ def nl
141
+ self << "<br />"
142
+ end
143
+ def add(text)
144
+ self << ("<p>"+text.gsub("\n","<br />")+"</p>")
145
+ end
146
+ def parse_table(table)
147
+ self << table.parse_html
148
+ end
149
+ end
150
+ module ConsoleSummary
154
151
  def add_line(n=80)
155
152
  self << "-"*n+"\n"
156
153
  end
@@ -163,98 +160,97 @@ module Statsample
163
160
  def parse_table(table)
164
161
  self << table.parse_console
165
162
  end
163
+ end
164
+ class ReportTable
165
+ attr_reader :header
166
+ def initialize(h=[])
167
+ @rows=[]
168
+ @max_cols=[]
169
+ self.header=(h)
166
170
  end
167
- class ReportTable
168
- attr_reader :header
169
- def initialize(h=[])
170
- @rows=[]
171
- @max_cols=[]
172
- self.header=(h)
173
- end
174
- def add_row(row)
175
- row.each_index{|i|
176
- @max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
177
- }
178
- @rows.push(row)
179
- end
180
- def add_horizontal_line
181
- @rows.push(:hr)
182
- end
183
- def header=(h)
184
- h.each_index{|i|
185
- @max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
186
- }
187
- @header=h
188
- end
189
- def parse_console_row(row)
190
- out="| "
191
- @max_cols.each_index{|i|
192
- if row[i].nil?
193
- out << " "*(@max_cols[i]+2)+"|"
194
- else
195
- t=row[i].to_s
196
- out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
197
- end
198
- }
199
- out << "\n"
200
- out
201
- end
202
- def parse_console_hr
203
- "-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
204
- end
205
- def parse_console
206
- out="\n"
207
- out << parse_console_hr
208
- out << parse_console_row(header)
209
- out << parse_console_hr
210
-
211
- @rows.each{|row|
212
- if row==:hr
213
- out << parse_console_hr
214
- else
215
- out << parse_console_row(row)
216
- end
217
- }
218
- out << parse_console_hr
219
-
220
- out
221
- end
222
- def parse_html
223
- out="<table>\n"
224
- if header.size>0
225
- out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
171
+ def add_row(row)
172
+ row.each_index{|i|
173
+ @max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
174
+ }
175
+ @rows.push(row)
176
+ end
177
+ def add_horizontal_line
178
+ @rows.push(:hr)
179
+ end
180
+ def header=(h)
181
+ h.each_index{|i|
182
+ @max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
183
+ }
184
+ @header=h
185
+ end
186
+ def parse_console_row(row)
187
+ out="| "
188
+ @max_cols.each_index{|i|
189
+ if row[i].nil?
190
+ out << " "*(@max_cols[i]+2)+"|"
191
+ else
192
+ t=row[i].to_s
193
+ out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
194
+ end
195
+ }
196
+ out << "\n"
197
+ out
198
+ end
199
+ def parse_console_hr
200
+ "-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
201
+ end
202
+ def parse_console
203
+ out="\n"
204
+ out << parse_console_hr
205
+ out << parse_console_row(header)
206
+ out << parse_console_hr
207
+
208
+ @rows.each{|row|
209
+ if row==:hr
210
+ out << parse_console_hr
211
+ else
212
+ out << parse_console_row(row)
226
213
  end
227
- out << "<tbody>\n"
228
- row_with_line=false
229
- @rows.each{|row|
230
- if row==:hr
231
- row_with_line=true
232
- else
233
- out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
234
- out << row.join("</td><td>") +"</td>"
235
- out << "</tr>\n"
236
- row_with_line=false
237
- end
238
- }
239
- out << "</tbody></table>\n"
240
- out
214
+ }
215
+ out << parse_console_hr
216
+
217
+ out
218
+ end
219
+ def parse_html
220
+ out="<table>\n"
221
+ if header.size>0
222
+ out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
241
223
  end
224
+ out << "<tbody>\n"
225
+ row_with_line=false
226
+ @rows.each{|row|
227
+ if row==:hr
228
+ row_with_line=true
229
+ else
230
+ out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
231
+ out << row.join("</td><td>") +"</td>"
232
+ out << "</tr>\n"
233
+ row_with_line=false
234
+ end
235
+ }
236
+ out << "</tbody></table>\n"
237
+ out
242
238
  end
239
+ end
243
240
 
244
- module STATSAMPLE__ #:nodoc:
245
- end
246
-
241
+ module STATSAMPLE__ #:nodoc:
242
+ end
247
243
  end
248
244
 
249
245
 
250
246
 
251
247
  #--
252
248
  begin
253
- require 'statsamplert'
249
+ require 'statsamplert'
254
250
  rescue LoadError
255
- module Statsample
256
- OPTIMIZED=false
257
- end
251
+ module Statsample
252
+ OPTIMIZED=false
253
+ end
258
254
  end
259
255
 
260
256
  require 'statsample/vector'
@@ -43,7 +43,7 @@ module Statsample
43
43
  raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
44
44
  v=dataset[v_name]
45
45
  split_data=v.splitted(sep)
46
- factors=split_data.flatten.uniq.compact.sort.inject({}) {|a,v| a[v]=v;a}
46
+ factors=split_data.flatten.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac}
47
47
  h[v_name]=factors
48
48
  h
49
49
  }
@@ -0,0 +1,56 @@
1
+ module Statsample
2
+ class CSV < SpreadsheetBase
3
+ class << self
4
+ # Returns a Dataset based on a csv file
5
+ #
6
+ # USE:
7
+ # ds=Statsample::CSV.read("test_csv.csv")
8
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
+ require 'csv'
10
+ first_row=true
11
+ fields=[]
12
+ fields_data={}
13
+ ds=nil
14
+ line_number=0
15
+
16
+ ::CSV.open(filename,'r',fs,rs) do |row|
17
+ line_number+=1
18
+ if(line_number<=ignore_lines)
19
+ #puts "Skip line"
20
+ next
21
+ end
22
+ row.collect!{|c|
23
+ c.to_s
24
+ }
25
+ if first_row
26
+ fields=extract_fields(row)
27
+ ds=Statsample::Dataset.new(fields)
28
+ first_row=false
29
+ else
30
+ rowa=process_row(row,empty)
31
+ ds.add_case(rowa,false)
32
+ end
33
+ end
34
+ convert_to_scale(ds,fields)
35
+ ds.update_valid_data
36
+ ds
37
+ end
38
+ # Save a Dataset on a csv file
39
+ #
40
+ # USE:
41
+ # Statsample::CSV.write(ds,"test_csv.csv")
42
+ def write(dataset,filename, convert_comma=false,*opts)
43
+ require 'csv'
44
+ writer=::CSV.open(filename,'w',*opts)
45
+ writer << dataset.fields
46
+ dataset.each_array{|row|
47
+ if(convert_comma)
48
+ row.collect!{|v| v.to_s.gsub(".",",")}
49
+ end
50
+ writer << row
51
+ }
52
+ writer.close
53
+ end
54
+ end
55
+ end
56
+ end