statsample 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.5.0 / 2009-09-26
2
+ * Vector now uses a Hash as a third argument
3
+ * Tested on Ruby 1.8.6, 1.8.7 and 1.9.1 with multiruby
4
+
1
5
  === 0.4.1 / 2009-09-12
2
6
  * More methods and usage documentation
3
7
  * Logit tests
data/Manifest.txt CHANGED
@@ -34,6 +34,8 @@ lib/statsample/anova.rb
34
34
  lib/statsample/bivariate.rb
35
35
  lib/statsample/codification.rb
36
36
  lib/statsample/combination.rb
37
+ lib/statsample/converter/csv18.rb
38
+ lib/statsample/converter/csv19.rb
37
39
  lib/statsample/converters.rb
38
40
  lib/statsample/crosstab.rb
39
41
  lib/statsample/dataset.rb
@@ -56,6 +58,7 @@ lib/statsample/regression/binomial/logit.rb
56
58
  lib/statsample/regression/binomial/probit.rb
57
59
  lib/statsample/regression/multiple.rb
58
60
  lib/statsample/regression/multiple/alglibengine.rb
61
+ lib/statsample/regression/multiple/baseengine.rb
59
62
  lib/statsample/regression/multiple/gslengine.rb
60
63
  lib/statsample/regression/multiple/rubyengine.rb
61
64
  lib/statsample/regression/simple.rb
@@ -67,7 +70,6 @@ lib/statsample/vector.rb
67
70
  po/es/statsample.po
68
71
  po/statsample.pot
69
72
  setup.rb
70
- test/_test_chart.rb
71
73
  test/test_anova.rb
72
74
  test/test_codification.rb
73
75
  test/test_combination.rb
data/lib/statsample.rb CHANGED
@@ -29,49 +29,49 @@ class Numeric
29
29
  end
30
30
 
31
31
  class String
32
- def is_number?
33
- if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
34
- true
35
- else
36
- false
37
- end
32
+ def is_number?
33
+ if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
34
+ true
35
+ else
36
+ false
38
37
  end
38
+ end
39
39
  end
40
40
 
41
41
  def create_test(*args,&proc)
42
- description=args.shift
43
- fields=args
44
- [description, fields, Proc.new]
42
+ description=args.shift
43
+ fields=args
44
+ [description, fields, Proc.new]
45
45
  end
46
46
  #--
47
47
  # Test extensions
48
48
  begin
49
- require 'gettext'
50
- rescue LoadError
51
- def bindtextdomain(d) #:nodoc:
52
- d
49
+ require 'gettext'
50
+ rescue LoadError
51
+ def bindtextdomain(d) #:nodoc:
52
+ d
53
+ end
54
+
55
+ # Bored module
56
+ module GetText #:nodoc:
57
+ def _(t)
58
+ t
53
59
  end
54
-
55
- # Bored module
56
- module GetText #:nodoc:
57
- def _(t)
58
- t
59
- end
60
- end
60
+ end
61
61
  end
62
62
 
63
- begin
64
- require 'rbgsl'
65
- HAS_GSL=true
66
- rescue LoadError
67
- HAS_GSL=false
68
- end
69
- begin
70
- require 'alglib'
71
- HAS_ALGIB=true
72
- rescue LoadError
73
- HAS_ALGIB=false
74
- end
63
+ begin
64
+ require 'rbgsl'
65
+ HAS_GSL=true
66
+ rescue LoadError
67
+ HAS_GSL=false
68
+ end
69
+ begin
70
+ require 'alglib'
71
+ HAS_ALGIB=true
72
+ rescue LoadError
73
+ HAS_ALGIB=false
74
+ end
75
75
  # ++
76
76
  # Modules for statistical analysis
77
77
  # See first:
@@ -80,77 +80,74 @@ end
80
80
  # * Dataset: An union of vectors.
81
81
  #
82
82
  module Statsample
83
-
84
- VERSION = '0.4.1'
85
- SPLIT_TOKEN = ","
86
- autoload(:Database, 'statsample/converters')
87
- autoload(:Anova, 'statsample/anova')
88
- autoload(:Combination, 'statsample/combination')
89
- autoload(:CSV, 'statsample/converters')
90
- autoload(:PlainText, 'statsample/converters')
91
- autoload(:Excel, 'statsample/converters')
92
- autoload(:GGobi, 'statsample/converters')
93
- autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
94
- autoload(:HtmlReport, 'statsample/htmlreport')
95
- autoload(:Mx, 'statsample/converters')
96
- autoload(:Resample, 'statsample/resample')
97
- autoload(:SRS, 'statsample/srs')
98
- autoload(:Codification, 'statsample/codification')
99
- autoload(:Reliability, 'statsample/reliability')
100
- autoload(:Bivariate, 'statsample/bivariate')
101
- autoload(:Multivariate, 'statsample/multivariate')
102
- autoload(:Multiset, 'statsample/multiset')
103
- autoload(:StratifiedSample, 'statsample/multiset')
104
- autoload(:MLE, 'statsample/mle')
105
- autoload(:Regression, 'statsample/regression')
106
- autoload(:Test, 'statsample/test')
107
- def self.load(filename)
108
- if File.exists? filename
109
- o=false
110
- File.open(filename,"r") {|fp|
111
- o=Marshal.load(fp)
112
- }
113
- o
114
- else
115
- false
116
- end
83
+ VERSION = '0.5.0'
84
+ SPLIT_TOKEN = ","
85
+ autoload(:Database, 'statsample/converters')
86
+ autoload(:Anova, 'statsample/anova')
87
+ autoload(:Combination, 'statsample/combination')
88
+ autoload(:CSV, 'statsample/converters')
89
+ autoload(:PlainText, 'statsample/converters')
90
+ autoload(:Excel, 'statsample/converters')
91
+ autoload(:GGobi, 'statsample/converters')
92
+ autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
93
+ autoload(:HtmlReport, 'statsample/htmlreport')
94
+ autoload(:Mx, 'statsample/converters')
95
+ autoload(:Resample, 'statsample/resample')
96
+ autoload(:SRS, 'statsample/srs')
97
+ autoload(:Codification, 'statsample/codification')
98
+ autoload(:Reliability, 'statsample/reliability')
99
+ autoload(:Bivariate, 'statsample/bivariate')
100
+ autoload(:Multivariate, 'statsample/multivariate')
101
+ autoload(:Multiset, 'statsample/multiset')
102
+ autoload(:StratifiedSample, 'statsample/multiset')
103
+ autoload(:MLE, 'statsample/mle')
104
+ autoload(:Regression, 'statsample/regression')
105
+ autoload(:Test, 'statsample/test')
106
+ def self.load(filename)
107
+ if File.exists? filename
108
+ o=false
109
+ File.open(filename,"r") {|fp| o=Marshal.load(fp) }
110
+ o
111
+ else
112
+ false
117
113
  end
114
+ end
118
115
 
119
116
  module Util
120
- # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
121
- def normal_order_statistic_medians(i,n)
122
- if i==1
123
- u= 1.0 - normal_order_statistic_medians(n,n)
124
- elsif i==n
125
- u=0.5**(1 / n.to_f)
126
- else
127
- u= (i - 0.3175) / (n + 0.365)
128
- end
129
- u
130
- end
131
- end
132
- module Writable
133
- def save(filename)
134
- fp=File.open(filename,"w")
135
- Marshal.dump(self,fp)
136
- fp.close
137
- end
117
+ # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
118
+ def normal_order_statistic_medians(i,n)
119
+ if i==1
120
+ u= 1.0 - normal_order_statistic_medians(n,n)
121
+ elsif i==n
122
+ u=0.5**(1 / n.to_f)
123
+ else
124
+ u= (i - 0.3175) / (n + 0.365)
125
+ end
126
+ u
138
127
  end
139
- module HtmlSummary
140
- def add_line(n=nil)
141
- self << "<hr />"
142
- end
143
- def nl
144
- self << "<br />"
145
- end
146
- def add(text)
147
- self << ("<p>"+text.gsub("\n","<br />")+"</p>")
148
- end
149
- def parse_table(table)
150
- self << table.parse_html
151
- end
152
- end
153
- module ConsoleSummary
128
+ end
129
+ module Writable
130
+ def save(filename)
131
+ fp=File.open(filename,"w")
132
+ Marshal.dump(self,fp)
133
+ fp.close
134
+ end
135
+ end
136
+ module HtmlSummary
137
+ def add_line(n=nil)
138
+ self << "<hr />"
139
+ end
140
+ def nl
141
+ self << "<br />"
142
+ end
143
+ def add(text)
144
+ self << ("<p>"+text.gsub("\n","<br />")+"</p>")
145
+ end
146
+ def parse_table(table)
147
+ self << table.parse_html
148
+ end
149
+ end
150
+ module ConsoleSummary
154
151
  def add_line(n=80)
155
152
  self << "-"*n+"\n"
156
153
  end
@@ -163,98 +160,97 @@ module Statsample
163
160
  def parse_table(table)
164
161
  self << table.parse_console
165
162
  end
163
+ end
164
+ class ReportTable
165
+ attr_reader :header
166
+ def initialize(h=[])
167
+ @rows=[]
168
+ @max_cols=[]
169
+ self.header=(h)
166
170
  end
167
- class ReportTable
168
- attr_reader :header
169
- def initialize(h=[])
170
- @rows=[]
171
- @max_cols=[]
172
- self.header=(h)
173
- end
174
- def add_row(row)
175
- row.each_index{|i|
176
- @max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
177
- }
178
- @rows.push(row)
179
- end
180
- def add_horizontal_line
181
- @rows.push(:hr)
182
- end
183
- def header=(h)
184
- h.each_index{|i|
185
- @max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
186
- }
187
- @header=h
188
- end
189
- def parse_console_row(row)
190
- out="| "
191
- @max_cols.each_index{|i|
192
- if row[i].nil?
193
- out << " "*(@max_cols[i]+2)+"|"
194
- else
195
- t=row[i].to_s
196
- out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
197
- end
198
- }
199
- out << "\n"
200
- out
201
- end
202
- def parse_console_hr
203
- "-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
204
- end
205
- def parse_console
206
- out="\n"
207
- out << parse_console_hr
208
- out << parse_console_row(header)
209
- out << parse_console_hr
210
-
211
- @rows.each{|row|
212
- if row==:hr
213
- out << parse_console_hr
214
- else
215
- out << parse_console_row(row)
216
- end
217
- }
218
- out << parse_console_hr
219
-
220
- out
221
- end
222
- def parse_html
223
- out="<table>\n"
224
- if header.size>0
225
- out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
171
+ def add_row(row)
172
+ row.each_index{|i|
173
+ @max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
174
+ }
175
+ @rows.push(row)
176
+ end
177
+ def add_horizontal_line
178
+ @rows.push(:hr)
179
+ end
180
+ def header=(h)
181
+ h.each_index{|i|
182
+ @max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
183
+ }
184
+ @header=h
185
+ end
186
+ def parse_console_row(row)
187
+ out="| "
188
+ @max_cols.each_index{|i|
189
+ if row[i].nil?
190
+ out << " "*(@max_cols[i]+2)+"|"
191
+ else
192
+ t=row[i].to_s
193
+ out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
194
+ end
195
+ }
196
+ out << "\n"
197
+ out
198
+ end
199
+ def parse_console_hr
200
+ "-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
201
+ end
202
+ def parse_console
203
+ out="\n"
204
+ out << parse_console_hr
205
+ out << parse_console_row(header)
206
+ out << parse_console_hr
207
+
208
+ @rows.each{|row|
209
+ if row==:hr
210
+ out << parse_console_hr
211
+ else
212
+ out << parse_console_row(row)
226
213
  end
227
- out << "<tbody>\n"
228
- row_with_line=false
229
- @rows.each{|row|
230
- if row==:hr
231
- row_with_line=true
232
- else
233
- out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
234
- out << row.join("</td><td>") +"</td>"
235
- out << "</tr>\n"
236
- row_with_line=false
237
- end
238
- }
239
- out << "</tbody></table>\n"
240
- out
214
+ }
215
+ out << parse_console_hr
216
+
217
+ out
218
+ end
219
+ def parse_html
220
+ out="<table>\n"
221
+ if header.size>0
222
+ out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
241
223
  end
224
+ out << "<tbody>\n"
225
+ row_with_line=false
226
+ @rows.each{|row|
227
+ if row==:hr
228
+ row_with_line=true
229
+ else
230
+ out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
231
+ out << row.join("</td><td>") +"</td>"
232
+ out << "</tr>\n"
233
+ row_with_line=false
234
+ end
235
+ }
236
+ out << "</tbody></table>\n"
237
+ out
242
238
  end
239
+ end
243
240
 
244
- module STATSAMPLE__ #:nodoc:
245
- end
246
-
241
+ module STATSAMPLE__ #:nodoc:
242
+ end
247
243
  end
248
244
 
249
245
 
250
246
 
251
247
  #--
252
248
  begin
253
- require 'statsamplert'
249
+ require 'statsamplert'
254
250
  rescue LoadError
255
- module Statsample
256
- OPTIMIZED=false
257
- end
251
+ module Statsample
252
+ OPTIMIZED=false
253
+ end
258
254
  end
259
255
 
260
256
  require 'statsample/vector'
@@ -43,7 +43,7 @@ module Statsample
43
43
  raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
44
44
  v=dataset[v_name]
45
45
  split_data=v.splitted(sep)
46
- factors=split_data.flatten.uniq.compact.sort.inject({}) {|a,v| a[v]=v;a}
46
+ factors=split_data.flatten.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac}
47
47
  h[v_name]=factors
48
48
  h
49
49
  }
@@ -0,0 +1,56 @@
1
+ module Statsample
2
+ class CSV < SpreadsheetBase
3
+ class << self
4
+ # Returns a Dataset based on a csv file
5
+ #
6
+ # USE:
7
+ # ds=Statsample::CSV.read("test_csv.csv")
8
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
9
+ require 'csv'
10
+ first_row=true
11
+ fields=[]
12
+ fields_data={}
13
+ ds=nil
14
+ line_number=0
15
+
16
+ ::CSV.open(filename,'r',fs,rs) do |row|
17
+ line_number+=1
18
+ if(line_number<=ignore_lines)
19
+ #puts "Skip line"
20
+ next
21
+ end
22
+ row.collect!{|c|
23
+ c.to_s
24
+ }
25
+ if first_row
26
+ fields=extract_fields(row)
27
+ ds=Statsample::Dataset.new(fields)
28
+ first_row=false
29
+ else
30
+ rowa=process_row(row,empty)
31
+ ds.add_case(rowa,false)
32
+ end
33
+ end
34
+ convert_to_scale(ds,fields)
35
+ ds.update_valid_data
36
+ ds
37
+ end
38
+ # Save a Dataset on a csv file
39
+ #
40
+ # USE:
41
+ # Statsample::CSV.write(ds,"test_csv.csv")
42
+ def write(dataset,filename, convert_comma=false,*opts)
43
+ require 'csv'
44
+ writer=::CSV.open(filename,'w',*opts)
45
+ writer << dataset.fields
46
+ dataset.each_array{|row|
47
+ if(convert_comma)
48
+ row.collect!{|v| v.to_s.gsub(".",",")}
49
+ end
50
+ writer << row
51
+ }
52
+ writer.close
53
+ end
54
+ end
55
+ end
56
+ end