statsample 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -150,17 +150,16 @@ module Statsample
150
150
  @models=[]
151
151
  @models_data={}
152
152
  for i in 1..@fields.size
153
- c = GSL::Combination.calloc(@fields.size, i);
154
- begin
155
- convert=c.data.to_a.collect {|i|
156
- @fields[i]
153
+ c=Statsample::Combination.new(i,@fields.size)
154
+ c.each{|data|
155
+ convert=data.collect {|i|
156
+ @fields[i]
157
+ }
158
+ @models.push(convert)
159
+ ds_prev=@ds.dup(convert+[@y_var])
160
+ modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
161
+ @models_data[convert.sort]=modeldata
157
162
  }
158
- @models.push(convert)
159
- ds_prev=@ds.dup(convert+[@y_var])
160
- modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
161
-
162
- @models_data[convert.sort]=modeldata
163
- end while c.next == GSL::SUCCESS
164
163
  end
165
164
  end
166
165
  def summary(report_type=ConsoleSummary)
@@ -232,7 +231,12 @@ module Statsample
232
231
  @lr.r2
233
232
  end
234
233
  def add_table_row
235
- [@name.join("*"), sprintf("%0.3f",r2), sprintf("%0.3f",@lr.significance)]+@fields.collect{|k|
234
+ begin
235
+ sign=sprintf("%0.3f", @lr.significance)
236
+ rescue RuntimeError
237
+ sign="???"
238
+ end
239
+ [@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
236
240
  v=@contributions[k]
237
241
  if v.nil?
238
242
  "--"
@@ -51,26 +51,28 @@ class DominanceAnalysis
51
51
  @samples_cd={}
52
52
  @samples_gd={}
53
53
  @pairs=[]
54
- c = GSL::Combination.calloc(@fields.size, 2);
55
- begin
56
- convert=c.data.to_a.collect {|i|
54
+ c=Statsample::Combination.new(2,@fields.size)
55
+ c.each{|data|
56
+ convert=data.collect {|i|
57
57
  @fields[i]
58
58
  }
59
59
  @pairs.push(convert)
60
60
  [@samples_td,@samples_cd,@samples_gd].each{|s|
61
61
  s[convert]=[]
62
62
  }
63
- end while c.next == GSL::SUCCESS
64
- end
63
+ }
64
+ end
65
65
  def summary(report_type=ConsoleSummary)
66
66
  out =""
67
67
  raise "You should bootstrap first" if @n_samples==0
68
68
  alfa=0.95
69
- t=GSL::Cdf.tdist_Pinv(1-((1-alfa) / 2),@n_samples - 1)
70
69
  out.extend report_type
71
70
  out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
72
71
  out.add _("Sample size: %d\n") % @n_samples
73
- out.add "t:#{t}\n"
72
+ if HAS_GSL
73
+ t=GSL::Cdf.tdist_Pinv(1-((1-alfa) / 2),@n_samples - 1)
74
+ out.add "t:#{t}\n"
75
+ end
74
76
  out.add "Linear Regression Engine: #{@lr_class.name}"
75
77
  out.nl
76
78
  table=ReportTable.new
@@ -6,7 +6,7 @@ require 'SVG/Graph/Plot'
6
6
  require 'statsample/graph/svghistogram'
7
7
 
8
8
  module Statsample
9
- class Nominal
9
+ class Vector
10
10
  # Creates a barchart using ruby-gdchart
11
11
  def svggraph_frequencies(file, width=600, height=300, chart_type=SVG::Graph::BarNoOp, options={})
12
12
  labels,data=[],[]
@@ -26,8 +26,6 @@ module Statsample
26
26
  f.puts(graph.burn)
27
27
  }
28
28
  end
29
- end
30
- class Scale < Ordinal
31
29
  def svggraph_histogram(bins, options={})
32
30
  options={:graph_title=>"Histogram", :show_graph_title=>true,:show_normal=>true, :mean=>self.mean, :sigma=>sdp }.merge! options
33
31
  graph = Statsample::Graph::SvgHistogram.new(options)
@@ -1,4 +1,5 @@
1
1
  require 'statsample/regression/simple'
2
+ require 'statsample/regression/logit'
2
3
  require 'statsample/regression/multiple'
3
4
  require 'statsample/regression/multiple/alglibengine'
4
5
  require 'statsample/regression/multiple/rubyengine'
@@ -0,0 +1,35 @@
1
+ module Statsample
2
+ module Regression
3
+ class Logit
4
+ def initialize(ds,y_var)
5
+ @ds=ds
6
+ @y_var=y_var
7
+ end
8
+ def vp(x1,x2)
9
+ sum=0
10
+ x1.each_index{|i|
11
+ sum+=x1[i]*x2[i]
12
+ }
13
+ sum
14
+ end
15
+ # F(B'Xi)
16
+ def f(b,x)
17
+ Math::exp(vp(b,x)) / (1+Math::exp(vp(b,x)))
18
+ end
19
+ # f(B'Xi)
20
+ def fa(b,x)
21
+ f(b,x)*(1-f(b,x))
22
+ end
23
+ def l(b)
24
+ prod=1
25
+ y=@ds[@y_var]
26
+ @ds.each_array{|x|
27
+ x.unshift(1) # add constant
28
+ l=(f(b,x)**y[@ds.i])*((1.0-f(b,x))**(1.0-y[@ds.i]))
29
+ prod=prod*l
30
+ }
31
+ prod
32
+ end
33
+ end
34
+ end
35
+ end
@@ -194,13 +194,32 @@ out.add_line
194
194
  out.add "ANOVA TABLE"
195
195
 
196
196
  t=Statsample::ReportTable.new(%w{source ss df ms f s})
197
- t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f",significance)])
198
-
197
+ begin
198
+ t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
199
+ rescue RuntimeError
200
+ t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), "???", "???"])
201
+ end
199
202
  t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
200
203
 
201
204
  t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
202
205
 
203
206
  out.parse_table(t)
207
+
208
+ begin
209
+ out.add "Beta coefficientes"
210
+ sc=standarized_coeffs
211
+ cse=coeffs_se
212
+ t=Statsample::ReportTable.new(%w{coeff beta se t})
213
+ t.add_row(["Constant", "-",constant_se, constant_t])
214
+ @fields.each{|f|
215
+ t.add_row([f, sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
216
+ }
217
+ out.parse_table(t)
218
+
219
+ rescue
220
+
221
+ end
222
+
204
223
  out
205
224
  end
206
225
  def assign_names(c)
@@ -37,6 +37,8 @@ class AlglibEngine < BaseEngine
37
37
  matrix=Matrix.columns(columns)
38
38
  @lr_s=nil
39
39
  @lr=::Alglib::LinearRegression.build_from_matrix(matrix)
40
+ @coeffs=assign_names(@lr.coeffs)
41
+
40
42
  end
41
43
 
42
44
  def _dump(i)
@@ -48,7 +50,7 @@ class AlglibEngine < BaseEngine
48
50
  end
49
51
 
50
52
  def coeffs
51
- assign_names(@lr.coeffs)
53
+ @coeffs
52
54
  end
53
55
  # Coefficients using a constant
54
56
  # Based on http://www.xycoon.com/ols1.htm
@@ -39,11 +39,11 @@ module Statsample
39
39
  ds=Statsample::Dataset.new(h).dup_only_valid
40
40
  ds.vectors.values
41
41
  end
42
- class Vector < DelegateClass(Array)
43
-
42
+
43
+ class Vector
44
44
  include Enumerable
45
- attr_reader :type, :data, :valid_data, :missing_values, :missing_data, :data_with_nils
46
- attr_accessor :labels
45
+ attr_reader :type, :data, :valid_data, :missing_values, :missing_data, :data_with_nils, :gsl
46
+ attr_accessor :labels
47
47
  # Creates a new
48
48
  # data = Array of data
49
49
  # t = level of measurement. Could be:
@@ -61,9 +61,9 @@ class Vector < DelegateClass(Array)
61
61
  @data_with_nils=[]
62
62
  @missing_data=[]
63
63
  @has_missing_data=nil
64
- _set_valid_data
64
+ @scale_data=nil
65
+ set_valid_data_intern
65
66
  self.type=t
66
- super(@delegate)
67
67
  end
68
68
  def dup
69
69
  Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
@@ -78,23 +78,27 @@ class Vector < DelegateClass(Array)
78
78
  def vector_standarized_pop
79
79
  vector_standarized(true)
80
80
  end
81
-
81
+ def check_type(t)
82
+ raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal)
83
+ end
82
84
  # Return a vector using the standardized values for data
83
85
  # with sd with denominator n-1
84
86
 
85
87
  def vector_standarized(use_population=false)
86
88
  raise "Should be a scale" unless @type==:scale
87
- mean=@delegate.mean
88
- sd=use_population ? @delegate.sdp : @delegate.sds
89
+ m=mean
90
+ sd=use_population ? sdp : sds
89
91
  @data_with_nils.collect{|x|
90
92
  if !x.nil?
91
- (x.to_f - mean).quo(sd)
93
+ (x.to_f - m).quo(sd)
92
94
  else
93
95
  nil
94
96
  end
95
97
  }.to_vector(:scale)
96
98
  end
99
+
97
100
  alias_method :standarized, :vector_standarized
101
+
98
102
  def box_cox_transformation(lambda)
99
103
  raise "Should be a scale" unless @type==:scale
100
104
  @data_with_nils.collect{|x|
@@ -116,6 +120,7 @@ class Vector < DelegateClass(Array)
116
120
  raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
117
121
  @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
118
122
  end
123
+
119
124
  def _dump(i)
120
125
  Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
121
126
  end
@@ -155,24 +160,31 @@ class Vector < DelegateClass(Array)
155
160
  @valid_data.clear
156
161
  @missing_data.clear
157
162
  @data_with_nils.clear
158
- _set_valid_data
159
- @delegate.set_gsl if(@type==:scale)
163
+ @gsl=nil
164
+ set_valid_data_intern
165
+ set_scale_data if(@type==:scale)
160
166
  end
161
- def _set_valid_data
162
- if Statsample::OPTIMIZED
163
- Statsample::_set_valid_data(self)
164
- else
167
+
168
+ if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
169
+ def set_valid_data_intern
170
+ Statsample::STATSAMPLE__.set_valid_data_intern(self)
171
+ end
172
+ else
173
+ def set_valid_data_intern
174
+ _set_valid_data_intern
175
+ end
176
+ end
177
+ def _set_valid_data_intern
165
178
  @data.each do |n|
166
- if is_valid? n
179
+ if is_valid? n
167
180
  @valid_data.push(n)
168
181
  @data_with_nils.push(n)
169
- else
182
+ else
170
183
  @data_with_nils.push(nil)
171
184
  @missing_data.push(n)
172
- end
173
- end
174
- @has_missing_data=@missing_data.size>0
185
+ end
175
186
  end
187
+ @has_missing_data=@missing_data.size>0
176
188
  end
177
189
  # Returns true if data has one or more missing values
178
190
  def has_missing_data?
@@ -212,29 +224,13 @@ class Vector < DelegateClass(Array)
212
224
  end
213
225
  # Set level of measurement.
214
226
  def type=(t)
215
- case t
216
- when :nominal
217
- @delegate=Nominal.new(@valid_data)
218
- when :ordinal
219
- @delegate=Ordinal.new(@valid_data)
220
- when :scale
221
- @delegate=Scale.new(@valid_data)
222
- else
223
- raise "Type doesn't exists"
224
- end
225
- __setobj__(@delegate)
226
- @type=t
227
+ @type=t
228
+ set_scale_data if(t==:scale)
227
229
  end
228
230
  def n; @data.size ; end
229
231
  def to_a
230
232
  @data.dup
231
- end
232
- # Redundant, but necessary
233
- # Spreadsheet creates Array#sum, so calling sum
234
- # doesn't call the delegates method
235
- def sum
236
- @delegate.sum
237
- end
233
+ end
238
234
  alias_method :to_ary, :to_a
239
235
  # Vector sum.
240
236
  # - If v is a scalar, add this value to all elements
@@ -357,7 +353,13 @@ class Vector < DelegateClass(Array)
357
353
  # In all the trials, every item has the same probability
358
354
  # of being selected
359
355
  def sample_with_replacement(sample=1)
360
- Vector.new(@delegate.sample_with_replacement(sample) ,@type)
356
+ if(@type!=:scale)
357
+ vds=@valid_data.size
358
+ (0...sample).collect{ @valid_data[rand(vds)] }
359
+ else
360
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
361
+ r.sample(@gsl, sample).to_a
362
+ end
361
363
  end
362
364
  # Returns an random sample of size n, without replacement,
363
365
  # only with valid data.
@@ -366,9 +368,20 @@ class Vector < DelegateClass(Array)
366
368
  # A sample of the same size of the vector is the vector itself
367
369
 
368
370
  def sample_without_replacement(sample=1)
369
- Vector.new(@delegate.sample_without_replacement(sample),@type)
371
+ if(@type!=:scale)
372
+ raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
373
+ out=[]
374
+ size=@valid_data.size
375
+ while out.size<sample
376
+ value=rand(size)
377
+ out.push(value) if !out.include?value
378
+ end
379
+ out.collect{|i|@data[i]}
380
+ else
381
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
382
+ r.choose(@gsl, sample).to_a
383
+ end
370
384
  end
371
-
372
385
  def count(x=false)
373
386
  if block_given?
374
387
  r=@data.inject(0) {|s, i|
@@ -401,41 +414,37 @@ class Vector < DelegateClass(Array)
401
414
  true
402
415
  end
403
416
  end
404
- def summary(out="")
405
- @delegate.summary(@labels,out)
406
- end
407
417
  def to_s
408
418
  sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
409
419
  end
410
420
  def inspect
411
421
  self.to_s
412
422
  end
413
-
414
- end
415
-
416
-
417
-
418
- class Nominal
419
- def initialize(data)
420
- @data=data
421
- # @factors=data.uniq
422
- end
423
- def delegate_data
424
- @data
425
- end
426
- # Return an array of the different values of the data
427
423
  def factors
428
- @data.uniq.sort
429
- end
430
- # Returns a hash with the distribution of frecuencies of
431
- # the sample
432
- def frequencies_slow
433
- @data.inject(Hash.new) {|a,x|
434
- a[x]||=0
435
- a[x]=a[x]+1
436
- a
437
- }
424
+ if @type==:scale
425
+ @scale_data.uniq.sort
426
+ else
427
+ @valid_data.uniq.sort
428
+ end
429
+ end
430
+ if Statsample::STATSAMPLE__.respond_to?(:frequencies)
431
+ # Returns a hash with the distribution of frecuencies of
432
+ # the sample
433
+ def frequencies
434
+ Statsample::STATSAMPLE__.frequencies(@valid_data)
435
+ end
436
+ else
437
+ def frequencies
438
+ _frequencies
439
+ end
438
440
  end
441
+ def _frequencies
442
+ @valid_data.inject(Hash.new) {|a,x|
443
+ a[x]||=0
444
+ a[x]=a[x]+1
445
+ a
446
+ }
447
+ end
439
448
  # Plot frequencies on a chart, using gnuplot
440
449
  def plot_frequencies
441
450
  require 'gnuplot'
@@ -469,21 +478,21 @@ class Vector < DelegateClass(Array)
469
478
  end
470
479
  # The number of items with valid data
471
480
  def n_valid
472
- @data.size
481
+ @valid_data.size
473
482
  end
474
483
  # Returns a hash with the distribution of proportions of
475
484
  # the sample
476
485
  def proportions
477
486
  frequencies.inject({}){|a,v|
478
- a[v[0]] = v[1].quo(@data.size)
487
+ a[v[0]] = v[1].quo(n_valid)
479
488
  a
480
489
  }
481
490
  end
482
491
  # Proportion of a given value.
483
492
  def proportion(v=1)
484
- frequencies[v].quo(@data.size)
493
+ frequencies[v].quo(@valid_data.size)
485
494
  end
486
- def summary(labels,out="")
495
+ def summary(out="")
487
496
  out << sprintf("n valid:%d\n",n_valid)
488
497
  out << sprintf("factors:%s\n",factors.join(","))
489
498
  out << "mode:"+mode.to_s+"\n"
@@ -492,47 +501,32 @@ class Vector < DelegateClass(Array)
492
501
  key=labels.has_key?(k) ? labels[k]:k
493
502
  out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
494
503
  }
504
+ if(@type==:ordinal)
505
+ out << "median:"+median.to_s+"\n"
506
+ end
507
+ if(@type==:scale)
508
+ out << "mean:"+mean.to_s+"\n"
509
+ out << "sd:"+sd.to_s+"\n"
510
+
511
+ end
495
512
  out
496
513
  end
497
514
 
498
- # Returns an random sample of size n, with replacement,
499
- # only with valid data.
500
- #
501
- # In all the trails, every item have the same probability
502
- # of been selected
503
- def sample_with_replacement(sample)
504
- (0...sample).collect{ @data[rand(@data.size)] }
505
- end
506
- # Returns an random sample of size n, without replacement,
507
- # only with valid data.
508
- #
509
- # Every element could only be selected once
510
- # A sample of the same size of the vector is the vector itself
511
-
512
- def sample_without_replacement(sample)
513
- raise ArgumentError, "Sample size couldn't be greater than n" if sample>@data.size
514
- out=[]
515
- size=@data.size
516
- while out.size<sample
517
- value=rand(size)
518
- out.push(value) if !out.include?value
519
- end
520
- out.collect{|i|@data[i]}
521
- end
515
+
522
516
 
523
517
 
524
518
  # Variance of p, according to population size
525
519
  def variance_proportion(n_poblation, v=1)
526
- Statsample::proportion_variance_sample(self.proportion(v), @data.size, n_poblation)
520
+ Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
527
521
  end
528
522
  def variance_total(n_poblation, v=1)
529
- Statsample::total_variance_sample(self.proportion(v), @data.size, n_poblation)
523
+ Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
530
524
  end
531
525
  def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
532
- Statsample::proportion_confidence_interval_t(proportion(v), @data.size, n_poblation, margin)
526
+ Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
533
527
  end
534
528
  def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
535
- Statsample::proportion_confidence_interval_z(proportion(v), @data.size, n_poblation, margin)
529
+ Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
536
530
  end
537
531
  self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
538
532
  met_or=met.gsub("_slow","")
@@ -540,12 +534,11 @@ class Vector < DelegateClass(Array)
540
534
  alias_method met_or, met
541
535
  end
542
536
  }
543
- end
544
-
545
- class Ordinal <Nominal
537
+ # Ordinal Methods
546
538
  # Return the value of the percentil q
547
539
  def percentil(q)
548
- sorted=@data.sort
540
+ check_type :ordinal
541
+ sorted=@valid_data.sort
549
542
  v= (n_valid * q).quo(100)
550
543
  if(v.to_i!=v)
551
544
  sorted[v.to_i]
@@ -555,6 +548,7 @@ class Vector < DelegateClass(Array)
555
548
  end
556
549
  # Returns a ranked vector
557
550
  def ranked(type=:ordinal)
551
+ check_type :ordinal
558
552
  i=0
559
553
  r=frequencies.sort.inject({}){|a,v|
560
554
  a[v[0]]=(i+1 + i+v[1]).quo(2)
@@ -567,100 +561,88 @@ class Vector < DelegateClass(Array)
567
561
  end
568
562
  # Return the median (percentil 50)
569
563
  def median
564
+ check_type :ordinal
565
+ if HAS_GSL and @type==:scale
566
+ GSL::Stats::median_from_sorted_data(@gsl)
567
+ else
570
568
  percentil(50)
571
- end
572
- if HAS_GSL
573
- %w{median}.each{|m|
574
- m_nuevo=(m+"_slow").intern
575
- alias_method m_nuevo, m.intern
576
- }
577
-
578
- #def percentil(p)
579
- # v=GSL::Vector.alloc(@data.sort)
580
- # v.stats_quantile_from_sorted_data(p)
581
- #end
582
- def median # :nodoc:
583
- GSL::Stats::median_from_sorted_data(GSL::Vector.alloc(@data.sort))
584
569
  end
585
570
  end
586
571
  # Minimum value
587
- def min; @data.min;end
588
- # Maximum value
589
- def max; @data.max; end
590
-
591
-
592
- def summary(labels,out="")
593
- out << sprintf("n valid:%d\n",n_valid)
594
- out << "median:"+median.to_s+"\n"
595
- out << "percentil 25:"+percentil(25).to_s+"\n"
596
- out << "percentil 75:"+percentil(75).to_s+"\n"
597
- out
598
- end
599
- end
600
- class Scale <Ordinal
601
- attr_reader :gsl
602
- def initialize(data)
603
- # puts "Inicializando Scale..."
604
- super(data)
605
-
606
- set_gsl
572
+ def min;
573
+ check_type :ordinal
574
+ @valid_data.min;
607
575
  end
608
-
609
- def _dump(i)
610
- Marshal.dump(@data)
611
- end
612
- def _load(data)
613
- @data=Marshal.restore(data)
614
- set_gsl
576
+ # Maximum value
577
+ def max;
578
+ check_type :ordinal
579
+ @valid_data.max;
615
580
  end
616
- def set_gsl # :nodoc
617
- data = @data.collect!{|x|
618
- if x.is_a? Numeric
581
+
582
+ def set_scale_data # :nodoc
583
+ @scale_data=@valid_data.collect{|x|
584
+ if x.is_a? Numeric
619
585
  x
620
586
  elsif x.is_a? String and x.to_i==x.to_f
621
587
  x.to_i
622
588
  else
623
589
  x.to_f
624
590
  end
625
- }
626
- if HAS_GSL
627
- @gsl=GSL::Vector.alloc(@data) if @data.size>0
628
- end
591
+ }
592
+ if HAS_GSL
593
+ @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
629
594
  end
595
+ end
630
596
  # The range of the data (max - min)
631
- def range; @data.max - @data.min; end
597
+ def range;
598
+ check_type :scale
599
+ @scale_data.max - @scale_data.min
600
+ end
632
601
  # The sum of values for the data
633
602
  def sum
634
- @data.inject(0){|a,x|x+a} ; end
603
+ check_type :scale
604
+ @scale_data.inject(0){|a,x|x+a} ; end
635
605
  # The arithmetical mean of data
636
606
  def mean
607
+ check_type :scale
608
+
637
609
  sum.to_f.quo(n_valid)
638
610
  end
639
611
  def sum_of_squares(m=nil)
612
+ check_type :scale
613
+
640
614
  m||=mean
641
- @data.inject(0){|a,x| a+(x-m).square}
615
+ @scale_data.inject(0){|a,x| a+(x-m).square}
642
616
  end
643
617
 
644
618
  # Sum of squared deviation
645
619
  def sum_of_squared_deviation
646
- @data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
620
+ check_type :scale
621
+
622
+ @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
647
623
  end
648
624
 
649
625
  # Population variance (divided by n)
650
626
  def variance_population(m=nil)
627
+ check_type :scale
628
+
651
629
  m||=mean
652
- squares=@data.inject(0){|a,x| x.square+a}
630
+ squares=@scale_data.inject(0){|a,x| x.square+a}
653
631
  squares.quo(n_valid) - m.square
654
632
  end
655
633
 
656
634
 
657
635
  # Population Standard deviation (divided by n)
658
636
  def standard_deviation_population(m=nil)
637
+ check_type :scale
638
+
659
639
  Math::sqrt( variance_population(m) )
660
640
  end
661
641
  # Sample Variance (divided by n-1)
662
642
 
663
643
  def variance_sample(m=nil)
644
+ check_type :scale
645
+
664
646
  m||=mean
665
647
  sum_of_squares(m).quo(n_valid - 1)
666
648
  end
@@ -668,22 +650,30 @@ class Vector < DelegateClass(Array)
668
650
  # Sample Standard deviation (divided by n-1)
669
651
 
670
652
  def standard_deviation_sample(m=nil)
653
+ check_type :scale
654
+
671
655
  m||=m
672
656
  Math::sqrt(variance_sample(m))
673
657
  end
674
658
  def skew
659
+ check_type :scale
660
+
675
661
  m=mean
676
- thirds=@data.inject(0){|a,x| a+((x-mean)**3)}
677
- thirds.quo((@data.size-1)*sd**3)
662
+ thirds=@scale_data.inject(0){|a,x| a+((x-mean)**3)}
663
+ thirds.quo((@scale_data.size-1)*sd**3)
678
664
  end
679
665
  def kurtosis
666
+ check_type :scale
667
+
680
668
  m=mean
681
- thirds=@data.inject(0){|a,x| a+((x-mean)**4)}
682
- thirds.quo((@data.size-1)*sd**4)
669
+ thirds=@scale_data.inject(0){|a,x| a+((x-mean)**4)}
670
+ thirds.quo((@scale_data.size-1)*sd**4)
683
671
 
684
672
  end
685
673
  def product
686
- @data.inject(1){|a,x| a*x }
674
+ check_type :scale
675
+
676
+ @scale_data.inject(1){|a,x| a*x }
687
677
  end
688
678
  if HAS_GSL
689
679
  %w{skew kurtosis variance_sample standard_deviation_sample variance_population standard_deviation_population mean sum}.each{|m|
@@ -691,38 +681,50 @@ class Vector < DelegateClass(Array)
691
681
  alias_method m_nuevo, m.intern
692
682
  }
693
683
  def sum # :nodoc:
684
+ check_type :scale
685
+
694
686
  @gsl.sum
695
687
  end
696
688
  def mean # :nodoc:
689
+ check_type :scale
690
+
697
691
  @gsl.mean
698
692
  end
699
693
  def variance_sample(m=nil) # :nodoc:
694
+ check_type :scale
695
+
700
696
  m||=mean
701
697
  @gsl.variance_m
702
698
  end
703
699
  def standard_deviation_sample(m=nil) # :nodoc:
700
+ check_type :scale
704
701
  m||=mean
705
702
  @gsl.sd(m)
706
703
  end
707
704
 
708
705
  def variance_population(m=nil) # :nodoc:
706
+ check_type :scale
709
707
  m||=mean
710
708
  @gsl.variance_with_fixed_mean(m)
711
709
  end
712
710
  def standard_deviation_population(m=nil) # :nodoc:
711
+ check_type :scale
713
712
  m||=mean
714
713
  @gsl.sd_with_fixed_mean(m)
715
714
  end
716
715
  def skew
716
+ check_type :scale
717
717
  @gsl.skew
718
718
  end
719
719
  def kurtosis
720
+ check_type :scale
720
721
  @gsl.kurtosis
721
722
  end
722
723
  # Create a GSL::Histogram
723
724
  # With a fixnum, creates X bins within the range of data
724
725
  # With an Array, each value will be a cut point
725
726
  def histogram(bins=10)
727
+ check_type :scale
726
728
  if bins.is_a? Array
727
729
  h=GSL::Histogram.alloc(bins)
728
730
  else
@@ -734,35 +736,18 @@ class Vector < DelegateClass(Array)
734
736
  h
735
737
  end
736
738
  def plot_histogram(bins=10,options="")
739
+ check_type :scale
737
740
  self.histogram(bins).graph(options)
738
741
  end
739
- def sample_with_replacement(k)
740
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
741
- r.sample(@gsl, k).to_a
742
- end
743
- def sample_without_replacement(k)
744
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
745
- r.choose(@gsl, k).to_a
746
- end
742
+
747
743
  end
748
744
 
749
745
  # Coefficient of variation
750
746
  # Calculated with the sample standard deviation
751
747
  def coefficient_of_variation
748
+ check_type :scale
752
749
  standard_deviation_sample.quo(mean)
753
750
  end
754
- def summary(labels,out="")
755
- out << sprintf("n valid:%d\n",n_valid)
756
- out << "mean:"+mean.to_s+"\n"
757
- out << "sum:"+sum.to_s+"\n"
758
- out << "range:"+range.to_s+"\n"
759
- out << "variance (pop):"+variance_population.to_s+"\n"
760
- out << "sd (pop):"+sdp.to_s+"\n"
761
- out << "variance (sample):"+variance_sample.to_s+"\n"
762
- out << "sd (sample):"+sds.to_s+"\n"
763
-
764
- out
765
- end
766
751
 
767
752
  alias_method :sdp, :standard_deviation_population
768
753
  alias_method :sds, :standard_deviation_sample