statsample 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -150,17 +150,16 @@ module Statsample
150
150
  @models=[]
151
151
  @models_data={}
152
152
  for i in 1..@fields.size
153
- c = GSL::Combination.calloc(@fields.size, i);
154
- begin
155
- convert=c.data.to_a.collect {|i|
156
- @fields[i]
153
+ c=Statsample::Combination.new(i,@fields.size)
154
+ c.each{|data|
155
+ convert=data.collect {|i|
156
+ @fields[i]
157
+ }
158
+ @models.push(convert)
159
+ ds_prev=@ds.dup(convert+[@y_var])
160
+ modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
161
+ @models_data[convert.sort]=modeldata
157
162
  }
158
- @models.push(convert)
159
- ds_prev=@ds.dup(convert+[@y_var])
160
- modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
161
-
162
- @models_data[convert.sort]=modeldata
163
- end while c.next == GSL::SUCCESS
164
163
  end
165
164
  end
166
165
  def summary(report_type=ConsoleSummary)
@@ -232,7 +231,12 @@ module Statsample
232
231
  @lr.r2
233
232
  end
234
233
  def add_table_row
235
- [@name.join("*"), sprintf("%0.3f",r2), sprintf("%0.3f",@lr.significance)]+@fields.collect{|k|
234
+ begin
235
+ sign=sprintf("%0.3f", @lr.significance)
236
+ rescue RuntimeError
237
+ sign="???"
238
+ end
239
+ [@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
236
240
  v=@contributions[k]
237
241
  if v.nil?
238
242
  "--"
@@ -51,26 +51,28 @@ class DominanceAnalysis
51
51
  @samples_cd={}
52
52
  @samples_gd={}
53
53
  @pairs=[]
54
- c = GSL::Combination.calloc(@fields.size, 2);
55
- begin
56
- convert=c.data.to_a.collect {|i|
54
+ c=Statsample::Combination.new(2,@fields.size)
55
+ c.each{|data|
56
+ convert=data.collect {|i|
57
57
  @fields[i]
58
58
  }
59
59
  @pairs.push(convert)
60
60
  [@samples_td,@samples_cd,@samples_gd].each{|s|
61
61
  s[convert]=[]
62
62
  }
63
- end while c.next == GSL::SUCCESS
64
- end
63
+ }
64
+ end
65
65
  def summary(report_type=ConsoleSummary)
66
66
  out =""
67
67
  raise "You should bootstrap first" if @n_samples==0
68
68
  alfa=0.95
69
- t=GSL::Cdf.tdist_Pinv(1-((1-alfa) / 2),@n_samples - 1)
70
69
  out.extend report_type
71
70
  out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
72
71
  out.add _("Sample size: %d\n") % @n_samples
73
- out.add "t:#{t}\n"
72
+ if HAS_GSL
73
+ t=GSL::Cdf.tdist_Pinv(1-((1-alfa) / 2),@n_samples - 1)
74
+ out.add "t:#{t}\n"
75
+ end
74
76
  out.add "Linear Regression Engine: #{@lr_class.name}"
75
77
  out.nl
76
78
  table=ReportTable.new
@@ -6,7 +6,7 @@ require 'SVG/Graph/Plot'
6
6
  require 'statsample/graph/svghistogram'
7
7
 
8
8
  module Statsample
9
- class Nominal
9
+ class Vector
10
10
  # Creates a barchart using ruby-gdchart
11
11
  def svggraph_frequencies(file, width=600, height=300, chart_type=SVG::Graph::BarNoOp, options={})
12
12
  labels,data=[],[]
@@ -26,8 +26,6 @@ module Statsample
26
26
  f.puts(graph.burn)
27
27
  }
28
28
  end
29
- end
30
- class Scale < Ordinal
31
29
  def svggraph_histogram(bins, options={})
32
30
  options={:graph_title=>"Histogram", :show_graph_title=>true,:show_normal=>true, :mean=>self.mean, :sigma=>sdp }.merge! options
33
31
  graph = Statsample::Graph::SvgHistogram.new(options)
@@ -1,4 +1,5 @@
1
1
  require 'statsample/regression/simple'
2
+ require 'statsample/regression/logit'
2
3
  require 'statsample/regression/multiple'
3
4
  require 'statsample/regression/multiple/alglibengine'
4
5
  require 'statsample/regression/multiple/rubyengine'
@@ -0,0 +1,35 @@
1
+ module Statsample
2
+ module Regression
3
+ class Logit
4
+ def initialize(ds,y_var)
5
+ @ds=ds
6
+ @y_var=y_var
7
+ end
8
+ def vp(x1,x2)
9
+ sum=0
10
+ x1.each_index{|i|
11
+ sum+=x1[i]*x2[i]
12
+ }
13
+ sum
14
+ end
15
+ # F(B'Xi)
16
+ def f(b,x)
17
+ Math::exp(vp(b,x)) / (1+Math::exp(vp(b,x)))
18
+ end
19
+ # f(B'Xi)
20
+ def fa(b,x)
21
+ f(b,x)*(1-f(b,x))
22
+ end
23
+ def l(b)
24
+ prod=1
25
+ y=@ds[@y_var]
26
+ @ds.each_array{|x|
27
+ x.unshift(1) # add constant
28
+ l=(f(b,x)**y[@ds.i])*((1.0-f(b,x))**(1.0-y[@ds.i]))
29
+ prod=prod*l
30
+ }
31
+ prod
32
+ end
33
+ end
34
+ end
35
+ end
@@ -194,13 +194,32 @@ out.add_line
194
194
  out.add "ANOVA TABLE"
195
195
 
196
196
  t=Statsample::ReportTable.new(%w{source ss df ms f s})
197
- t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f",significance)])
198
-
197
+ begin
198
+ t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
199
+ rescue RuntimeError
200
+ t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), "???", "???"])
201
+ end
199
202
  t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
200
203
 
201
204
  t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
202
205
 
203
206
  out.parse_table(t)
207
+
208
+ begin
209
+ out.add "Beta coefficientes"
210
+ sc=standarized_coeffs
211
+ cse=coeffs_se
212
+ t=Statsample::ReportTable.new(%w{coeff beta se t})
213
+ t.add_row(["Constant", "-",constant_se, constant_t])
214
+ @fields.each{|f|
215
+ t.add_row([f, sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
216
+ }
217
+ out.parse_table(t)
218
+
219
+ rescue
220
+
221
+ end
222
+
204
223
  out
205
224
  end
206
225
  def assign_names(c)
@@ -37,6 +37,8 @@ class AlglibEngine < BaseEngine
37
37
  matrix=Matrix.columns(columns)
38
38
  @lr_s=nil
39
39
  @lr=::Alglib::LinearRegression.build_from_matrix(matrix)
40
+ @coeffs=assign_names(@lr.coeffs)
41
+
40
42
  end
41
43
 
42
44
  def _dump(i)
@@ -48,7 +50,7 @@ class AlglibEngine < BaseEngine
48
50
  end
49
51
 
50
52
  def coeffs
51
- assign_names(@lr.coeffs)
53
+ @coeffs
52
54
  end
53
55
  # Coefficients using a constant
54
56
  # Based on http://www.xycoon.com/ols1.htm
@@ -39,11 +39,11 @@ module Statsample
39
39
  ds=Statsample::Dataset.new(h).dup_only_valid
40
40
  ds.vectors.values
41
41
  end
42
- class Vector < DelegateClass(Array)
43
-
42
+
43
+ class Vector
44
44
  include Enumerable
45
- attr_reader :type, :data, :valid_data, :missing_values, :missing_data, :data_with_nils
46
- attr_accessor :labels
45
+ attr_reader :type, :data, :valid_data, :missing_values, :missing_data, :data_with_nils, :gsl
46
+ attr_accessor :labels
47
47
  # Creates a new
48
48
  # data = Array of data
49
49
  # t = level of meausurement. Could be:
@@ -61,9 +61,9 @@ class Vector < DelegateClass(Array)
61
61
  @data_with_nils=[]
62
62
  @missing_data=[]
63
63
  @has_missing_data=nil
64
- _set_valid_data
64
+ @scale_data=nil
65
+ set_valid_data_intern
65
66
  self.type=t
66
- super(@delegate)
67
67
  end
68
68
  def dup
69
69
  Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
@@ -78,23 +78,27 @@ class Vector < DelegateClass(Array)
78
78
  def vector_standarized_pop
79
79
  vector_standarized(true)
80
80
  end
81
-
81
+ def check_type(t)
82
+ raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal)
83
+ end
82
84
  # Return a vector usign the standarized values for data
83
85
  # with sd with denominator n-1
84
86
 
85
87
  def vector_standarized(use_population=false)
86
88
  raise "Should be a scale" unless @type==:scale
87
- mean=@delegate.mean
88
- sd=use_population ? @delegate.sdp : @delegate.sds
89
+ m=mean
90
+ sd=use_population ? sdp : sds
89
91
  @data_with_nils.collect{|x|
90
92
  if !x.nil?
91
- (x.to_f - mean).quo(sd)
93
+ (x.to_f - m).quo(sd)
92
94
  else
93
95
  nil
94
96
  end
95
97
  }.to_vector(:scale)
96
98
  end
99
+
97
100
  alias_method :standarized, :vector_standarized
101
+
98
102
  def box_cox_transformation(lambda)
99
103
  raise "Should be a scale" unless @type==:scale
100
104
  @data_with_nils.collect{|x|
@@ -116,6 +120,7 @@ class Vector < DelegateClass(Array)
116
120
  raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
117
121
  @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
118
122
  end
123
+
119
124
  def _dump(i)
120
125
  Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
121
126
  end
@@ -155,24 +160,31 @@ class Vector < DelegateClass(Array)
155
160
  @valid_data.clear
156
161
  @missing_data.clear
157
162
  @data_with_nils.clear
158
- _set_valid_data
159
- @delegate.set_gsl if(@type==:scale)
163
+ @gsl=nil
164
+ set_valid_data_intern
165
+ set_scale_data if(@type==:scale)
160
166
  end
161
- def _set_valid_data
162
- if Statsample::OPTIMIZED
163
- Statsample::_set_valid_data(self)
164
- else
167
+
168
+ if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
169
+ def set_valid_data_intern
170
+ Statsample::STATSAMPLE__.set_valid_data_intern(self)
171
+ end
172
+ else
173
+ def set_valid_data_intern
174
+ _set_valid_data_intern
175
+ end
176
+ end
177
+ def _set_valid_data_intern
165
178
  @data.each do |n|
166
- if is_valid? n
179
+ if is_valid? n
167
180
  @valid_data.push(n)
168
181
  @data_with_nils.push(n)
169
- else
182
+ else
170
183
  @data_with_nils.push(nil)
171
184
  @missing_data.push(n)
172
- end
173
- end
174
- @has_missing_data=@missing_data.size>0
185
+ end
175
186
  end
187
+ @has_missing_data=@missing_data.size>0
176
188
  end
177
189
  # Retrieves true if data has one o more missing values
178
190
  def has_missing_data?
@@ -212,29 +224,13 @@ class Vector < DelegateClass(Array)
212
224
  end
213
225
  # Set level of measurement.
214
226
  def type=(t)
215
- case t
216
- when :nominal
217
- @delegate=Nominal.new(@valid_data)
218
- when :ordinal
219
- @delegate=Ordinal.new(@valid_data)
220
- when :scale
221
- @delegate=Scale.new(@valid_data)
222
- else
223
- raise "Type doesn't exists"
224
- end
225
- __setobj__(@delegate)
226
- @type=t
227
+ @type=t
228
+ set_scale_data if(t==:scale)
227
229
  end
228
230
  def n; @data.size ; end
229
231
  def to_a
230
232
  @data.dup
231
- end
232
- # Redundant, but necessary
233
- # Spreadsheet creates Array#sum, so calling sum
234
- # doesn't call the delegates method
235
- def sum
236
- @delegate.sum
237
- end
233
+ end
238
234
  alias_method :to_ary, :to_a
239
235
  # Vector sum.
240
236
  # - If v is a scalar, add this value to all elements
@@ -357,7 +353,13 @@ class Vector < DelegateClass(Array)
357
353
  # In all the trails, every item have the same probability
358
354
  # of been selected
359
355
  def sample_with_replacement(sample=1)
360
- Vector.new(@delegate.sample_with_replacement(sample) ,@type)
356
+ if(@type!=:scale)
357
+ vds=@valid_data.size
358
+ (0...sample).collect{ @valid_data[rand(vds)] }
359
+ else
360
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
361
+ r.sample(@gsl, sample).to_a
362
+ end
361
363
  end
362
364
  # Returns an random sample of size n, without replacement,
363
365
  # only with valid data.
@@ -366,9 +368,20 @@ class Vector < DelegateClass(Array)
366
368
  # A sample of the same size of the vector is the vector itself
367
369
 
368
370
  def sample_without_replacement(sample=1)
369
- Vector.new(@delegate.sample_without_replacement(sample),@type)
371
+ if(@type!=:scale)
372
+ raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
373
+ out=[]
374
+ size=@valid_data.size
375
+ while out.size<sample
376
+ value=rand(size)
377
+ out.push(value) if !out.include?value
378
+ end
379
+ out.collect{|i|@data[i]}
380
+ else
381
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
382
+ r.choose(@gsl, sample).to_a
383
+ end
370
384
  end
371
-
372
385
  def count(x=false)
373
386
  if block_given?
374
387
  r=@data.inject(0) {|s, i|
@@ -401,41 +414,37 @@ class Vector < DelegateClass(Array)
401
414
  true
402
415
  end
403
416
  end
404
- def summary(out="")
405
- @delegate.summary(@labels,out)
406
- end
407
417
  def to_s
408
418
  sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
409
419
  end
410
420
  def inspect
411
421
  self.to_s
412
422
  end
413
-
414
- end
415
-
416
-
417
-
418
- class Nominal
419
- def initialize(data)
420
- @data=data
421
- # @factors=data.uniq
422
- end
423
- def delegate_data
424
- @data
425
- end
426
- # Return an array of the different values of the data
427
423
  def factors
428
- @data.uniq.sort
429
- end
430
- # Returns a hash with the distribution of frecuencies of
431
- # the sample
432
- def frequencies_slow
433
- @data.inject(Hash.new) {|a,x|
434
- a[x]||=0
435
- a[x]=a[x]+1
436
- a
437
- }
424
+ if @type==:scale
425
+ @scale_data.uniq.sort
426
+ else
427
+ @valid_data.uniq.sort
428
+ end
429
+ end
430
+ if Statsample::STATSAMPLE__.respond_to?(:frequencies)
431
+ # Returns a hash with the distribution of frecuencies of
432
+ # the sample
433
+ def frequencies
434
+ Statsample::STATSAMPLE__.frequencies(@valid_data)
435
+ end
436
+ else
437
+ def frequencies
438
+ _frequencies
439
+ end
438
440
  end
441
+ def _frequencies
442
+ @valid_data.inject(Hash.new) {|a,x|
443
+ a[x]||=0
444
+ a[x]=a[x]+1
445
+ a
446
+ }
447
+ end
439
448
  # Plot frequencies on a chart, using gnuplot
440
449
  def plot_frequencies
441
450
  require 'gnuplot'
@@ -469,21 +478,21 @@ class Vector < DelegateClass(Array)
469
478
  end
470
479
  # The numbers of item with valid data
471
480
  def n_valid
472
- @data.size
481
+ @valid_data.size
473
482
  end
474
483
  # Returns a hash with the distribution of proportions of
475
484
  # the sample
476
485
  def proportions
477
486
  frequencies.inject({}){|a,v|
478
- a[v[0]] = v[1].quo(@data.size)
487
+ a[v[0]] = v[1].quo(n_valid)
479
488
  a
480
489
  }
481
490
  end
482
491
  # Proportion of a given value.
483
492
  def proportion(v=1)
484
- frequencies[v].quo(@data.size)
493
+ frequencies[v].quo(@valid_data.size)
485
494
  end
486
- def summary(labels,out="")
495
+ def summary(out="")
487
496
  out << sprintf("n valid:%d\n",n_valid)
488
497
  out << sprintf("factors:%s\n",factors.join(","))
489
498
  out << "mode:"+mode.to_s+"\n"
@@ -492,47 +501,32 @@ class Vector < DelegateClass(Array)
492
501
  key=labels.has_key?(k) ? labels[k]:k
493
502
  out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
494
503
  }
504
+ if(@type==:ordinal)
505
+ out << "median:"+median.to_s+"\n"
506
+ end
507
+ if(@type==:scale)
508
+ out << "mean:"+mean.to_s+"\n"
509
+ out << "sd:"+sd.to_s+"\n"
510
+
511
+ end
495
512
  out
496
513
  end
497
514
 
498
- # Returns an random sample of size n, with replacement,
499
- # only with valid data.
500
- #
501
- # In all the trails, every item have the same probability
502
- # of been selected
503
- def sample_with_replacement(sample)
504
- (0...sample).collect{ @data[rand(@data.size)] }
505
- end
506
- # Returns an random sample of size n, without replacement,
507
- # only with valid data.
508
- #
509
- # Every element could only be selected once
510
- # A sample of the same size of the vector is the vector itself
511
-
512
- def sample_without_replacement(sample)
513
- raise ArgumentError, "Sample size couldn't be greater than n" if sample>@data.size
514
- out=[]
515
- size=@data.size
516
- while out.size<sample
517
- value=rand(size)
518
- out.push(value) if !out.include?value
519
- end
520
- out.collect{|i|@data[i]}
521
- end
515
+
522
516
 
523
517
 
524
518
  # Variance of p, according to poblation size
525
519
  def variance_proportion(n_poblation, v=1)
526
- Statsample::proportion_variance_sample(self.proportion(v), @data.size, n_poblation)
520
+ Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
527
521
  end
528
522
  def variance_total(n_poblation, v=1)
529
- Statsample::total_variance_sample(self.proportion(v), @data.size, n_poblation)
523
+ Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
530
524
  end
531
525
  def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
532
- Statsample::proportion_confidence_interval_t(proportion(v), @data.size, n_poblation, margin)
526
+ Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
533
527
  end
534
528
  def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
535
- Statsample::proportion_confidence_interval_z(proportion(v), @data.size, n_poblation, margin)
529
+ Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
536
530
  end
537
531
  self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
538
532
  met_or=met.gsub("_slow","")
@@ -540,12 +534,11 @@ class Vector < DelegateClass(Array)
540
534
  alias_method met_or, met
541
535
  end
542
536
  }
543
- end
544
-
545
- class Ordinal <Nominal
537
+ # Ordinal Methods
546
538
  # Return the value of the percentil q
547
539
  def percentil(q)
548
- sorted=@data.sort
540
+ check_type :ordinal
541
+ sorted=@valid_data.sort
549
542
  v= (n_valid * q).quo(100)
550
543
  if(v.to_i!=v)
551
544
  sorted[v.to_i]
@@ -555,6 +548,7 @@ class Vector < DelegateClass(Array)
555
548
  end
556
549
  # Returns a ranked vector
557
550
  def ranked(type=:ordinal)
551
+ check_type :ordinal
558
552
  i=0
559
553
  r=frequencies.sort.inject({}){|a,v|
560
554
  a[v[0]]=(i+1 + i+v[1]).quo(2)
@@ -567,100 +561,88 @@ class Vector < DelegateClass(Array)
567
561
  end
568
562
  # Return the median (percentil 50)
569
563
  def median
564
+ check_type :ordinal
565
+ if HAS_GSL and @type==:scale
566
+ GSL::Stats::median_from_sorted_data(@gsl)
567
+ else
570
568
  percentil(50)
571
- end
572
- if HAS_GSL
573
- %w{median}.each{|m|
574
- m_nuevo=(m+"_slow").intern
575
- alias_method m_nuevo, m.intern
576
- }
577
-
578
- #def percentil(p)
579
- # v=GSL::Vector.alloc(@data.sort)
580
- # v.stats_quantile_from_sorted_data(p)
581
- #end
582
- def median # :nodoc:
583
- GSL::Stats::median_from_sorted_data(GSL::Vector.alloc(@data.sort))
584
569
  end
585
570
  end
586
571
  # Minimun value
587
- def min; @data.min;end
588
- # Maximum value
589
- def max; @data.max; end
590
-
591
-
592
- def summary(labels,out="")
593
- out << sprintf("n valid:%d\n",n_valid)
594
- out << "median:"+median.to_s+"\n"
595
- out << "percentil 25:"+percentil(25).to_s+"\n"
596
- out << "percentil 75:"+percentil(75).to_s+"\n"
597
- out
598
- end
599
- end
600
- class Scale <Ordinal
601
- attr_reader :gsl
602
- def initialize(data)
603
- # puts "Inicializando Scale..."
604
- super(data)
605
-
606
- set_gsl
572
+ def min;
573
+ check_type :ordinal
574
+ @valid_data.min;
607
575
  end
608
-
609
- def _dump(i)
610
- Marshal.dump(@data)
611
- end
612
- def _load(data)
613
- @data=Marshal.restore(data)
614
- set_gsl
576
+ # Maximum value
577
+ def max;
578
+ check_type :ordinal
579
+ @valid_data.max;
615
580
  end
616
- def set_gsl # :nodoc
617
- data = @data.collect!{|x|
618
- if x.is_a? Numeric
581
+
582
+ def set_scale_data # :nodoc
583
+ @scale_data=@valid_data.collect{|x|
584
+ if x.is_a? Numeric
619
585
  x
620
586
  elsif x.is_a? String and x.to_i==x.to_f
621
587
  x.to_i
622
588
  else
623
589
  x.to_f
624
590
  end
625
- }
626
- if HAS_GSL
627
- @gsl=GSL::Vector.alloc(@data) if @data.size>0
628
- end
591
+ }
592
+ if HAS_GSL
593
+ @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
629
594
  end
595
+ end
630
596
  # The range of the data (max - min)
631
- def range; @data.max - @data.min; end
597
+ def range;
598
+ check_type :scale
599
+ @scale_data.max - @scale_data.min
600
+ end
632
601
  # The sum of values for the data
633
602
  def sum
634
- @data.inject(0){|a,x|x+a} ; end
603
+ check_type :scale
604
+ @scale_data.inject(0){|a,x|x+a} ; end
635
605
  # The arithmetical mean of data
636
606
  def mean
607
+ check_type :scale
608
+
637
609
  sum.to_f.quo(n_valid)
638
610
  end
639
611
  def sum_of_squares(m=nil)
612
+ check_type :scale
613
+
640
614
  m||=mean
641
- @data.inject(0){|a,x| a+(x-m).square}
615
+ @scale_data.inject(0){|a,x| a+(x-m).square}
642
616
  end
643
617
 
644
618
  # Sum of squared deviation
645
619
  def sum_of_squared_deviation
646
- @data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
620
+ check_type :scale
621
+
622
+ @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
647
623
  end
648
624
 
649
625
  # Population variance (divided by n)
650
626
  def variance_population(m=nil)
627
+ check_type :scale
628
+
651
629
  m||=mean
652
- squares=@data.inject(0){|a,x| x.square+a}
630
+ squares=@scale_data.inject(0){|a,x| x.square+a}
653
631
  squares.quo(n_valid) - m.square
654
632
  end
655
633
 
656
634
 
657
635
  # Population Standard deviation (divided by n)
658
636
  def standard_deviation_population(m=nil)
637
+ check_type :scale
638
+
659
639
  Math::sqrt( variance_population(m) )
660
640
  end
661
641
  # Sample Variance (divided by n-1)
662
642
 
663
643
  def variance_sample(m=nil)
644
+ check_type :scale
645
+
664
646
  m||=mean
665
647
  sum_of_squares(m).quo(n_valid - 1)
666
648
  end
@@ -668,22 +650,30 @@ class Vector < DelegateClass(Array)
668
650
  # Sample Standard deviation (divided by n-1)
669
651
 
670
652
  def standard_deviation_sample(m=nil)
653
+ check_type :scale
654
+
671
655
  m||=m
672
656
  Math::sqrt(variance_sample(m))
673
657
  end
674
658
  def skew
659
+ check_type :scale
660
+
675
661
  m=mean
676
- thirds=@data.inject(0){|a,x| a+((x-mean)**3)}
677
- thirds.quo((@data.size-1)*sd**3)
662
+ thirds=@scale_data.inject(0){|a,x| a+((x-mean)**3)}
663
+ thirds.quo((@scale_data.size-1)*sd**3)
678
664
  end
679
665
  def kurtosis
666
+ check_type :scale
667
+
680
668
  m=mean
681
- thirds=@data.inject(0){|a,x| a+((x-mean)**4)}
682
- thirds.quo((@data.size-1)*sd**4)
669
+ thirds=@scale_data.inject(0){|a,x| a+((x-mean)**4)}
670
+ thirds.quo((@scale_data.size-1)*sd**4)
683
671
 
684
672
  end
685
673
  def product
686
- @data.inject(1){|a,x| a*x }
674
+ check_type :scale
675
+
676
+ @scale_data.inject(1){|a,x| a*x }
687
677
  end
688
678
  if HAS_GSL
689
679
  %w{skew kurtosis variance_sample standard_deviation_sample variance_population standard_deviation_population mean sum}.each{|m|
@@ -691,38 +681,50 @@ class Vector < DelegateClass(Array)
691
681
  alias_method m_nuevo, m.intern
692
682
  }
693
683
  def sum # :nodoc:
684
+ check_type :scale
685
+
694
686
  @gsl.sum
695
687
  end
696
688
  def mean # :nodoc:
689
+ check_type :scale
690
+
697
691
  @gsl.mean
698
692
  end
699
693
  def variance_sample(m=nil) # :nodoc:
694
+ check_type :scale
695
+
700
696
  m||=mean
701
697
  @gsl.variance_m
702
698
  end
703
699
  def standard_deviation_sample(m=nil) # :nodoc:
700
+ check_type :scale
704
701
  m||=mean
705
702
  @gsl.sd(m)
706
703
  end
707
704
 
708
705
  def variance_population(m=nil) # :nodoc:
706
+ check_type :scale
709
707
  m||=mean
710
708
  @gsl.variance_with_fixed_mean(m)
711
709
  end
712
710
  def standard_deviation_population(m=nil) # :nodoc:
711
+ check_type :scale
713
712
  m||=mean
714
713
  @gsl.sd_with_fixed_mean(m)
715
714
  end
716
715
  def skew
716
+ check_type :scale
717
717
  @gsl.skew
718
718
  end
719
719
  def kurtosis
720
+ check_type :scale
720
721
  @gsl.kurtosis
721
722
  end
722
723
  # Create a GSL::Histogram
723
724
  # With a fixnum, creates X bins within the range of data
724
725
  # With an Array, each value will be a cut point
725
726
  def histogram(bins=10)
727
+ check_type :scale
726
728
  if bins.is_a? Array
727
729
  h=GSL::Histogram.alloc(bins)
728
730
  else
@@ -734,35 +736,18 @@ class Vector < DelegateClass(Array)
734
736
  h
735
737
  end
736
738
  def plot_histogram(bins=10,options="")
739
+ check_type :scale
737
740
  self.histogram(bins).graph(options)
738
741
  end
739
- def sample_with_replacement(k)
740
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
741
- r.sample(@gsl, k).to_a
742
- end
743
- def sample_without_replacement(k)
744
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
745
- r.choose(@gsl, k).to_a
746
- end
742
+
747
743
  end
748
744
 
749
745
  # Coefficient of variation
750
746
  # Calculed with the sample standard deviation
751
747
  def coefficient_of_variation
748
+ check_type :scale
752
749
  standard_deviation_sample.quo(mean)
753
750
  end
754
- def summary(labels,out="")
755
- out << sprintf("n valid:%d\n",n_valid)
756
- out << "mean:"+mean.to_s+"\n"
757
- out << "sum:"+sum.to_s+"\n"
758
- out << "range:"+range.to_s+"\n"
759
- out << "variance (pop):"+variance_population.to_s+"\n"
760
- out << "sd (pop):"+sdp.to_s+"\n"
761
- out << "variance (sample):"+variance_sample.to_s+"\n"
762
- out << "sd (sample):"+sds.to_s+"\n"
763
-
764
- out
765
- end
766
751
 
767
752
  alias_method :sdp, :standard_deviation_population
768
753
  alias_method :sds, :standard_deviation_sample