statsample 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,8 +5,9 @@ module Statsample::VectorShorthands
5
5
  # Creates a new Statsample::Vector object
6
6
  # Argument should be equal to Vector.new
7
7
  def to_vector(*args)
8
- Statsample::Vector.new(self,*args)
9
- end
8
+ Statsample::Vector.new(self,*args)
9
+ end
10
+
10
11
  # Creates a new Statsample::Vector object of type :scale
11
12
  def to_scale(*args)
12
13
  Statsample::Vector.new(self, :scale, *args)
@@ -26,27 +27,29 @@ if Statsample.has_gsl?
26
27
  end
27
28
  module Statsample
28
29
 
29
-
30
+
30
31
  # Collection of values on one dimension. Works as a column on a Spreadsheet.
31
- #
32
+ #
32
33
  # == Usage
33
34
  # The fast way to create a vector uses Array.to_vector or Array.to_scale.
34
35
  #
35
36
  # v=[1,2,3,4].to_vector(:scale)
36
37
  # v=[1,2,3,4].to_scale
37
- #
38
+ #
38
39
  class Vector
39
40
  include Enumerable
40
41
  include Writable
41
42
  include Summarizable
43
+ include Statsample::VectorShorthands
44
+
42
45
  # Level of measurement. Could be :nominal, :ordinal or :scale
43
46
  attr_reader :type
44
- # Original data.
47
+ # Original data.
45
48
  attr_reader :data
46
49
  # Valid data. Equal to data, minus values assigned as missing values
47
50
  attr_reader :valid_data
48
51
  # Array of values considered as missing. Nil is a missing value, by default
49
- attr_reader :missing_values
52
+ attr_reader :missing_values
50
53
  # Array of values considered as "Today", with date type. "NOW", "TODAY", :NOW and :TODAY are 'today' values, by default
51
54
  attr_reader :today_values
52
55
  # Missing values array
@@ -59,7 +62,7 @@ module Statsample
59
62
  attr_accessor :labels
60
63
  # Name of vector. Should be used for output by many classes
61
64
  attr_accessor :name
62
-
65
+
63
66
  # Creates a new Vector object.
64
67
  # * <tt>data</tt> Any data which can be converted to an Array
65
68
  # * <tt>type</tt> Level of measurement. See Vector#type
@@ -123,7 +126,7 @@ module Statsample
123
126
  # Parameters
124
127
  # [n] Size
125
128
  # [val] Value of each value
126
- # [&block] If block provided, is used to set the values of vector
129
+ # [&block] If block provided, is used to set the values of vector
127
130
  def self.new_scale(n,val=nil, &block)
128
131
  if block
129
132
  vector=n.times.map {|i| block.call(i)}.to_scale
@@ -144,7 +147,7 @@ module Statsample
144
147
  def dup_empty
145
148
  Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=> @name)
146
149
  end
147
-
150
+
148
151
  if Statsample::STATSAMPLE__.respond_to?(:check_type)
149
152
  # Raises an exception if type of vector is inferior to t type
150
153
  def check_type(t)
@@ -155,8 +158,8 @@ module Statsample
155
158
  _check_type(t)
156
159
  end
157
160
  end
158
-
159
-
161
+
162
+
160
163
  def _check_type(t) #:nodoc:
161
164
  raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal) or (t==:date) or (:date==@type)
162
165
  end
@@ -167,12 +170,12 @@ module Statsample
167
170
  # Return a vector using the standardized values for data
168
171
  # with sd with denominator n-1. With variance=0 or mean nil,
169
172
  # returns a vector of equal size full of nils
170
- #
173
+ #
171
174
  def vector_standarized(use_population=false)
172
175
  check_type :scale
173
176
  m=mean
174
177
  sd=use_population ? sdp : sds
175
- return ([nil]*size).to_scale if mean.nil? or sd==0.0
178
+ return ([nil]*size).to_scale if mean.nil? or sd==0.0
176
179
  vector=vector_standarized_compute(m,sd)
177
180
  vector.name=_("%s(standarized)") % @name
178
181
  vector
@@ -189,7 +192,7 @@ module Statsample
189
192
  vector.name=_("%s(centered)") % @name
190
193
  vector
191
194
  end
192
-
195
+
193
196
  alias_method :standarized, :vector_standarized
194
197
  alias_method :centered, :vector_centered
195
198
  # Return a vector with values replaced with the percentiles
@@ -215,24 +218,24 @@ module Statsample
215
218
  end
216
219
  }.to_vector(:scale)
217
220
  end
218
-
221
+
219
222
  # Vector equality.
220
223
  # Two vectors are equal if their data, missing values, type and labels are equal
221
224
  def ==(v2)
222
- raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
225
+ return false unless v2.instance_of? Statsample::Vector
223
226
  @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels==v2.labels
224
227
  end
225
-
228
+
226
229
  def _dump(i) # :nodoc:
227
230
  Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type,'name'=>@name})
228
231
  end
229
-
232
+
230
233
  def self._load(data) # :nodoc:
231
234
  h=Marshal.load(data)
232
235
  Vector.new(h['data'], h['type'], :missing_values=> h['missing_values'], :labels=>h['labels'], :name=>h['name'])
233
236
  end
234
237
  # Returns a new vector, with data modified by block.
235
- # Equivalent to create a Vector after #collect on data
238
+ # Equivalent to create a Vector after #collect on data
236
239
  def recode(type=nil)
237
240
  type||=@type
238
241
  @data.collect{|x|
@@ -240,7 +243,7 @@ module Statsample
240
243
  }.to_vector(type)
241
244
  end
242
245
  # Modifies current vector, with data modified by block.
243
- # Equivalent to #collect! on @data
246
+ # Equivalent to #collect! on @data
244
247
  def recode!
245
248
  @data.collect!{|x|
246
249
  yield x
@@ -251,21 +254,22 @@ module Statsample
251
254
  @data.push(v)
252
255
  set_valid_data
253
256
  end
257
+
254
258
  # Dichotomize the vector into 0 and 1, based on the lowest value
255
259
  # If the parameter is defined, this value and lower
256
260
  # will be 0 and higher, 1
257
- def dichotomize(low=nil)
258
- fs=factors
259
- low||=factors.min
260
- @data_with_nils.collect{|x|
261
+ def dichotomize(low = nil)
262
+ low ||= factors.min
263
+
264
+ @data_with_nils.collect do |x|
261
265
  if x.nil?
262
266
  nil
263
- elsif x>low
267
+ elsif x > low
264
268
  1
265
269
  else
266
270
  0
267
271
  end
268
- }.to_scale
272
+ end.to_scale
269
273
  end
270
274
  # Iterate on each item.
271
275
  # Equivalent to
@@ -273,7 +277,7 @@ module Statsample
273
277
  def each
274
278
  @data.each{|x| yield(x) }
275
279
  end
276
-
280
+
277
281
  # Iterate on each item, retrieving index
278
282
  def each_index
279
283
  (0...@data.size).each {|i|
@@ -318,7 +322,7 @@ module Statsample
318
322
  end
319
323
  else
320
324
  def set_valid_data_intern #:nodoc:
321
- _set_valid_data_intern
325
+ _set_valid_data_intern
322
326
  end
323
327
  end
324
328
  def _set_valid_data_intern #:nodoc:
@@ -333,19 +337,19 @@ module Statsample
333
337
  end
334
338
  @has_missing_data=@missing_data.size>0
335
339
  end
336
-
340
+
337
341
  # Returns true if data has one or more missing values
338
342
  def has_missing_data?
339
343
  @has_missing_data
340
344
  end
341
- alias :flawed? :has_missing_data?
342
-
345
+ alias :flawed? :has_missing_data?
346
+
343
347
  # Retrieves label for value x. Retrieves x if
344
348
  # no label defined.
345
349
  def labeling(x)
346
350
  @labels.has_key?(x) ? @labels[x].to_s : x.to_s
347
351
  end
348
- alias :label :labeling
352
+ alias :label :labeling
349
353
  # Returns a Vector with data with labels replaced by the label.
350
354
  def vector_labeled
351
355
  d=@data.collect{|x|
@@ -362,12 +366,12 @@ module Statsample
362
366
  @data.size
363
367
  end
364
368
  alias_method :n, :size
365
-
369
+
366
370
  # Retrieves i element of data
367
371
  def [](i)
368
372
  @data[i]
369
373
  end
370
- # Set i element of data.
374
+ # Set i element of data.
371
375
  # Note: Use set_valid_data if you include missing values
372
376
  def []=(i,v)
373
377
  @data[i]=v
@@ -387,9 +391,9 @@ module Statsample
387
391
  @today_values = vals
388
392
  set_valid_data
389
393
  end
390
- # Set level of measurement.
394
+ # Set level of measurement.
391
395
  def type=(t)
392
- @type=t
396
+ @type=t
393
397
  set_scale_data if(t==:scale)
394
398
  set_date_data if (t==:date)
395
399
  end
@@ -400,9 +404,9 @@ module Statsample
400
404
  @data.to_a
401
405
  end
402
406
  end
403
- alias_method :to_ary, :to_a
404
-
405
- # Vector sum.
407
+ alias_method :to_ary, :to_a
408
+
409
+ # Vector sum.
406
410
  # - If v is a scalar, add this value to all elements
407
411
  # - If v is an Array or a Vector, it should be the same size as this vector
408
412
  # every item of this vector will be added to the value of the
@@ -410,17 +414,17 @@ module Statsample
410
414
  def +(v)
411
415
  _vector_ari("+",v)
412
416
  end
413
- # Vector rest.
417
+ # Vector rest.
414
418
  # - If v is a scalar, subtract this value from all elements
415
- # - If v is a Array or a Vector, should be of the same
419
+ # - If v is a Array or a Vector, should be of the same
416
420
  # size of this vector
417
421
  # every item of this vector will be rested to the value of the
418
422
  # item at the same position on the other vector
419
-
423
+
420
424
  def -(v)
421
425
  _vector_ari("-",v)
422
426
  end
423
-
427
+
424
428
  def *(v)
425
429
  _vector_ari("*",v)
426
430
  end
@@ -459,7 +463,7 @@ module Statsample
459
463
  else
460
464
  raise TypeError,"You should pass a scalar or a array/vector"
461
465
  end
462
-
466
+
463
467
  end
464
468
  # Return an array with the data split by a separator.
465
469
  # a=Vector.new(["a,b","c,d","a,b","d"])
@@ -483,11 +487,11 @@ module Statsample
483
487
  #
484
488
  # a=Vector.new(["a,b","c,d","a,b"])
485
489
  # a.split_by_separator
486
- # => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
487
- # @data=[1, 0, 1]>,
488
- # "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
489
- # @data=[1, 1, 0]>,
490
- # "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
490
+ # => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
491
+ # @data=[1, 0, 1]>,
492
+ # "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
493
+ # @data=[1, 1, 0]>,
494
+ # "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
491
495
  # @data=[0, 1, 1]>}
492
496
  #
493
497
  def split_by_separator(sep=Statsample::SPLIT_TOKEN)
@@ -504,7 +508,7 @@ module Statsample
504
508
  end
505
509
  else
506
510
  factors.each do |f|
507
- out[f].push(r.include?(f) ? 1:0)
511
+ out[f].push(r.include?(f) ? 1:0)
508
512
  end
509
513
  end
510
514
  end
@@ -519,11 +523,11 @@ module Statsample
519
523
  a
520
524
  }
521
525
  end
522
-
526
+
523
527
  # == Bootstrap
524
528
  # Generate +nr+ resamples (with replacement) of size +s+
525
529
  # from vector, computing each estimate from +estimators+
526
- # over each resample.
530
+ # over each resample.
527
531
  # +estimators+ could be
528
532
  # a) Hash with variable names as keys and lambdas as values
529
533
  # a.bootstrap(:log_s2=>lambda {|v| Math.log(v.variance)},1000)
@@ -532,33 +536,33 @@ module Statsample
532
536
  # c) A single method to bootstrap
533
537
  # a.bootstrap(:mean, 1000)
534
538
  # If s is nil, is set to vector size by default.
535
- #
539
+ #
536
540
  # Returns a dataset where each vector is a vector
537
541
  # of length +nr+ containing the computed resample estimates.
538
542
  def bootstrap(estimators, nr, s=nil)
539
543
  s||=n
540
-
544
+
541
545
  h_est, es, bss= prepare_bootstrap(estimators)
542
-
543
-
546
+
547
+
544
548
  nr.times do |i|
545
549
  bs=sample_with_replacement(s)
546
- es.each do |estimator|
550
+ es.each do |estimator|
547
551
  # Add bootstrap
548
552
  bss[estimator].push(h_est[estimator].call(bs))
549
553
  end
550
554
  end
551
-
555
+
552
556
  es.each do |est|
553
557
  bss[est]=bss[est].to_scale
554
558
  bss[est].type=:scale
555
559
  end
556
560
  bss.to_dataset
557
-
561
+
558
562
  end
559
-
563
+
560
564
  # == Jacknife
561
- # Returns a dataset with jacknife delete-+k+ +estimators+
565
+ # Returns a dataset with jacknife delete-+k+ +estimators+
562
566
  # +estimators+ could be:
563
567
  # a) Hash with variable names as keys and lambdas as values
564
568
  # a.jacknife(:log_s2=>lambda {|v| Math.log(v.variance)})
@@ -571,23 +575,23 @@ module Statsample
571
575
  #
572
576
  # Returns a dataset where each vector is a vector
573
577
  # of length +cases+/+k+ containing the computed jacknife estimates.
574
- #
578
+ #
575
579
  # == Reference:
576
580
  # * Sawyer, S. (2005). Resampling Data: Using a Statistical Jacknife.
577
581
  def jacknife(estimators, k=1)
578
582
  raise "n should be divisible by k:#{k}" unless n%k==0
579
-
583
+
580
584
  nb=(n / k).to_i
581
-
582
-
585
+
586
+
583
587
  h_est, es, ps= prepare_bootstrap(estimators)
584
588
 
585
589
  est_n=es.inject({}) {|h,v|
586
590
  h[v]=h_est[v].call(self)
587
591
  h
588
592
  }
589
-
590
-
593
+
594
+
591
595
  nb.times do |i|
592
596
  other=@data_with_nils.dup
593
597
  other.slice!(i*k,k)
@@ -597,16 +601,16 @@ module Statsample
597
601
  ps[estimator].push( nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other))
598
602
  end
599
603
  end
600
-
601
-
604
+
605
+
602
606
  es.each do |est|
603
607
  ps[est]=ps[est].to_scale
604
608
  ps[est].type=:scale
605
609
  end
606
610
  ps.to_dataset
607
611
  end
608
-
609
-
612
+
613
+
610
614
  # For an array or hash of estimators methods, returns
611
615
  # an array with three elements
612
616
  # 1.- A hash with estimators names as keys and lambdas as values
@@ -614,23 +618,23 @@ module Statsample
614
618
  # 3.- A Hash with estimators names as keys and empty arrays as values
615
619
  def prepare_bootstrap(estimators)
616
620
  h_est=estimators
617
-
621
+
618
622
  h_est=[h_est] unless h_est.is_a? Array or h_est.is_a? Hash
619
-
623
+
620
624
  if h_est.is_a? Array
621
625
  h_est=h_est.inject({}) {|h,est|
622
626
  h[est]=lambda {|v| v.send(est)}
623
627
  h
624
628
  }
625
629
  end
626
-
630
+
627
631
  bss=h_est.keys.inject({}) {|h,v| h[v]=[];h}
628
-
632
+
629
633
  [h_est,h_est.keys, bss]
630
-
634
+
631
635
  end
632
636
  private :prepare_bootstrap
633
-
637
+
634
638
  # Returns a random sample of size n, with replacement,
635
639
  # only with valid data.
636
640
  #
@@ -644,9 +648,9 @@ module Statsample
644
648
  # only with valid data.
645
649
  #
646
650
  # Every element could only be selected once.
647
- #
651
+ #
648
652
  # A sample of the same size of the vector is the vector itself.
649
-
653
+
650
654
  def sample_without_replacement(sample=1)
651
655
  raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
652
656
  out=[]
@@ -673,9 +677,9 @@ module Statsample
673
677
  frequencies[x].nil? ? 0 : frequencies[x]
674
678
  end
675
679
  end
676
-
680
+
677
681
  # Returns the database type for the vector, according to its content
678
-
682
+
679
683
  def db_type(dbs='mysql')
680
684
  # first, detect any character not number
681
685
  if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
@@ -690,7 +694,7 @@ module Statsample
690
694
  end
691
695
  # Return true if all data is Date, "today" values or nil
692
696
  def can_be_date?
693
- if @data.find {|v|
697
+ if @data.find {|v|
694
698
  !v.nil? and !v.is_a? Date and !v.is_a? Time and (v.is_a? String and !@today_values.include? v) and (v.is_a? String and !(v=~/\d{4,4}[-\/]\d{1,2}[-\/]\d{1,2}/))}
695
699
  false
696
700
  else
@@ -705,7 +709,7 @@ module Statsample
705
709
  true
706
710
  end
707
711
  end
708
-
712
+
709
713
  def to_s
710
714
  sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
711
715
  end
@@ -734,7 +738,7 @@ module Statsample
734
738
  end
735
739
  if Statsample::STATSAMPLE__.respond_to?(:frequencies)
736
740
  # Returns a hash with the distribution of frequencies for
737
- # the sample
741
+ # the sample
738
742
  def frequencies
739
743
  Statsample::STATSAMPLE__.frequencies(@valid_data)
740
744
  end
@@ -743,8 +747,8 @@ module Statsample
743
747
  _frequencies
744
748
  end
745
749
  end
746
-
747
-
750
+
751
+
748
752
  def _frequencies #:nodoc:
749
753
  @valid_data.inject(Hash.new) {|a,x|
750
754
  a[x]||=0
@@ -752,7 +756,7 @@ module Statsample
752
756
  a
753
757
  }
754
758
  end
755
-
759
+
756
760
  # Returns the most frequent item.
757
761
  def mode
758
762
  frequencies.max{|a,b| a[1]<=>b[1]}.first
@@ -775,12 +779,12 @@ module Statsample
775
779
  end
776
780
  def report_building(b)
777
781
  b.section(:name=>name) do |s|
778
- s.text _("n :%d") % n
782
+ s.text _("n :%d") % n
779
783
  s.text _("n valid:%d") % n_valid
780
784
  if @type==:nominal
781
- s.text _("factors:%s") % factors.join(",")
782
- s.text _("mode: %s") % mode
783
-
785
+ s.text _("factors:%s") % factors.join(",")
786
+ s.text _("mode: %s") % mode
787
+
784
788
  s.table(:name=>_("Distribution")) do |t|
785
789
  frequencies.sort.each do |k,v|
786
790
  key=labels.has_key?(k) ? labels[k]:k
@@ -788,7 +792,7 @@ module Statsample
788
792
  end
789
793
  end
790
794
  end
791
-
795
+
792
796
  s.text _("median: %s") % median.to_s if(@type==:ordinal or @type==:scale)
793
797
  if(@type==:scale)
794
798
  s.text _("mean: %0.4f") % mean
@@ -801,7 +805,7 @@ module Statsample
801
805
  end
802
806
  end
803
807
  end
804
-
808
+
805
809
  # Variance of p, according to population size
806
810
  def variance_proportion(n_poblation, v=1)
807
811
  Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
@@ -816,29 +820,58 @@ module Statsample
816
820
  def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
817
821
  Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
818
822
  end
819
-
823
+
820
824
  self.instance_methods.find_all{|met| met=~/_slow$/}.each do |met|
821
825
  met_or=met.gsub("_slow","")
822
826
  if !self.method_defined?(met_or)
823
827
  alias_method met_or, met
824
828
  end
825
829
  end
826
-
830
+
827
831
  ######
828
832
  ### Ordinal Methods
829
833
  ######
830
-
831
- # Return the value of the percentil q
832
- def percentil(q)
834
+
835
+ # == Percentil
836
+ # Returns the value of the percentile q
837
+ #
838
+ # Accepts an optional second argument specifying the strategy to interpolate
839
+ # when the requested percentile lies between two data points a and b
840
+ # Valid strategies are:
841
+ # * :midpoint (Default): (a + b) / 2
842
+ # * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
843
+ # This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
844
+ #
845
+ def percentil(q, strategy = :midpoint)
833
846
  check_type :ordinal
834
847
  sorted=@valid_data.sort
835
- v= (n_valid * q).quo(100)
836
- if(v.to_i!=v)
837
- sorted[v.to_i]
848
+
849
+ case strategy
850
+ when :midpoint
851
+ v = (n_valid * q).quo(100)
852
+ if(v.to_i!=v)
853
+ sorted[v.to_i]
854
+ else
855
+ (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
856
+ end
857
+ when :linear
858
+ index = (q / 100.0) * (n_valid + 1)
859
+
860
+ k = index.truncate
861
+ d = index % 1
862
+
863
+ if k == 0
864
+ sorted[0]
865
+ elsif k >= sorted.size
866
+ sorted[-1]
867
+ else
868
+ sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
869
+ end
838
870
  else
839
- (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
871
+ raise NotImplementedError.new "Unknown strategy #{strategy.to_s}"
840
872
  end
841
873
  end
874
+
842
875
  # Returns a ranked vector.
843
876
  def ranked(type=:ordinal)
844
877
  check_type :ordinal
@@ -856,7 +889,7 @@ module Statsample
856
889
  percentil(50)
857
890
  end
858
891
  # Minimum value
859
- def min
892
+ def min
860
893
  check_type :ordinal
861
894
  @valid_data.min
862
895
  end
@@ -865,7 +898,7 @@ module Statsample
865
898
  check_type :ordinal
866
899
  @valid_data.max
867
900
  end
868
-
901
+
869
902
  def set_date_data
870
903
  @date_data_with_nils=@data.collect do|x|
871
904
  if x.is_a? Date
@@ -881,7 +914,7 @@ module Statsample
881
914
  end
882
915
  end
883
916
  end
884
-
917
+
885
918
  def set_scale_data
886
919
  @scale_data=@valid_data.collect do|x|
887
920
  if x.is_a? Numeric
@@ -893,18 +926,18 @@ module Statsample
893
926
  end
894
927
  end
895
928
  end
896
-
929
+
897
930
  private :set_date_data, :set_scale_data
898
-
931
+
899
932
  # The range of the data (max - min)
900
- def range;
933
+ def range;
901
934
  check_type :scale
902
935
  @scale_data.max - @scale_data.min
903
936
  end
904
937
  # The sum of values for the data
905
938
  def sum
906
939
  check_type :scale
907
- @scale_data.inject(0){|a,x|x+a} ;
940
+ @scale_data.inject(0){|a,x|x+a} ;
908
941
  end
909
942
  # The arithmetical mean of data
910
943
  def mean
@@ -914,7 +947,7 @@ module Statsample
914
947
  # Sum of squares for the data around a value.
915
948
  # By default, this value is the mean
916
949
  # ss= sum{(xi-m)^2}
917
- #
950
+ #
918
951
  def sum_of_squares(m=nil)
919
952
  check_type :scale
920
953
  m||=mean
@@ -925,7 +958,7 @@ module Statsample
925
958
  check_type :scale
926
959
  @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
927
960
  end
928
-
961
+
929
962
  # Population variance (denominator N)
930
963
  def variance_population(m=nil)
931
964
  check_type :scale
@@ -933,8 +966,8 @@ module Statsample
933
966
  squares=@scale_data.inject(0){|a,x| x.square+a}
934
967
  squares.quo(n_valid) - m.square
935
968
  end
936
-
937
-
969
+
970
+
938
971
  # Population Standard deviation (denominator N)
939
972
  def standard_deviation_population(m=nil)
940
973
  check_type :scale
@@ -943,7 +976,7 @@ module Statsample
943
976
 
944
977
  # Population average deviation (denominator N)
945
978
  # author: Al Chou
946
-
979
+
947
980
  def average_deviation_population( m = nil )
948
981
  check_type :scale
949
982
  m ||= mean
@@ -960,7 +993,7 @@ module Statsample
960
993
  m||=mean
961
994
  sum_of_squares(m).quo(n_valid - 1)
962
995
  end
963
-
996
+
964
997
  # Sample Standard deviation (denominator n-1)
965
998
  def standard_deviation_sample(m=nil)
966
999
  check_type :scale
@@ -980,23 +1013,23 @@ module Statsample
980
1013
  m||=mean
981
1014
  fo=@scale_data.inject(0){|a,x| a+((x-m)**4)}
982
1015
  fo.quo((@scale_data.size)*sd(m)**4)-3
983
-
1016
+
984
1017
  end
985
1018
  # Product of all values on the sample
986
- #
1019
+ #
987
1020
  def product
988
1021
  check_type :scale
989
1022
  @scale_data.inject(1){|a,x| a*x }
990
1023
  end
991
-
1024
+
992
1025
  # With a fixnum, creates X bins within the range of data
993
1026
  # With an Array, each value will be a cut point
994
1027
  def histogram(bins=10)
995
1028
  check_type :scale
996
-
1029
+
997
1030
  if bins.is_a? Array
998
1031
  #h=Statsample::Histogram.new(self, bins)
999
- h=Statsample::Histogram.alloc(bins)
1032
+ h=Statsample::Histogram.alloc(bins)
1000
1033
  else
1001
1034
  # ugly patch. The upper limit for a bin has the form
1002
1035
  # x < range
@@ -1013,7 +1046,7 @@ module Statsample
1013
1046
  h.increment(@valid_data)
1014
1047
  h
1015
1048
  end
1016
-
1049
+
1017
1050
  # Coefficient of variation
1018
1051
  # Calculated with the sample standard deviation
1019
1052
  def coefficient_of_variation
@@ -1026,12 +1059,12 @@ module Statsample
1026
1059
  standard_deviation_sample.quo(Math.sqrt(valid_data.size))
1027
1060
  end
1028
1061
  alias :se :standard_error
1029
-
1062
+
1030
1063
  alias_method :sdp, :standard_deviation_population
1031
1064
  alias_method :sds, :standard_deviation_sample
1032
1065
  alias_method :adp, :average_deviation_population
1033
1066
  alias_method :cov, :coefficient_of_variation
1034
- alias_method :variance, :variance_sample
1067
+ alias_method :variance, :variance_sample
1035
1068
  alias_method :sd, :standard_deviation_sample
1036
1069
  alias_method :ss, :sum_of_squares
1037
1070
  include_aliasing Statsample::Vector::GSL_ if Statsample.has_gsl?