davidrichards-just_enumerable_stats 0.0.8 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -271,7 +271,7 @@ describe "JustEnumerableStats" do
271
271
 
272
272
  it "should be able to dichotomize a list" do
273
273
  @a.dichotomize(2, :small, :big)
274
- @a.categories.should eql([:small, :big])
274
+ @a.categories.map{|e| e.to_s}.sort.map{|e| e.to_sym}.should eql([:big, :small])
275
275
  @a.category_values[:small].should eql([1,2])
276
276
  @a.category_values[:big].should eql([3])
277
277
  end
@@ -523,14 +523,204 @@ describe "JustEnumerableStats" do
523
523
  a.covariance(b).should eql(0.125)
524
524
  end
525
525
 
526
- it "should be able to return the Pearson correlation" do
527
- a = [1,2,3,4]
528
- b = [3,3,4,3]
529
- a.pearson_correlation(b).should be_close(0.193649167310371, 1.0e-15)
530
- end
531
-
532
526
  it "should be able to force the list into floats" do
533
527
  [1,2,3].to_f!.should eql([1.0, 2.0, 3.0])
534
528
  end
535
529
 
530
+ context "unobstrusive" do
531
+ before do
532
+ @a = BusyClass.new(1,2,3)
533
+ @b = [2,3,1]
534
+ end
535
+
536
+ it "should not use the native max" do
537
+ lambda{@a._jes_max}.should_not raise_error
538
+ end
539
+
540
+ it "should not use the native max_index" do
541
+ lambda{@a._jes_max_index}.should_not raise_error
542
+ end
543
+
544
+ it "should not use the native min" do
545
+ lambda{@a._jes_min}.should_not raise_error
546
+ end
547
+
548
+ it "should not use the native min_index" do
549
+ lambda{@a._jes_min_index}.should_not raise_error
550
+ end
551
+
552
+ it "should not use the native default_block" do
553
+ lambda{@a._jes_default_block}.should_not raise_error
554
+ end
555
+
556
+ it "should not use the native default_block=" do
557
+ lambda{@a._jes_default_block= lambda{|e| 1} }.should_not raise_error
558
+ end
559
+
560
+ it "should not use the native sum" do
561
+ lambda{@a._jes_sum}.should_not raise_error
562
+ end
563
+
564
+ it "should not use the native average" do
565
+ lambda{@a._jes_average}.should_not raise_error
566
+ end
567
+
568
+ it "should not use the native variance" do
569
+ lambda{@a._jes_variance}.should_not raise_error
570
+ end
571
+
572
+ it "should not use the native standard_deviation" do
573
+ lambda{@a._jes_standard_deviation}.should_not raise_error
574
+ end
575
+
576
+ it "should not use the native median" do
577
+ lambda{@a._jes_median}.should_not raise_error
578
+ end
579
+
580
+ it "should not use the native categories" do
581
+ lambda{@a._jes_categories}.should_not raise_error
582
+ end
583
+
584
+ it "should not use the native is_numeric?" do
585
+ lambda{@a._jes_is_numeric?}.should_not raise_error
586
+ end
587
+
588
+ it "should not use the native range" do
589
+ lambda{@a._jes_range}.should_not raise_error
590
+ end
591
+
592
+ it "should not use the native set_range_class" do
593
+ lambda{@a._jes_set_range_class(FixedRange)}.should_not raise_error
594
+ end
595
+
596
+ it "should not use the native set_range" do
597
+ lambda{@a._jes_set_range({:a => 1})}.should_not raise_error
598
+ end
599
+
600
+ it "should not use the native dichotomize" do
601
+ lambda{@a._jes_dichotomize(2, :small, :big)}.should_not raise_error
602
+ end
603
+
604
+ it "should not use the native count_if" do
605
+ lambda{@a._jes_count_if {|e| e == 2}}.should_not raise_error
606
+ end
607
+
608
+ it "should not use the native category_values" do
609
+ lambda{@a._jes_category_values}.should_not raise_error
610
+ end
611
+
612
+ it "should not use the native range_class" do
613
+ lambda{@a._jes_range_class}.should_not raise_error
614
+ end
615
+
616
+ it "should not use the native range_as_range" do
617
+ lambda{@a._jes_range_as_range}.should_not raise_error
618
+ end
619
+
620
+ it "should not use the native new_sort" do
621
+ lambda{@a._jes_new_sort}.should_not raise_error
622
+ end
623
+
624
+ it "should not use the native rank" do
625
+ lambda{@a._jes_rank}.should_not raise_error
626
+ end
627
+
628
+ it "should not use the native order" do
629
+ lambda{@a._jes_order}.should_not raise_error
630
+ end
631
+
632
+ it "should not use the native quantile" do
633
+ lambda{@a._jes_quantile}.should_not raise_error
634
+ end
635
+
636
+ it "should not use the native cum_sum" do
637
+ lambda{@a._jes_cum_sum}.should_not raise_error
638
+ end
639
+
640
+ it "should not use the native cum_prod" do
641
+ lambda{@a._jes_cum_prod}.should_not raise_error
642
+ end
643
+
644
+ it "should not use the native cum_max" do
645
+ lambda{@a._jes_cum_max}.should_not raise_error
646
+ end
647
+
648
+ it "should not use the native cum_min" do
649
+ lambda{@a._jes_cum_min}.should_not raise_error
650
+ end
651
+
652
+ it "should not use the native product" do
653
+ lambda{@a._jes_product}.should_not raise_error
654
+ end
655
+
656
+ it "should not use the native to_pairs" do
657
+ lambda{@a._jes_to_pairs(@b) {|a, b| a}}.should_not raise_error
658
+ end
659
+
660
+ it "should not use the native tanimoto_pairs" do
661
+ lambda{@a._jes_tanimoto_pairs(@b)}.should_not raise_error
662
+ end
663
+
664
+ it "should not use the native union" do
665
+ lambda{@a._jes_union(@b)}.should_not raise_error
666
+ end
667
+
668
+ it "should not use the native intersect" do
669
+ lambda{@a._jes_intersect(@b)}.should_not raise_error
670
+ end
671
+
672
+ it "should not use the native compliment" do
673
+ lambda{@a._jes_compliment(@b)}.should_not raise_error
674
+ end
675
+
676
+ it "should not use the native exclusive_not" do
677
+ lambda{@a._jes_exclusive_not(@b)}.should_not raise_error
678
+ end
679
+
680
+ it "should not use the native cartesian_product" do
681
+ lambda{@a._jes_cartesian_product(@b)}.should_not raise_error
682
+ end
683
+
684
+ it "should not use the native sigma_pairs" do
685
+ lambda{@a._jes_sigma_pairs(@b) {|a, b| a}}.should_not raise_error
686
+ end
687
+
688
+ it "should not use the native euclidian_distance" do
689
+ lambda{@a._jes_euclidian_distance(@b)}.should_not raise_error
690
+ end
691
+
692
+ it "should not use the native rand_in_range" do
693
+ lambda{@a._jes_rand_in_range(1, 2)}.should_not raise_error
694
+ end
695
+
696
+ it "should not use the native correlation" do
697
+ lambda{@a._jes_correlation(@b)}.should_not raise_error
698
+ end
699
+
700
+ it "should not use the native yield_transpose" do
701
+ lambda{@a._jes_yield_transpose(@b)}.should_not raise_error
702
+ end
703
+
704
+ it "should not use the native max_of_lists" do
705
+ lambda{@a._jes_max_of_lists(@b)}.should_not raise_error
706
+ end
707
+
708
+ it "should not use the native min_of_lists" do
709
+ lambda{@a._jes_min_of_lists(@b)}.should_not raise_error
710
+ end
711
+
712
+ it "should not use the native covariance" do
713
+ lambda{@a._jes_covariance(@b)}.should_not raise_error
714
+ end
715
+
716
+ it "should not use the native pearson_correlation" do
717
+ lambda{@a._jes_pearson_correlation(@b)}.should_not raise_error
718
+ end
719
+
720
+ it "should not use the native to_f!" do
721
+ lambda{@a._jes_to_f!}.should_not raise_error
722
+ end
723
+
724
+ end
725
+
536
726
  end
data/spec/spec_helper.rb CHANGED
@@ -6,3 +6,65 @@ require 'just_enumerable_stats'
6
6
  Spec::Runner.configure do |config|
7
7
 
8
8
  end
9
+
10
+ class BusyClass
11
+ include Enumerable
12
+ def initialize(*vals)
13
+ @values = vals
14
+ end
15
+
16
+ def method_missing(sym, *args, &block)
17
+ @values.send(sym, *args, &block)
18
+ end
19
+
20
+ def max(&block); raise ArgumentError, "Should not be called"; end
21
+ def max_index(&block); raise ArgumentError, "Should not be called"; end
22
+ def min(&block); raise ArgumentError, "Should not be called"; end
23
+ def min_index(&block); raise ArgumentError, "Should not be called"; end
24
+ def default_block; raise ArgumentError, "Should not be called"; end
25
+ def default_block=(block); raise ArgumentError, "Should not be called"; end
26
+ def sum; raise ArgumentError, "Should not be called"; end
27
+ def average(&block); raise ArgumentError, "Should not be called"; end
28
+ def variance(&block); raise ArgumentError, "Should not be called"; end
29
+ def standard_deviation(&block); raise ArgumentError, "Should not be called"; end
30
+ def median(ratio=0.5, &block); raise ArgumentError, "Should not be called"; end
31
+ def categories; raise ArgumentError, "Should not be called"; end
32
+ def is_numeric?; raise ArgumentError, "Should not be called"; end
33
+ def range(&block); raise ArgumentError, "Should not be called"; end
34
+ def set_range_class(klass, *args); raise ArgumentError, "Should not be called"; end
35
+ def set_range(hash); raise ArgumentError, "Should not be called"; end
36
+ def dichotomize(split_value, first_label, second_label); raise ArgumentError, "Should not be called"; end
37
+ def count_if(&block); raise ArgumentError, "Should not be called"; end
38
+ def category_values(reset=false); raise ArgumentError, "Should not be called"; end
39
+ def range_class; raise ArgumentError, "Should not be called"; end
40
+ def range_as_range(&block); raise ArgumentError, "Should not be called"; end
41
+ def new_sort(&block); raise ArgumentError, "Should not be called"; end
42
+ def rank(&block); raise ArgumentError, "Should not be called"; end
43
+ def order(&block); raise ArgumentError, "Should not be called"; end
44
+ def quantile(&block); raise ArgumentError, "Should not be called"; end
45
+ def cum_sum(sorted=false, &block); raise ArgumentError, "Should not be called"; end
46
+ def cum_prod(sorted=false, &block); raise ArgumentError, "Should not be called"; end
47
+ def cum_max(&block); raise ArgumentError, "Should not be called"; end
48
+ def cum_min(&block); raise ArgumentError, "Should not be called"; end
49
+ def product; raise ArgumentError, "Should not be called"; end
50
+ def to_pairs(other, &block); raise ArgumentError, "Should not be called"; end
51
+ def tanimoto_pairs(other); raise ArgumentError, "Should not be called"; end
52
+ def union(other); raise ArgumentError, "Should not be called"; end
53
+ def intersect(other); raise ArgumentError, "Should not be called"; end
54
+ def compliment(other); raise ArgumentError, "Should not be called"; end
55
+ def exclusive_not(other); raise ArgumentError, "Should not be called"; end
56
+ def cartesian_product(other, &block); raise ArgumentError, "Should not be called"; end
57
+ def sigma_pairs(other, z=_jes_zero, &block); raise ArgumentError, "Should not be called"; end
58
+ def euclidian_distance(other); raise ArgumentError, "Should not be called"; end
59
+ def rand_in_range(*args); raise ArgumentError, "Should not be called"; end
60
+ def correlation(other); raise ArgumentError, "Should not be called"; end
61
+ def yield_transpose(*enums, &block); raise ArgumentError, "Should not be called"; end
62
+ def max_of_lists(*enums); raise ArgumentError, "Should not be called"; end
63
+ def min_of_lists(*enums); raise ArgumentError, "Should not be called"; end
64
+ def covariance(other); raise ArgumentError, "Should not be called"; end
65
+ def pearson_correlation(other); raise ArgumentError, "Should not be called"; end
66
+ def to_f!; raise ArgumentError, "Should not be called"; end
67
+
68
+ end
69
+
70
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-just_enumerable_stats
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -26,12 +26,8 @@ files:
26
26
  - VERSION.yml
27
27
  - bin/jes
28
28
  - lib/fixed_range.rb
29
- - lib/just_enumerable_stats
30
- - lib/just_enumerable_stats/stats.rb
31
29
  - lib/just_enumerable_stats.rb
32
30
  - spec/fixed_range_spec.rb
33
- - spec/just_enumerable_stats
34
- - spec/just_enumerable_stats/stats_spec.rb
35
31
  - spec/just_enumerable_stats_spec.rb
36
32
  - spec/spec_helper.rb
37
33
  has_rdoc: true
@@ -1,597 +0,0 @@
1
- # This is a namespaced version of the gem, in case you can create a
2
- # container for your data and only include these methods there.
3
- # Example:
4
- class Object
5
-
6
- # Simpler way to handle a random number between two values
7
- def rand_between(a, b)
8
- return rand_in_floats(a, b) if a.is_a?(Float) or b.is_a?(Float)
9
- range = (a - b).abs + 1
10
- rand(range) + [a,b].min
11
- end
12
-
13
- # Handles non-integers
14
- def rand_in_floats(a, b)
15
- range = (a - b).abs
16
- (rand * range) + [a,b].min
17
- end
18
-
19
- end
20
-
21
- module JustEnumerableStats #:nodoc:
22
- module Stats
23
-
24
- # To keep max and min DRY.
25
- def block_sorter(a, b, &block)
26
- if block
27
- val = yield(a, b)
28
- elsif default_block
29
- val = default_block.call(a, b)
30
- else
31
- val = a <=> b
32
- end
33
- end
34
- protected :block_sorter
35
-
36
- # Returns the max, using an optional block.
37
- def max(&block)
38
- self.inject do |best, e|
39
- val = block_sorter(best, e, &block)
40
- best = val > 0 ? best : e
41
- end
42
- end
43
-
44
- # Returns the first index of the max value
45
- def max_index(&block)
46
- self.index(max(&block))
47
- end
48
-
49
- # Min of any number of items
50
- def min(&block)
51
- self.inject do |best, e|
52
- val = block_sorter(best, e, &block)
53
- best = val < 0 ? best : e
54
- end
55
- end
56
-
57
- # Returns the first index of the min value
58
- def min_index(&block)
59
- self.index(min(&block))
60
- end
61
-
62
- # The block called to filter the values in the object.
63
- def default_block
64
- @default_stat_block
65
- end
66
-
67
- # Allows me to setup a block for a series of operations. Example:
68
- # a = [1,2,3]
69
- # a.sum # => 6.0
70
- # a.default_block = lambda{|e| 1 / e}
71
- # a.sum # => 1.0
72
- def default_block=(block)
73
- @default_stat_block = block
74
- end
75
-
76
- # Provides zero in the right class (Numeric or Float)
77
- def zero
78
- any? {|e| e.is_a?(Float)} ? 0.0 : 0
79
- end
80
- protected :zero
81
-
82
- # Provides one in the right class (Numeric or Float)
83
- def one
84
- any? {|e| e.is_a?(Float)} ? 1.0 : 1
85
- end
86
- protected :one
87
-
88
- # Adds up the list. Uses a block or default block if present.
89
- def sum
90
- sum = zero
91
- if block_given?
92
- each{|i| sum += yield(i)}
93
- elsif default_block
94
- each{|i| sum += default_block[*i]}
95
- else
96
- each{|i| sum += i}
97
- end
98
- sum
99
- end
100
-
101
- # The arithmetic mean, uses a block or default block.
102
- def average(&block)
103
- sum(&block)/size
104
- end
105
- alias :mean :average
106
- alias :avg :average
107
-
108
- # The variance, uses a block or default block.
109
- def variance(&block)
110
- m = mean(&block)
111
- sum_of_differences = if block_given?
112
- sum{ |i| j=yield(i); (m - j) ** 2 }
113
- elsif default_block
114
- sum{ |i| j=default_block[*i]; (m - j) ** 2 }
115
- else
116
- sum{ |i| (m - i) ** 2 }
117
- end
118
- sum_of_differences / (size - 1)
119
- end
120
- alias :var :variance
121
-
122
- # The standard deviation. Uses a block or default block.
123
- def standard_deviation(&block)
124
- Math::sqrt(variance(&block))
125
- end
126
- alias :std :standard_deviation
127
-
128
- # The slow way is to iterate up to the middle point. A faster way is to
129
- # use the index, when available. If a block is supplied, always iterate
130
- # to the middle point.
131
- def median(ratio=0.5, &block)
132
- return iterate_midway(ratio, &block) if block_given?
133
- begin
134
- mid1, mid2 = middle_two
135
- sorted = new_sort
136
- med1, med2 = sorted[mid1], sorted[mid2]
137
- return med1 if med1 == med2
138
- return med1 + ((med2 - med1) * ratio)
139
- rescue
140
- iterate_midway(ratio, &block)
141
- end
142
- end
143
-
144
- def middle_two
145
- mid2 = size.div(2)
146
- mid1 = (size % 2 == 0) ? mid2 - 1 : mid2
147
- return mid1, mid2
148
- end
149
- protected :middle_two
150
-
151
- def median_position
152
- middle_two.last
153
- end
154
- protected :median_position
155
-
156
- def first_half(&block)
157
- fh = self[0..median_position].dup
158
- end
159
- protected :first_half
160
-
161
- def second_half(&block)
162
- # Total crap, but it's the way R does things, and this will most likely
163
- # only be used to feed R some numbers to plot, if at all.
164
- sh = size <= 5 ? self[median_position..-1].dup : self[median_position - 1..-1].dup
165
- end
166
- protected :second_half
167
-
168
- # An iterative version of median
169
- def iterate_midway(ratio, &block)
170
- mid1, mid2, last_value, j, sorted, sort1, sort2 = middle_two, nil, 0, new_sort, nil, nil
171
-
172
- if block_given?
173
- sorted.each do |i|
174
- last_value = yield(i)
175
- j += 1
176
- sort1 = last_value if j == mid1
177
- sort2 = last_value if j == mid2
178
- break if j >= mid2
179
- end
180
- elsif default_block
181
- sorted.each do |i|
182
- last_value = default_block[*i]
183
- j += 1
184
- sort1 = last_value if j == mid1
185
- sort2 = last_value if j == mid2
186
- break if j >= mid2
187
- end
188
- else
189
- sorted.each do |i|
190
- last_value = i
191
- sort1 = last_value if j == mid1
192
- sort2 = last_value if j == mid2
193
- j += 1
194
- break if j >= mid2
195
- end
196
- end
197
- return med1 if med1 == med2
198
- return med1 + ((med2 - med1) * ratio)
199
- end
200
- protected :iterate_midway
201
-
202
- # Takes the range_class and returns its map.
203
- # Example:
204
- # require 'mathn'
205
- # a = [1,2,3]
206
- # a
207
- # range_class = FixedRange, a.min, a.max, 1/4
208
- # a.categories
209
- # => [1, 5/4, 3/2, 7/4, 2, 9/4, 5/2, 11/4, 3]
210
- # For non-numeric values, returns a unique set,
211
- # ordered if possible.
212
- def categories
213
- if @categories
214
- @categories
215
- elsif self.is_numeric?
216
- self.range_instance.map
217
- else
218
- self.uniq.sort rescue self.uniq
219
- end
220
- end
221
-
222
- def is_numeric?
223
- self.all? {|e| e.is_a?(Numeric)}
224
- end
225
-
226
- # Just an array of [min, max] to comply with R's use of the word. Use
227
- # range_as_range if you want a real Range.
228
- def range(&block)
229
- [min(&block), max(&block)]
230
- end
231
-
232
- # Useful for setting a real range class (FixedRange).
233
- def set_range_class(klass, *args)
234
- @range_class = klass
235
- @range_class_args = args
236
- self.range_class
237
- end
238
-
239
- # Takes a hash of arrays for categories
240
- # If Facets happens to be loaded on the computer, this keeps the order
241
- # of the categories straight.
242
- def set_range(hash)
243
- if defined?(Dictionary)
244
- @range_hash = Dictionary.new
245
- @range_hash.merge!(hash)
246
- @categories = @range_hash.keys
247
- else
248
- @categories = hash.keys
249
- @range_hash = hash
250
- end
251
- @categories
252
- end
253
-
254
- # The hash of lambdas that are used to categorize the enumerable.
255
- attr_reader :range_hash
256
-
257
- # The arguments needed to instantiate the custom-defined range class.
258
- attr_reader :range_class_args
259
-
260
- # Splits the values in two, <= the value and > the value.
261
- def dichotomize(split_value, first_label, second_label)
262
- set_range({
263
- first_label => lambda{|e| e <= split_value},
264
- second_label => lambda{|e| e > split_value}
265
- })
266
- end
267
-
268
- # Counts each element where the block evaluates to true
269
- # Example:
270
- # a = [1,2,3]
271
- # a.count_if {|e| e % 2 == 0}
272
- def count_if(&block)
273
- self.inject(0) do |s, e|
274
- s += 1 if block.call(e)
275
- s
276
- end
277
- end
278
-
279
- # Returns a Hash or Dictionary (if available) for each category with a
280
- # value as the set of matching values as an array.
281
- # Because this is supposed to be lean (just enumerables), but this is an
282
- # expensive call, I'm going to cache it and offer a parameter to reset
283
- # the cache. So, call category_values(true) if you need to reset the
284
- # cache.
285
- def category_values(reset=false)
286
- @category_values = nil if reset
287
- return @category_values if @category_values
288
- container = defined?(Dictionary) ? Dictionary.new : Hash.new
289
- if self.range_hash
290
- @category_values = self.categories.inject(container) do |cont, cat|
291
- cont[cat] = self.find_all &self.range_hash[cat]
292
- cont
293
- end
294
- else
295
- @category_values = self.categories.inject(container) do |cont, cat|
296
- cont[cat] = self.find_all {|e| e == cat}
297
- cont
298
- end
299
- end
300
- end
301
-
302
- # When creating a range, what class will it be? Defaults to Range, but
303
- # other classes are sometimes useful.
304
- def range_class
305
- @range_class ||= Range
306
- end
307
-
308
- # Actually instantiates the range, instead of producing a min and max array.
309
- def range_as_range(&block)
310
- if @range_class_args and not @range_class_args.empty?
311
- self.range_class.new(*@range_class_args)
312
- else
313
- self.range_class.new(min(&block), max(&block))
314
- end
315
- end
316
- alias :range_instance :range_as_range
317
-
318
- # I don't pass the block to the sort, because a sort block needs to look
319
- # something like: {|x,y| x <=> y}. To get around this, set the default
320
- # block on the object.
321
- def new_sort(&block)
322
- if block_given?
323
- map { |i| yield(i) }.sort.dup
324
- elsif default_block
325
- map { |i| default_block[*i] }.sort.dup
326
- else
327
- sort().dup
328
- end
329
- end
330
-
331
- # Doesn't overwrite things like Matrix#rank
332
- def rank(&block)
333
-
334
- sorted = new_sort(&block)
335
-
336
- if block_given?
337
- map { |i| sorted.index(yield(i)) + 1 }
338
- elsif default_block
339
- map { |i| sorted.index(default_block[*i]) + 1 }
340
- else
341
- map { |i| sorted.index(i) + 1 }
342
- end
343
-
344
- end unless defined?(rank)
345
-
346
- # Given values like [10,5,5,1]
347
- # Rank should produce something like [4,2,2,1]
348
- # And order should produce something like [4,2,3,1]
349
- # The trick is that rank skips as many as were duplicated, so there
350
- # could not be a 3 in the rank from the example above.
351
- def order(&block)
352
- hold = []
353
- rank(&block).each do |x|
354
- while hold.include?(x) do
355
- x += 1
356
- end
357
- hold << x
358
- end
359
- hold
360
- end
361
-
362
- # First quartile: nth_split_by_m(1, 4)
363
- # Third quartile: nth_split_by_m(3, 4)
364
- # Median: nth_split_by_m(1, 2)
365
- # Doesn't match R, and it's silly to try to.
366
- # def nth_split_by_m(n, m)
367
- # sorted = new_sort
368
- # dividers = m - 1
369
- # if size % m == dividers # Divides evenly
370
- # # Because we have a 0-based list, we get the floor
371
- # i = ((size / m.to_f) * n).floor
372
- # j = i
373
- # else
374
- # # This reflects R's approach, which I don't think I agree with.
375
- # i = (((size / m.to_f) * n) - 1)
376
- # i = i > (size / m.to_f) ? i.floor : i.ceil
377
- # j = i + 1
378
- # end
379
- # sorted[i] + ((n / m.to_f) * (sorted[j] - sorted[i]))
380
- # end
381
- def quantile(&block)
382
- [
383
- min(&block),
384
- first_half(&block).median(0.25, &block),
385
- median(&block),
386
- second_half(&block).median(0.75, &block),
387
- max(&block)
388
- ]
389
- end
390
-
391
- # The cumulative sum. Example:
392
- # [1,2,3].cum_sum # => [1, 3, 6]
393
- def cum_sum(sorted=false, &block)
394
- sum = zero
395
- obj = sorted ? self.new_sort : self
396
- if block_given?
397
- obj.map { |i| sum += yield(i) }
398
- elsif default_block
399
- obj.map { |i| sum += default_block[*i] }
400
- else
401
- obj.map { |i| sum += i }
402
- end
403
- end
404
- alias :cumulative_sum :cum_sum
405
-
406
- # The cumulative product. Example:
407
- # [1,2,3].cum_prod # => [1.0, 2.0, 6.0]
408
- def cum_prod(sorted=false, &block)
409
- prod = one
410
- obj = sorted ? self.new_sort : self
411
- if block_given?
412
- obj.map { |i| prod *= yield(i) }
413
- elsif default_block
414
- obj.map { |i| prod *= default_block[*i] }
415
- else
416
- obj.map { |i| prod *= i }
417
- end
418
- end
419
- alias :cumulative_product :cum_prod
420
-
421
- # Used to preprocess the list
422
- def morph_list(&block)
423
- if block
424
- self.map{ |e| block.call(e) }
425
- elsif self.default_block
426
- self.map{ |e| self.default_block.call(e) }
427
- else
428
- self
429
- end
430
- end
431
- protected :morph_list
432
-
433
- # Example:
434
- # [1,2,3,0,5].cum_max # => [1,2,3,3,5]
435
- def cum_max(&block)
436
- morph_list(&block).inject([]) do |list, e|
437
- found = (list | [e]).max
438
- list << (found ? found : e)
439
- end
440
- end
441
- alias :cumulative_max :cum_max
442
-
443
- # Example:
444
- # [1,2,3,0,5].cum_min # => [1,1,1,0,0]
445
- def cum_min(&block)
446
- morph_list(&block).inject([]) do |list, e|
447
- found = (list | [e]).min
448
- list << (found ? found : e)
449
- end
450
- end
451
- alias :cumulative_min :cum_min
452
-
453
- # Multiplies the values:
454
- # >> product(1,2,3)
455
- # => 6.0
456
- def product
457
- self.inject(one) {|sum, a| sum *= a}
458
- end
459
-
460
- # There are going to be a lot more of these kinds of things, so pay
461
- # attention.
462
- def to_pairs(other, &block)
463
- n = [self.size, other.size].min
464
- (0...n).map {|i| block.call(self[i], other[i]) }
465
- end
466
-
467
- # Finds the tanimoto coefficient: the intersection set size / union set
468
- # size. This is used to find the distance between two vectors.
469
- # >> [1,2,3].cor([2,3,5])
470
- # => 0.981980506061966
471
- # >> [1,2,3].tanimoto_pairs([2,3,5])
472
- # => 0.5
473
- def tanimoto_pairs(other)
474
- intersect(other).size / union(other).size.to_f
475
- end
476
- alias :tanimoto_correlation :tanimoto_pairs
477
-
478
- # Sometimes it just helps to have things spelled out. These are all
479
- # part of the Array class. This means, you have methods that you can't
480
- # run on some kinds of enumerables.
481
-
482
- # All of the left and right hand sides, excluding duplicates.
483
- # "The union of x and y"
484
- def union(other)
485
- other = other.to_a unless other.is_a?(Array)
486
- self | other
487
- end
488
-
489
- # What's shared on the left and right hand sides
490
- # "The intersection of x and y"
491
- def intersect(other)
492
- other = other.to_a unless other.is_a?(Array)
493
- self & other
494
- end
495
-
496
- # Everything on the left hand side except what's shared on the right
497
- # hand side.
498
- # "The relative compliment of y in x"
499
- def compliment(other)
500
- other = other.to_a unless other.is_a?(Array)
501
- self - other
502
- end
503
-
504
- # Everything but what's shared
505
- def exclusive_not(other)
506
- other = other.to_a unless other.is_a?(Array)
507
- (self | other) - (self & other)
508
- end
509
-
510
- # Finds the cartesian product, excluding duplicate items and self-
511
- # referential pairs. Yields the block value if given.
512
- def cartesian_product(other, &block)
513
- x,y = self.uniq.dup, other.uniq.dup
514
- pairs = x.inject([]) do |cp, i|
515
- cp | y.map{|b| i == b ? nil : [i,b]}.compact
516
- end
517
- return pairs unless block_given?
518
- pairs.map{|p| yield p.first, p.last}
519
- end
520
- alias :cp :cartesian_product
521
- alias :permutations :cartesian_product
522
-
523
- # Sigma of pairs. Returns a single float, or whatever object is sent in.
524
- # Example: [1,2,3].sigma_pairs([4,5,6], 0) {|x, y| x + y}
525
- # returns 21 instead of 21.0.
526
- def sigma_pairs(other, z=zero, &block)
527
- self.to_pairs(other,&block).inject(z) {|sum, i| sum += i}
528
- end
529
-
530
- # Returns the Euclidian distance between all points of a set of enumerables
531
- def euclidian_distance(other)
532
- Math.sqrt(self.sigma_pairs(other) {|a, b| (a - b) ** 2})
533
- end
534
-
535
- # Returns a random integer in the range for any number of lists. This
536
- # is a way to get a random vector that is tenable based on the sample
537
- # data. For example, given two sets of numbers:
538
- #
539
- # a = [1,2,3]; b = [8,8,8]
540
- #
541
- # rand_in_range will return a value >= 1 and <= 8 in the first
542
- # place, >= 2 and <= 8 in the second place, and >= 3 and <= 8 in the
543
- # last place.
544
- # Works for integers. Rethink this for floats. May consider setting up
545
- # FixedRange for floats. O(n*5)
546
- def rand_in_range(*args)
547
- min = self.min_of_lists(*args)
548
- max = self.max_of_lists(*args)
549
- (0...size).inject([]) do |ary, i|
550
- ary << rand_between(min[i], max[i])
551
- end
552
- end
553
-
554
- # Finds the correlation between two enumerables.
555
- # Example: [1,2,3].cor [2,3,5]
556
- # returns 0.981980506061966
557
- def correlation(other)
558
- n = [self.size, other.size].min
559
- sum_of_products_of_pairs = self.sigma_pairs(other) {|a, b| a * b}
560
- self_sum = self.sum
561
- other_sum = other.sum
562
- sum_of_squared_self_scores = self.sum { |e| e * e }
563
- sum_of_squared_other_scores = other.sum { |e| e * e }
564
-
565
- numerator = (n * sum_of_products_of_pairs) - (self_sum * other_sum)
566
- self_denominator = ((n * sum_of_squared_self_scores) - (self_sum ** 2))
567
- other_denominator = ((n * sum_of_squared_other_scores) - (other_sum ** 2))
568
- denominator = Math.sqrt(self_denominator * other_denominator)
569
- return numerator / denominator
570
- end
571
- alias :cor :correlation
572
-
573
- # Transposes arrays of arrays and yields a block on the value.
574
- # The regular Array#transpose ignores blocks
575
- def yield_transpose(*enums, &block)
576
- enums.unshift(self)
577
- n = enums.map{ |x| x.size}.min
578
- block ||= lambda{|e| e}
579
- (0...n).map { |i| block.call enums.map{ |x| x[i] } }
580
- end
581
-
582
- # Returns the max of two or more enumerables.
583
- # >> [1,2,3].max_of_lists([0,5,6], [0,2,9])
584
- # => [1, 5, 9]
585
- def max_of_lists(*enums)
586
- yield_transpose(*enums) {|e| e.max}
587
- end
588
-
589
- # Returns the min of two or more enumerables.
590
- # >> [1,2,3].min_of_lists([4,5,6], [0,2,9])
591
- # => [0, 2, 3]
592
- def min_of_lists(*enums)
593
- yield_transpose(*enums) {|e| e.min}
594
- end
595
-
596
- end
597
- end