davidrichards-just_enumerable_stats 0.0.8 → 0.0.11

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -271,7 +271,7 @@ describe "JustEnumerableStats" do
271
271
 
272
272
  it "should be able to dichotomize a list" do
273
273
  @a.dichotomize(2, :small, :big)
274
- @a.categories.should eql([:small, :big])
274
+ @a.categories.map{|e| e.to_s}.sort.map{|e| e.to_sym}.should eql([:big, :small])
275
275
  @a.category_values[:small].should eql([1,2])
276
276
  @a.category_values[:big].should eql([3])
277
277
  end
@@ -523,14 +523,204 @@ describe "JustEnumerableStats" do
523
523
  a.covariance(b).should eql(0.125)
524
524
  end
525
525
 
526
- it "should be able to return the Pearson correlation" do
527
- a = [1,2,3,4]
528
- b = [3,3,4,3]
529
- a.pearson_correlation(b).should be_close(0.193649167310371, 1.0e-15)
530
- end
531
-
532
526
  it "should be able to force the list into floats" do
533
527
  [1,2,3].to_f!.should eql([1.0, 2.0, 3.0])
534
528
  end
535
529
 
530
+ context "unobstrusive" do
531
+ before do
532
+ @a = BusyClass.new(1,2,3)
533
+ @b = [2,3,1]
534
+ end
535
+
536
+ it "should not use the native max" do
537
+ lambda{@a._jes_max}.should_not raise_error
538
+ end
539
+
540
+ it "should not use the native max_index" do
541
+ lambda{@a._jes_max_index}.should_not raise_error
542
+ end
543
+
544
+ it "should not use the native min" do
545
+ lambda{@a._jes_min}.should_not raise_error
546
+ end
547
+
548
+ it "should not use the native min_index" do
549
+ lambda{@a._jes_min_index}.should_not raise_error
550
+ end
551
+
552
+ it "should not use the native default_block" do
553
+ lambda{@a._jes_default_block}.should_not raise_error
554
+ end
555
+
556
+ it "should not use the native default_block=" do
557
+ lambda{@a._jes_default_block= lambda{|e| 1} }.should_not raise_error
558
+ end
559
+
560
+ it "should not use the native sum" do
561
+ lambda{@a._jes_sum}.should_not raise_error
562
+ end
563
+
564
+ it "should not use the native average" do
565
+ lambda{@a._jes_average}.should_not raise_error
566
+ end
567
+
568
+ it "should not use the native variance" do
569
+ lambda{@a._jes_variance}.should_not raise_error
570
+ end
571
+
572
+ it "should not use the native standard_deviation" do
573
+ lambda{@a._jes_standard_deviation}.should_not raise_error
574
+ end
575
+
576
+ it "should not use the native median" do
577
+ lambda{@a._jes_median}.should_not raise_error
578
+ end
579
+
580
+ it "should not use the native categories" do
581
+ lambda{@a._jes_categories}.should_not raise_error
582
+ end
583
+
584
+ it "should not use the native is_numeric?" do
585
+ lambda{@a._jes_is_numeric?}.should_not raise_error
586
+ end
587
+
588
+ it "should not use the native range" do
589
+ lambda{@a._jes_range}.should_not raise_error
590
+ end
591
+
592
+ it "should not use the native set_range_class" do
593
+ lambda{@a._jes_set_range_class(FixedRange)}.should_not raise_error
594
+ end
595
+
596
+ it "should not use the native set_range" do
597
+ lambda{@a._jes_set_range({:a => 1})}.should_not raise_error
598
+ end
599
+
600
+ it "should not use the native dichotomize" do
601
+ lambda{@a._jes_dichotomize(2, :small, :big)}.should_not raise_error
602
+ end
603
+
604
+ it "should not use the native count_if" do
605
+ lambda{@a._jes_count_if {|e| e == 2}}.should_not raise_error
606
+ end
607
+
608
+ it "should not use the native category_values" do
609
+ lambda{@a._jes_category_values}.should_not raise_error
610
+ end
611
+
612
+ it "should not use the native range_class" do
613
+ lambda{@a._jes_range_class}.should_not raise_error
614
+ end
615
+
616
+ it "should not use the native range_as_range" do
617
+ lambda{@a._jes_range_as_range}.should_not raise_error
618
+ end
619
+
620
+ it "should not use the native new_sort" do
621
+ lambda{@a._jes_new_sort}.should_not raise_error
622
+ end
623
+
624
+ it "should not use the native rank" do
625
+ lambda{@a._jes_rank}.should_not raise_error
626
+ end
627
+
628
+ it "should not use the native order" do
629
+ lambda{@a._jes_order}.should_not raise_error
630
+ end
631
+
632
+ it "should not use the native quantile" do
633
+ lambda{@a._jes_quantile}.should_not raise_error
634
+ end
635
+
636
+ it "should not use the native cum_sum" do
637
+ lambda{@a._jes_cum_sum}.should_not raise_error
638
+ end
639
+
640
+ it "should not use the native cum_prod" do
641
+ lambda{@a._jes_cum_prod}.should_not raise_error
642
+ end
643
+
644
+ it "should not use the native cum_max" do
645
+ lambda{@a._jes_cum_max}.should_not raise_error
646
+ end
647
+
648
+ it "should not use the native cum_min" do
649
+ lambda{@a._jes_cum_min}.should_not raise_error
650
+ end
651
+
652
+ it "should not use the native product" do
653
+ lambda{@a._jes_product}.should_not raise_error
654
+ end
655
+
656
+ it "should not use the native to_pairs" do
657
+ lambda{@a._jes_to_pairs(@b) {|a, b| a}}.should_not raise_error
658
+ end
659
+
660
+ it "should not use the native tanimoto_pairs" do
661
+ lambda{@a._jes_tanimoto_pairs(@b)}.should_not raise_error
662
+ end
663
+
664
+ it "should not use the native union" do
665
+ lambda{@a._jes_union(@b)}.should_not raise_error
666
+ end
667
+
668
+ it "should not use the native intersect" do
669
+ lambda{@a._jes_intersect(@b)}.should_not raise_error
670
+ end
671
+
672
+ it "should not use the native compliment" do
673
+ lambda{@a._jes_compliment(@b)}.should_not raise_error
674
+ end
675
+
676
+ it "should not use the native exclusive_not" do
677
+ lambda{@a._jes_exclusive_not(@b)}.should_not raise_error
678
+ end
679
+
680
+ it "should not use the native cartesian_product" do
681
+ lambda{@a._jes_cartesian_product(@b)}.should_not raise_error
682
+ end
683
+
684
+ it "should not use the native sigma_pairs" do
685
+ lambda{@a._jes_sigma_pairs(@b) {|a, b| a}}.should_not raise_error
686
+ end
687
+
688
+ it "should not use the native euclidian_distance" do
689
+ lambda{@a._jes_euclidian_distance(@b)}.should_not raise_error
690
+ end
691
+
692
+ it "should not use the native rand_in_range" do
693
+ lambda{@a._jes_rand_in_range(1, 2)}.should_not raise_error
694
+ end
695
+
696
+ it "should not use the native correlation" do
697
+ lambda{@a._jes_correlation(@b)}.should_not raise_error
698
+ end
699
+
700
+ it "should not use the native yield_transpose" do
701
+ lambda{@a._jes_yield_transpose(@b)}.should_not raise_error
702
+ end
703
+
704
+ it "should not use the native max_of_lists" do
705
+ lambda{@a._jes_max_of_lists(@b)}.should_not raise_error
706
+ end
707
+
708
+ it "should not use the native min_of_lists" do
709
+ lambda{@a._jes_min_of_lists(@b)}.should_not raise_error
710
+ end
711
+
712
+ it "should not use the native covariance" do
713
+ lambda{@a._jes_covariance(@b)}.should_not raise_error
714
+ end
715
+
716
+ it "should not use the native pearson_correlation" do
717
+ lambda{@a._jes_pearson_correlation(@b)}.should_not raise_error
718
+ end
719
+
720
+ it "should not use the native to_f!" do
721
+ lambda{@a._jes_to_f!}.should_not raise_error
722
+ end
723
+
724
+ end
725
+
536
726
  end
data/spec/spec_helper.rb CHANGED
@@ -6,3 +6,65 @@ require 'just_enumerable_stats'
6
6
  Spec::Runner.configure do |config|
7
7
 
8
8
  end
9
+
10
+ class BusyClass
11
+ include Enumerable
12
+ def initialize(*vals)
13
+ @values = vals
14
+ end
15
+
16
+ def method_missing(sym, *args, &block)
17
+ @values.send(sym, *args, &block)
18
+ end
19
+
20
+ def max(&block); raise ArgumentError, "Should not be called"; end
21
+ def max_index(&block); raise ArgumentError, "Should not be called"; end
22
+ def min(&block); raise ArgumentError, "Should not be called"; end
23
+ def min_index(&block); raise ArgumentError, "Should not be called"; end
24
+ def default_block; raise ArgumentError, "Should not be called"; end
25
+ def default_block=(block); raise ArgumentError, "Should not be called"; end
26
+ def sum; raise ArgumentError, "Should not be called"; end
27
+ def average(&block); raise ArgumentError, "Should not be called"; end
28
+ def variance(&block); raise ArgumentError, "Should not be called"; end
29
+ def standard_deviation(&block); raise ArgumentError, "Should not be called"; end
30
+ def median(ratio=0.5, &block); raise ArgumentError, "Should not be called"; end
31
+ def categories; raise ArgumentError, "Should not be called"; end
32
+ def is_numeric?; raise ArgumentError, "Should not be called"; end
33
+ def range(&block); raise ArgumentError, "Should not be called"; end
34
+ def set_range_class(klass, *args); raise ArgumentError, "Should not be called"; end
35
+ def set_range(hash); raise ArgumentError, "Should not be called"; end
36
+ def dichotomize(split_value, first_label, second_label); raise ArgumentError, "Should not be called"; end
37
+ def count_if(&block); raise ArgumentError, "Should not be called"; end
38
+ def category_values(reset=false); raise ArgumentError, "Should not be called"; end
39
+ def range_class; raise ArgumentError, "Should not be called"; end
40
+ def range_as_range(&block); raise ArgumentError, "Should not be called"; end
41
+ def new_sort(&block); raise ArgumentError, "Should not be called"; end
42
+ def rank(&block); raise ArgumentError, "Should not be called"; end
43
+ def order(&block); raise ArgumentError, "Should not be called"; end
44
+ def quantile(&block); raise ArgumentError, "Should not be called"; end
45
+ def cum_sum(sorted=false, &block); raise ArgumentError, "Should not be called"; end
46
+ def cum_prod(sorted=false, &block); raise ArgumentError, "Should not be called"; end
47
+ def cum_max(&block); raise ArgumentError, "Should not be called"; end
48
+ def cum_min(&block); raise ArgumentError, "Should not be called"; end
49
+ def product; raise ArgumentError, "Should not be called"; end
50
+ def to_pairs(other, &block); raise ArgumentError, "Should not be called"; end
51
+ def tanimoto_pairs(other); raise ArgumentError, "Should not be called"; end
52
+ def union(other); raise ArgumentError, "Should not be called"; end
53
+ def intersect(other); raise ArgumentError, "Should not be called"; end
54
+ def compliment(other); raise ArgumentError, "Should not be called"; end
55
+ def exclusive_not(other); raise ArgumentError, "Should not be called"; end
56
+ def cartesian_product(other, &block); raise ArgumentError, "Should not be called"; end
57
+ def sigma_pairs(other, z=_jes_zero, &block); raise ArgumentError, "Should not be called"; end
58
+ def euclidian_distance(other); raise ArgumentError, "Should not be called"; end
59
+ def rand_in_range(*args); raise ArgumentError, "Should not be called"; end
60
+ def correlation(other); raise ArgumentError, "Should not be called"; end
61
+ def yield_transpose(*enums, &block); raise ArgumentError, "Should not be called"; end
62
+ def max_of_lists(*enums); raise ArgumentError, "Should not be called"; end
63
+ def min_of_lists(*enums); raise ArgumentError, "Should not be called"; end
64
+ def covariance(other); raise ArgumentError, "Should not be called"; end
65
+ def pearson_correlation(other); raise ArgumentError, "Should not be called"; end
66
+ def to_f!; raise ArgumentError, "Should not be called"; end
67
+
68
+ end
69
+
70
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-just_enumerable_stats
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -26,12 +26,8 @@ files:
26
26
  - VERSION.yml
27
27
  - bin/jes
28
28
  - lib/fixed_range.rb
29
- - lib/just_enumerable_stats
30
- - lib/just_enumerable_stats/stats.rb
31
29
  - lib/just_enumerable_stats.rb
32
30
  - spec/fixed_range_spec.rb
33
- - spec/just_enumerable_stats
34
- - spec/just_enumerable_stats/stats_spec.rb
35
31
  - spec/just_enumerable_stats_spec.rb
36
32
  - spec/spec_helper.rb
37
33
  has_rdoc: true
@@ -1,597 +0,0 @@
1
- # This is a namespaced version of the gem, in case you can create a
2
- # container for your data and only include these methods there.
3
- # Example:
4
- class Object
5
-
6
- # Simpler way to handle a random number between to values
7
- def rand_between(a, b)
8
- return rand_in_floats(a, b) if a.is_a?(Float) or b.is_a?(Float)
9
- range = (a - b).abs + 1
10
- rand(range) + [a,b].min
11
- end
12
-
13
- # Handles non-integers
14
- def rand_in_floats(a, b)
15
- range = (a - b).abs
16
- (rand * range) + [a,b].min
17
- end
18
-
19
- end
20
-
21
- module JustEnumerableStats #:nodoc:
22
- module Stats
23
-
24
- # To keep max and min DRY.
25
- def block_sorter(a, b, &block)
26
- if block
27
- val = yield(a, b)
28
- elsif default_block
29
- val = default_block.call(a, b)
30
- else
31
- val = a <=> b
32
- end
33
- end
34
- protected :block_sorter
35
-
36
- # Returns the max, using an optional block.
37
- def max(&block)
38
- self.inject do |best, e|
39
- val = block_sorter(best, e, &block)
40
- best = val > 0 ? best : e
41
- end
42
- end
43
-
44
- # Returns the first index of the max value
45
- def max_index(&block)
46
- self.index(max(&block))
47
- end
48
-
49
- # Min of any number of items
50
- def min(&block)
51
- self.inject do |best, e|
52
- val = block_sorter(best, e, &block)
53
- best = val < 0 ? best : e
54
- end
55
- end
56
-
57
- # Returns the first index of the min value
58
- def min_index(&block)
59
- self.index(min(&block))
60
- end
61
-
62
- # The block called to filter the values in the object.
63
- def default_block
64
- @default_stat_block
65
- end
66
-
67
- # Allows me to setup a block for a series of operations. Example:
68
- # a = [1,2,3]
69
- # a.sum # => 6.0
70
- # a.default_block = lambda{|e| 1 / e}
71
- # a.sum # => 1.0
72
- def default_block=(block)
73
- @default_stat_block = block
74
- end
75
-
76
- # Provides zero in the right class (Numeric or Float)
77
- def zero
78
- any? {|e| e.is_a?(Float)} ? 0.0 : 0
79
- end
80
- protected :zero
81
-
82
- # Provides one in the right class (Numeric or Float)
83
- def one
84
- any? {|e| e.is_a?(Float)} ? 1.0 : 1
85
- end
86
- protected :one
87
-
88
- # Adds up the list. Uses a block or default block if present.
89
- def sum
90
- sum = zero
91
- if block_given?
92
- each{|i| sum += yield(i)}
93
- elsif default_block
94
- each{|i| sum += default_block[*i]}
95
- else
96
- each{|i| sum += i}
97
- end
98
- sum
99
- end
100
-
101
- # The arithmetic mean, uses a block or default block.
102
- def average(&block)
103
- sum(&block)/size
104
- end
105
- alias :mean :average
106
- alias :avg :average
107
-
108
- # The variance, uses a block or default block.
109
- def variance(&block)
110
- m = mean(&block)
111
- sum_of_differences = if block_given?
112
- sum{ |i| j=yield(i); (m - j) ** 2 }
113
- elsif default_block
114
- sum{ |i| j=default_block[*i]; (m - j) ** 2 }
115
- else
116
- sum{ |i| (m - i) ** 2 }
117
- end
118
- sum_of_differences / (size - 1)
119
- end
120
- alias :var :variance
121
-
122
- # The standard deviation. Uses a block or default block.
123
- def standard_deviation(&block)
124
- Math::sqrt(variance(&block))
125
- end
126
- alias :std :standard_deviation
127
-
128
- # The slow way is to iterate up to the middle point. A faster way is to
129
- # use the index, when available. If a block is supplied, always iterate
130
- # to the middle point.
131
- def median(ratio=0.5, &block)
132
- return iterate_midway(ratio, &block) if block_given?
133
- begin
134
- mid1, mid2 = middle_two
135
- sorted = new_sort
136
- med1, med2 = sorted[mid1], sorted[mid2]
137
- return med1 if med1 == med2
138
- return med1 + ((med2 - med1) * ratio)
139
- rescue
140
- iterate_midway(ratio, &block)
141
- end
142
- end
143
-
144
- def middle_two
145
- mid2 = size.div(2)
146
- mid1 = (size % 2 == 0) ? mid2 - 1 : mid2
147
- return mid1, mid2
148
- end
149
- protected :middle_two
150
-
151
- def median_position
152
- middle_two.last
153
- end
154
- protected :median_position
155
-
156
- def first_half(&block)
157
- fh = self[0..median_position].dup
158
- end
159
- protected :first_half
160
-
161
- def second_half(&block)
162
- # Total crap, but it's the way R does things, and this will most likely
163
- # only be used to feed R some numbers to plot, if at all.
164
- sh = size <= 5 ? self[median_position..-1].dup : self[median_position - 1..-1].dup
165
- end
166
- protected :second_half
167
-
168
- # An iterative version of median
169
- def iterate_midway(ratio, &block)
170
- mid1, mid2, last_value, j, sorted, sort1, sort2 = middle_two, nil, 0, new_sort, nil, nil
171
-
172
- if block_given?
173
- sorted.each do |i|
174
- last_value = yield(i)
175
- j += 1
176
- sort1 = last_value if j == mid1
177
- sort2 = last_value if j == mid2
178
- break if j >= mid2
179
- end
180
- elsif default_block
181
- sorted.each do |i|
182
- last_value = default_block[*i]
183
- j += 1
184
- sort1 = last_value if j == mid1
185
- sort2 = last_value if j == mid2
186
- break if j >= mid2
187
- end
188
- else
189
- sorted.each do |i|
190
- last_value = i
191
- sort1 = last_value if j == mid1
192
- sort2 = last_value if j == mid2
193
- j += 1
194
- break if j >= mid2
195
- end
196
- end
197
- return med1 if med1 == med2
198
- return med1 + ((med2 - med1) * ratio)
199
- end
200
- protected :iterate_midway
201
-
202
- # Takes the range_class and returns its map.
203
- # Example:
204
- # require 'mathn'
205
- # a = [1,2,3]
206
- # a
207
- # range_class = FixedRange, a.min, a.max, 1/4
208
- # a.categories
209
- # => [1, 5/4, 3/2, 7/4, 2, 9/4, 5/2, 11/4, 3]
210
- # For non-numeric values, returns a unique set,
211
- # ordered if possible.
212
- def categories
213
- if @categories
214
- @categories
215
- elsif self.is_numeric?
216
- self.range_instance.map
217
- else
218
- self.uniq.sort rescue self.uniq
219
- end
220
- end
221
-
222
- def is_numeric?
223
- self.all? {|e| e.is_a?(Numeric)}
224
- end
225
-
226
- # Just an array of [min, max] to comply with R uses of the work. Use
227
- # range_as_range if you want a real Range.
228
- def range(&block)
229
- [min(&block), max(&block)]
230
- end
231
-
232
- # Useful for setting a real range class (FixedRange).
233
- def set_range_class(klass, *args)
234
- @range_class = klass
235
- @range_class_args = args
236
- self.range_class
237
- end
238
-
239
- # Takes a hash of arrays for categories
240
- # If Facets happens to be loaded on the computer, this keeps the order
241
- # of the categories straight.
242
- def set_range(hash)
243
- if defined?(Dictionary)
244
- @range_hash = Dictionary.new
245
- @range_hash.merge!(hash)
246
- @categories = @range_hash.keys
247
- else
248
- @categories = hash.keys
249
- @range_hash = hash
250
- end
251
- @categories
252
- end
253
-
254
- # The hash of lambdas that are used to categorize the enumerable.
255
- attr_reader :range_hash
256
-
257
- # The arguments needed to instantiate the custom-defined range class.
258
- attr_reader :range_class_args
259
-
260
- # Splits the values in two, <= the value and > the value.
261
- def dichotomize(split_value, first_label, second_label)
262
- set_range({
263
- first_label => lambda{|e| e <= split_value},
264
- second_label => lambda{|e| e > split_value}
265
- })
266
- end
267
-
268
- # Counts each element where the block evaluates to true
269
- # Example:
270
- # a = [1,2,3]
271
- # a.count_if {|e| e % 2 == 0}
272
- def count_if(&block)
273
- self.inject(0) do |s, e|
274
- s += 1 if block.call(e)
275
- s
276
- end
277
- end
278
-
279
- # Returns a Hash or Dictionary (if available) for each category with a
280
- # value as the set of matching values as an array.
281
- # Because this is supposed to be lean (just enumerables), but this is an
282
- # expensive call, I'm going to cache it and offer a parameter to reset
283
- # the cache. So, call category_values(true) if you need to reset the
284
- # cache.
285
- def category_values(reset=false)
286
- @category_values = nil if reset
287
- return @category_values if @category_values
288
- container = defined?(Dictionary) ? Dictionary.new : Hash.new
289
- if self.range_hash
290
- @category_values = self.categories.inject(container) do |cont, cat|
291
- cont[cat] = self.find_all &self.range_hash[cat]
292
- cont
293
- end
294
- else
295
- @category_values = self.categories.inject(container) do |cont, cat|
296
- cont[cat] = self.find_all {|e| e == cat}
297
- cont
298
- end
299
- end
300
- end
301
-
302
- # When creating a range, what class will it be? Defaults to Range, but
303
- # other classes are sometimes useful.
304
- def range_class
305
- @range_class ||= Range
306
- end
307
-
308
- # Actually instantiates the range, instead of producing a min and max array.
309
- def range_as_range(&block)
310
- if @range_class_args and not @range_class_args.empty?
311
- self.range_class.new(*@range_class_args)
312
- else
313
- self.range_class.new(min(&block), max(&block))
314
- end
315
- end
316
- alias :range_instance :range_as_range
317
-
318
- # I don't pass the block to the sort, because a sort block needs to look
319
- # something like: {|x,y| x <=> y}. To get around this, set the default
320
- # block on the object.
321
- def new_sort(&block)
322
- if block_given?
323
- map { |i| yield(i) }.sort.dup
324
- elsif default_block
325
- map { |i| default_block[*i] }.sort.dup
326
- else
327
- sort().dup
328
- end
329
- end
330
-
331
- # Doesn't overwrite things like Matrix#rank
332
- def rank(&block)
333
-
334
- sorted = new_sort(&block)
335
-
336
- if block_given?
337
- map { |i| sorted.index(yield(i)) + 1 }
338
- elsif default_block
339
- map { |i| sorted.index(default_block[*i]) + 1 }
340
- else
341
- map { |i| sorted.index(i) + 1 }
342
- end
343
-
344
- end unless defined?(rank)
345
-
346
- # Given values like [10,5,5,1]
347
- # Rank should produce something like [4,2,2,1]
348
- # And order should produce something like [4,2,3,1]
349
- # The trick is that rank skips as many as were duplicated, so there
350
- # could not be a 3 in the rank from the example above.
351
- def order(&block)
352
- hold = []
353
- rank(&block).each do |x|
354
- while hold.include?(x) do
355
- x += 1
356
- end
357
- hold << x
358
- end
359
- hold
360
- end
361
-
362
- # First quartile: nth_split_by_m(1, 4)
363
- # Third quartile: nth_split_by_m(3, 4)
364
- # Median: nth_split_by_m(1, 2)
365
- # Doesn't match R, and it's silly to try to.
366
- # def nth_split_by_m(n, m)
367
- # sorted = new_sort
368
- # dividers = m - 1
369
- # if size % m == dividers # Divides evenly
370
- # # Because we have a 0-based list, we get the floor
371
- # i = ((size / m.to_f) * n).floor
372
- # j = i
373
- # else
374
- # # This reflects R's approach, which I don't think I agree with.
375
- # i = (((size / m.to_f) * n) - 1)
376
- # i = i > (size / m.to_f) ? i.floor : i.ceil
377
- # j = i + 1
378
- # end
379
- # sorted[i] + ((n / m.to_f) * (sorted[j] - sorted[i]))
380
- # end
381
- def quantile(&block)
382
- [
383
- min(&block),
384
- first_half(&block).median(0.25, &block),
385
- median(&block),
386
- second_half(&block).median(0.75, &block),
387
- max(&block)
388
- ]
389
- end
390
-
391
- # The cummulative sum. Example:
392
- # [1,2,3].cum_sum # => [1, 3, 6]
393
- def cum_sum(sorted=false, &block)
394
- sum = zero
395
- obj = sorted ? self.new_sort : self
396
- if block_given?
397
- obj.map { |i| sum += yield(i) }
398
- elsif default_block
399
- obj.map { |i| sum += default_block[*i] }
400
- else
401
- obj.map { |i| sum += i }
402
- end
403
- end
404
- alias :cumulative_sum :cum_sum
405
-
406
- # The cummulative product. Example:
407
- # [1,2,3].cum_prod # => [1.0, 2.0, 6.0]
408
- def cum_prod(sorted=false, &block)
409
- prod = one
410
- obj = sorted ? self.new_sort : self
411
- if block_given?
412
- obj.map { |i| prod *= yield(i) }
413
- elsif default_block
414
- obj.map { |i| prod *= default_block[*i] }
415
- else
416
- obj.map { |i| prod *= i }
417
- end
418
- end
419
- alias :cumulative_product :cum_prod
420
-
421
- # Used to preprocess the list
422
- def morph_list(&block)
423
- if block
424
- self.map{ |e| block.call(e) }
425
- elsif self.default_block
426
- self.map{ |e| self.default_block.call(e) }
427
- else
428
- self
429
- end
430
- end
431
- protected :morph_list
432
-
433
- # Example:
434
- # [1,2,3,0,5].cum_max # => [1,2,3,3,5]
435
- def cum_max(&block)
436
- morph_list(&block).inject([]) do |list, e|
437
- found = (list | [e]).max
438
- list << (found ? found : e)
439
- end
440
- end
441
- alias :cumulative_max :cum_max
442
-
443
- # Example:
444
- # [1,2,3,0,5].cum_min # => [1,1,1,0,0]
445
- def cum_min(&block)
446
- morph_list(&block).inject([]) do |list, e|
447
- found = (list | [e]).min
448
- list << (found ? found : e)
449
- end
450
- end
451
- alias :cumulative_min :cum_min
452
-
453
- # Multiplies the values:
454
- # >> product(1,2,3)
455
- # => 6.0
456
- def product
457
- self.inject(one) {|sum, a| sum *= a}
458
- end
459
-
460
- # There are going to be a lot more of these kinds of things, so pay
461
- # attention.
462
- def to_pairs(other, &block)
463
- n = [self.size, other.size].min
464
- (0...n).map {|i| block.call(self[i], other[i]) }
465
- end
466
-
467
- # Finds the tanimoto coefficient: the intersection set size / union set
468
- # size. This is used to find the distance between two vectors.
469
- # >> [1,2,3].cor([2,3,5])
470
- # => 0.981980506061966
471
- # >> [1,2,3].tanimoto_pairs([2,3,5])
472
- # => 0.5
473
- def tanimoto_pairs(other)
474
- intersect(other).size / union(other).size.to_f
475
- end
476
- alias :tanimoto_correlation :tanimoto_pairs
477
-
478
- # Sometimes it just helps to have things spelled out. These are all
479
- # part of the Array class. This means, you have methods that you can't
480
- # run on some kinds of enumerables.
481
-
482
- # All of the left and right hand sides, excluding duplicates.
483
- # "The union of x and y"
484
- def union(other)
485
- other = other.to_a unless other.is_a?(Array)
486
- self | other
487
- end
488
-
489
- # What's shared on the left and right hand sides
490
- # "The intersection of x and y"
491
- def intersect(other)
492
- other = other.to_a unless other.is_a?(Array)
493
- self & other
494
- end
495
-
496
- # Everything on the left hand side except what's shared on the right
497
- # hand side.
498
- # "The relative compliment of y in x"
499
- def compliment(other)
500
- other = other.to_a unless other.is_a?(Array)
501
- self - other
502
- end
503
-
504
- # Everything but what's shared
505
- def exclusive_not(other)
506
- other = other.to_a unless other.is_a?(Array)
507
- (self | other) - (self & other)
508
- end
509
-
510
- # Finds the cartesian product, excluding duplicates items and self-
511
- # referential pairs. Yields the block value if given.
512
- def cartesian_product(other, &block)
513
- x,y = self.uniq.dup, other.uniq.dup
514
- pairs = x.inject([]) do |cp, i|
515
- cp | y.map{|b| i == b ? nil : [i,b]}.compact
516
- end
517
- return pairs unless block_given?
518
- pairs.map{|p| yield p.first, p.last}
519
- end
520
- alias :cp :cartesian_product
521
- alias :permutations :cartesian_product
522
-
523
- # Sigma of pairs. Returns a single float, or whatever object is sent in.
524
- # Example: [1,2,3].sigma_pairs([4,5,6], 0) {|x, y| x + y}
525
- # returns 21 instead of 21.0.
526
- def sigma_pairs(other, z=zero, &block)
527
- self.to_pairs(other,&block).inject(z) {|sum, i| sum += i}
528
- end
529
-
530
- # Returns the Euclidian distance between all points of a set of enumerables
531
- def euclidian_distance(other)
532
- Math.sqrt(self.sigma_pairs(other) {|a, b| (a - b) ** 2})
533
- end
534
-
535
- # Returns a random integer in the range for any number of lists. This
536
- # is a way to get a random vector that is tenable based on the sample
537
- # data. For example, given two sets of numbers:
538
- #
539
- # a = [1,2,3]; b = [8,8,8]
540
- #
541
- # rand_in_pair_range will return a value >= 1 and <= 8 in the first
542
- # place, >= 2 and <= 8 in the second place, and >= 3 and <= 8 in the
543
- # last place.
544
- # Works for integers. Rethink this for floats. May consider setting up
545
- # FixedRange for floats. O(n*5)
546
- def rand_in_range(*args)
547
- min = self.min_of_lists(*args)
548
- max = self.max_of_lists(*args)
549
- (0...size).inject([]) do |ary, i|
550
- ary << rand_between(min[i], max[i])
551
- end
552
- end
553
-
554
- # Finds the correlation between two enumerables.
555
- # Example: [1,2,3].cor [2,3,5]
556
- # returns 0.981980506061966
557
- def correlation(other)
558
- n = [self.size, other.size].min
559
- sum_of_products_of_pairs = self.sigma_pairs(other) {|a, b| a * b}
560
- self_sum = self.sum
561
- other_sum = other.sum
562
- sum_of_squared_self_scores = self.sum { |e| e * e }
563
- sum_of_squared_other_scores = other.sum { |e| e * e }
564
-
565
- numerator = (n * sum_of_products_of_pairs) - (self_sum * other_sum)
566
- self_denominator = ((n * sum_of_squared_self_scores) - (self_sum ** 2))
567
- other_denominator = ((n * sum_of_squared_other_scores) - (other_sum ** 2))
568
- denominator = Math.sqrt(self_denominator * other_denominator)
569
- return numerator / denominator
570
- end
571
- alias :cor :correlation
572
-
573
- # Transposes arrays of arrays and yields a block on the value.
574
- # The regular Array#transpose ignores blocks
575
- def yield_transpose(*enums, &block)
576
- enums.unshift(self)
577
- n = enums.map{ |x| x.size}.min
578
- block ||= lambda{|e| e}
579
- (0...n).map { |i| block.call enums.map{ |x| x[i] } }
580
- end
581
-
582
- # Returns the max of two or more enumerables.
583
- # >> [1,2,3].max_of_lists([0,5,6], [0,2,9])
584
- # => [1, 5, 9]
585
- def max_of_lists(*enums)
586
- yield_transpose(*enums) {|e| e.max}
587
- end
588
-
589
- # Returns the min of two or more enumerables.
590
- # >> [1,2,3].min_of_lists([4,5,6], [0,2,9])
591
- # => [0, 2, 3]
592
- def min_of_lists(*enums)
593
- yield_transpose(*enums) {|e| e.min}
594
- end
595
-
596
- end
597
- end