davidrichards-just_enumerable_stats 0.0.8 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +7 -31
- data/VERSION.yml +1 -1
- data/lib/just_enumerable_stats.rb +261 -190
- data/spec/just_enumerable_stats_spec.rb +197 -7
- data/spec/spec_helper.rb +62 -0
- metadata +1 -5
- data/lib/just_enumerable_stats/stats.rb +0 -597
- data/spec/just_enumerable_stats/stats_spec.rb +0 -534
@@ -271,7 +271,7 @@ describe "JustEnumerableStats" do
|
|
271
271
|
|
272
272
|
it "should be able to dichotomize a list" do
|
273
273
|
@a.dichotomize(2, :small, :big)
|
274
|
-
@a.categories.should eql([:small, :big])
|
274
|
+
@a.categories.map{|e| e.to_s}.sort.map{|e| e.to_sym}.should eql([:big, :small])
|
275
275
|
@a.category_values[:small].should eql([1,2])
|
276
276
|
@a.category_values[:big].should eql([3])
|
277
277
|
end
|
@@ -523,14 +523,204 @@ describe "JustEnumerableStats" do
|
|
523
523
|
a.covariance(b).should eql(0.125)
|
524
524
|
end
|
525
525
|
|
526
|
-
it "should be able to return the Pearson correlation" do
|
527
|
-
a = [1,2,3,4]
|
528
|
-
b = [3,3,4,3]
|
529
|
-
a.pearson_correlation(b).should be_close(0.193649167310371, 1.0e-15)
|
530
|
-
end
|
531
|
-
|
532
526
|
it "should be able to force the list into floats" do
|
533
527
|
[1,2,3].to_f!.should eql([1.0, 2.0, 3.0])
|
534
528
|
end
|
535
529
|
|
530
|
+
context "unobstrusive" do
|
531
|
+
before do
|
532
|
+
@a = BusyClass.new(1,2,3)
|
533
|
+
@b = [2,3,1]
|
534
|
+
end
|
535
|
+
|
536
|
+
it "should not use the native max" do
|
537
|
+
lambda{@a._jes_max}.should_not raise_error
|
538
|
+
end
|
539
|
+
|
540
|
+
it "should not use the native max_index" do
|
541
|
+
lambda{@a._jes_max_index}.should_not raise_error
|
542
|
+
end
|
543
|
+
|
544
|
+
it "should not use the native min" do
|
545
|
+
lambda{@a._jes_min}.should_not raise_error
|
546
|
+
end
|
547
|
+
|
548
|
+
it "should not use the native min_index" do
|
549
|
+
lambda{@a._jes_min_index}.should_not raise_error
|
550
|
+
end
|
551
|
+
|
552
|
+
it "should not use the native default_block" do
|
553
|
+
lambda{@a._jes_default_block}.should_not raise_error
|
554
|
+
end
|
555
|
+
|
556
|
+
it "should not use the native default_block=" do
|
557
|
+
lambda{@a._jes_default_block= lambda{|e| 1} }.should_not raise_error
|
558
|
+
end
|
559
|
+
|
560
|
+
it "should not use the native sum" do
|
561
|
+
lambda{@a._jes_sum}.should_not raise_error
|
562
|
+
end
|
563
|
+
|
564
|
+
it "should not use the native average" do
|
565
|
+
lambda{@a._jes_average}.should_not raise_error
|
566
|
+
end
|
567
|
+
|
568
|
+
it "should not use the native variance" do
|
569
|
+
lambda{@a._jes_variance}.should_not raise_error
|
570
|
+
end
|
571
|
+
|
572
|
+
it "should not use the native standard_deviation" do
|
573
|
+
lambda{@a._jes_standard_deviation}.should_not raise_error
|
574
|
+
end
|
575
|
+
|
576
|
+
it "should not use the native median" do
|
577
|
+
lambda{@a._jes_median}.should_not raise_error
|
578
|
+
end
|
579
|
+
|
580
|
+
it "should not use the native categories" do
|
581
|
+
lambda{@a._jes_categories}.should_not raise_error
|
582
|
+
end
|
583
|
+
|
584
|
+
it "should not use the native is_numeric?" do
|
585
|
+
lambda{@a._jes_is_numeric?}.should_not raise_error
|
586
|
+
end
|
587
|
+
|
588
|
+
it "should not use the native range" do
|
589
|
+
lambda{@a._jes_range}.should_not raise_error
|
590
|
+
end
|
591
|
+
|
592
|
+
it "should not use the native set_range_class" do
|
593
|
+
lambda{@a._jes_set_range_class(FixedRange)}.should_not raise_error
|
594
|
+
end
|
595
|
+
|
596
|
+
it "should not use the native set_range" do
|
597
|
+
lambda{@a._jes_set_range({:a => 1})}.should_not raise_error
|
598
|
+
end
|
599
|
+
|
600
|
+
it "should not use the native dichotomize" do
|
601
|
+
lambda{@a._jes_dichotomize(2, :small, :big)}.should_not raise_error
|
602
|
+
end
|
603
|
+
|
604
|
+
it "should not use the native count_if" do
|
605
|
+
lambda{@a._jes_count_if {|e| e == 2}}.should_not raise_error
|
606
|
+
end
|
607
|
+
|
608
|
+
it "should not use the native category_values" do
|
609
|
+
lambda{@a._jes_category_values}.should_not raise_error
|
610
|
+
end
|
611
|
+
|
612
|
+
it "should not use the native range_class" do
|
613
|
+
lambda{@a._jes_range_class}.should_not raise_error
|
614
|
+
end
|
615
|
+
|
616
|
+
it "should not use the native range_as_range" do
|
617
|
+
lambda{@a._jes_range_as_range}.should_not raise_error
|
618
|
+
end
|
619
|
+
|
620
|
+
it "should not use the native new_sort" do
|
621
|
+
lambda{@a._jes_new_sort}.should_not raise_error
|
622
|
+
end
|
623
|
+
|
624
|
+
it "should not use the native rank" do
|
625
|
+
lambda{@a._jes_rank}.should_not raise_error
|
626
|
+
end
|
627
|
+
|
628
|
+
it "should not use the native order" do
|
629
|
+
lambda{@a._jes_order}.should_not raise_error
|
630
|
+
end
|
631
|
+
|
632
|
+
it "should not use the native quantile" do
|
633
|
+
lambda{@a._jes_quantile}.should_not raise_error
|
634
|
+
end
|
635
|
+
|
636
|
+
it "should not use the native cum_sum" do
|
637
|
+
lambda{@a._jes_cum_sum}.should_not raise_error
|
638
|
+
end
|
639
|
+
|
640
|
+
it "should not use the native cum_prod" do
|
641
|
+
lambda{@a._jes_cum_prod}.should_not raise_error
|
642
|
+
end
|
643
|
+
|
644
|
+
it "should not use the native cum_max" do
|
645
|
+
lambda{@a._jes_cum_max}.should_not raise_error
|
646
|
+
end
|
647
|
+
|
648
|
+
it "should not use the native cum_min" do
|
649
|
+
lambda{@a._jes_cum_min}.should_not raise_error
|
650
|
+
end
|
651
|
+
|
652
|
+
it "should not use the native product" do
|
653
|
+
lambda{@a._jes_product}.should_not raise_error
|
654
|
+
end
|
655
|
+
|
656
|
+
it "should not use the native to_pairs" do
|
657
|
+
lambda{@a._jes_to_pairs(@b) {|a, b| a}}.should_not raise_error
|
658
|
+
end
|
659
|
+
|
660
|
+
it "should not use the native tanimoto_pairs" do
|
661
|
+
lambda{@a._jes_tanimoto_pairs(@b)}.should_not raise_error
|
662
|
+
end
|
663
|
+
|
664
|
+
it "should not use the native union" do
|
665
|
+
lambda{@a._jes_union(@b)}.should_not raise_error
|
666
|
+
end
|
667
|
+
|
668
|
+
it "should not use the native intersect" do
|
669
|
+
lambda{@a._jes_intersect(@b)}.should_not raise_error
|
670
|
+
end
|
671
|
+
|
672
|
+
it "should not use the native compliment" do
|
673
|
+
lambda{@a._jes_compliment(@b)}.should_not raise_error
|
674
|
+
end
|
675
|
+
|
676
|
+
it "should not use the native exclusive_not" do
|
677
|
+
lambda{@a._jes_exclusive_not(@b)}.should_not raise_error
|
678
|
+
end
|
679
|
+
|
680
|
+
it "should not use the native cartesian_product" do
|
681
|
+
lambda{@a._jes_cartesian_product(@b)}.should_not raise_error
|
682
|
+
end
|
683
|
+
|
684
|
+
it "should not use the native sigma_pairs" do
|
685
|
+
lambda{@a._jes_sigma_pairs(@b) {|a, b| a}}.should_not raise_error
|
686
|
+
end
|
687
|
+
|
688
|
+
it "should not use the native euclidian_distance" do
|
689
|
+
lambda{@a._jes_euclidian_distance(@b)}.should_not raise_error
|
690
|
+
end
|
691
|
+
|
692
|
+
it "should not use the native rand_in_range" do
|
693
|
+
lambda{@a._jes_rand_in_range(1, 2)}.should_not raise_error
|
694
|
+
end
|
695
|
+
|
696
|
+
it "should not use the native correlation" do
|
697
|
+
lambda{@a._jes_correlation(@b)}.should_not raise_error
|
698
|
+
end
|
699
|
+
|
700
|
+
it "should not use the native yield_transpose" do
|
701
|
+
lambda{@a._jes_yield_transpose(@b)}.should_not raise_error
|
702
|
+
end
|
703
|
+
|
704
|
+
it "should not use the native max_of_lists" do
|
705
|
+
lambda{@a._jes_max_of_lists(@b)}.should_not raise_error
|
706
|
+
end
|
707
|
+
|
708
|
+
it "should not use the native min_of_lists" do
|
709
|
+
lambda{@a._jes_min_of_lists(@b)}.should_not raise_error
|
710
|
+
end
|
711
|
+
|
712
|
+
it "should not use the native covariance" do
|
713
|
+
lambda{@a._jes_covariance(@b)}.should_not raise_error
|
714
|
+
end
|
715
|
+
|
716
|
+
it "should not use the native pearson_correlation" do
|
717
|
+
lambda{@a._jes_pearson_correlation(@b)}.should_not raise_error
|
718
|
+
end
|
719
|
+
|
720
|
+
it "should not use the native to_f!" do
|
721
|
+
lambda{@a._jes_to_f!}.should_not raise_error
|
722
|
+
end
|
723
|
+
|
724
|
+
end
|
725
|
+
|
536
726
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -6,3 +6,65 @@ require 'just_enumerable_stats'
|
|
6
6
|
Spec::Runner.configure do |config|
|
7
7
|
|
8
8
|
end
|
9
|
+
|
10
|
+
class BusyClass
|
11
|
+
include Enumerable
|
12
|
+
def initialize(*vals)
|
13
|
+
@values = vals
|
14
|
+
end
|
15
|
+
|
16
|
+
def method_missing(sym, *args, &block)
|
17
|
+
@values.send(sym, *args, &block)
|
18
|
+
end
|
19
|
+
|
20
|
+
def max(&block); raise ArgumentError, "Should not be called"; end
|
21
|
+
def max_index(&block); raise ArgumentError, "Should not be called"; end
|
22
|
+
def min(&block); raise ArgumentError, "Should not be called"; end
|
23
|
+
def min_index(&block); raise ArgumentError, "Should not be called"; end
|
24
|
+
def default_block; raise ArgumentError, "Should not be called"; end
|
25
|
+
def default_block=(block); raise ArgumentError, "Should not be called"; end
|
26
|
+
def sum; raise ArgumentError, "Should not be called"; end
|
27
|
+
def average(&block); raise ArgumentError, "Should not be called"; end
|
28
|
+
def variance(&block); raise ArgumentError, "Should not be called"; end
|
29
|
+
def standard_deviation(&block); raise ArgumentError, "Should not be called"; end
|
30
|
+
def median(ratio=0.5, &block); raise ArgumentError, "Should not be called"; end
|
31
|
+
def categories; raise ArgumentError, "Should not be called"; end
|
32
|
+
def is_numeric?; raise ArgumentError, "Should not be called"; end
|
33
|
+
def range(&block); raise ArgumentError, "Should not be called"; end
|
34
|
+
def set_range_class(klass, *args); raise ArgumentError, "Should not be called"; end
|
35
|
+
def set_range(hash); raise ArgumentError, "Should not be called"; end
|
36
|
+
def dichotomize(split_value, first_label, second_label); raise ArgumentError, "Should not be called"; end
|
37
|
+
def count_if(&block); raise ArgumentError, "Should not be called"; end
|
38
|
+
def category_values(reset=false); raise ArgumentError, "Should not be called"; end
|
39
|
+
def range_class; raise ArgumentError, "Should not be called"; end
|
40
|
+
def range_as_range(&block); raise ArgumentError, "Should not be called"; end
|
41
|
+
def new_sort(&block); raise ArgumentError, "Should not be called"; end
|
42
|
+
def rank(&block); raise ArgumentError, "Should not be called"; end
|
43
|
+
def order(&block); raise ArgumentError, "Should not be called"; end
|
44
|
+
def quantile(&block); raise ArgumentError, "Should not be called"; end
|
45
|
+
def cum_sum(sorted=false, &block); raise ArgumentError, "Should not be called"; end
|
46
|
+
def cum_prod(sorted=false, &block); raise ArgumentError, "Should not be called"; end
|
47
|
+
def cum_max(&block); raise ArgumentError, "Should not be called"; end
|
48
|
+
def cum_min(&block); raise ArgumentError, "Should not be called"; end
|
49
|
+
def product; raise ArgumentError, "Should not be called"; end
|
50
|
+
def to_pairs(other, &block); raise ArgumentError, "Should not be called"; end
|
51
|
+
def tanimoto_pairs(other); raise ArgumentError, "Should not be called"; end
|
52
|
+
def union(other); raise ArgumentError, "Should not be called"; end
|
53
|
+
def intersect(other); raise ArgumentError, "Should not be called"; end
|
54
|
+
def compliment(other); raise ArgumentError, "Should not be called"; end
|
55
|
+
def exclusive_not(other); raise ArgumentError, "Should not be called"; end
|
56
|
+
def cartesian_product(other, &block); raise ArgumentError, "Should not be called"; end
|
57
|
+
def sigma_pairs(other, z=_jes_zero, &block); raise ArgumentError, "Should not be called"; end
|
58
|
+
def euclidian_distance(other); raise ArgumentError, "Should not be called"; end
|
59
|
+
def rand_in_range(*args); raise ArgumentError, "Should not be called"; end
|
60
|
+
def correlation(other); raise ArgumentError, "Should not be called"; end
|
61
|
+
def yield_transpose(*enums, &block); raise ArgumentError, "Should not be called"; end
|
62
|
+
def max_of_lists(*enums); raise ArgumentError, "Should not be called"; end
|
63
|
+
def min_of_lists(*enums); raise ArgumentError, "Should not be called"; end
|
64
|
+
def covariance(other); raise ArgumentError, "Should not be called"; end
|
65
|
+
def pearson_correlation(other); raise ArgumentError, "Should not be called"; end
|
66
|
+
def to_f!; raise ArgumentError, "Should not be called"; end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-just_enumerable_stats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -26,12 +26,8 @@ files:
|
|
26
26
|
- VERSION.yml
|
27
27
|
- bin/jes
|
28
28
|
- lib/fixed_range.rb
|
29
|
-
- lib/just_enumerable_stats
|
30
|
-
- lib/just_enumerable_stats/stats.rb
|
31
29
|
- lib/just_enumerable_stats.rb
|
32
30
|
- spec/fixed_range_spec.rb
|
33
|
-
- spec/just_enumerable_stats
|
34
|
-
- spec/just_enumerable_stats/stats_spec.rb
|
35
31
|
- spec/just_enumerable_stats_spec.rb
|
36
32
|
- spec/spec_helper.rb
|
37
33
|
has_rdoc: true
|
@@ -1,597 +0,0 @@
|
|
1
|
-
# This is a namespaced version of the gem, in case you can create a
|
2
|
-
# container for your data and only include these methods there.
|
3
|
-
# Example:
|
4
|
-
class Object
|
5
|
-
|
6
|
-
# Simpler way to handle a random number between to values
|
7
|
-
def rand_between(a, b)
|
8
|
-
return rand_in_floats(a, b) if a.is_a?(Float) or b.is_a?(Float)
|
9
|
-
range = (a - b).abs + 1
|
10
|
-
rand(range) + [a,b].min
|
11
|
-
end
|
12
|
-
|
13
|
-
# Handles non-integers
|
14
|
-
def rand_in_floats(a, b)
|
15
|
-
range = (a - b).abs
|
16
|
-
(rand * range) + [a,b].min
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
module JustEnumerableStats #:nodoc:
|
22
|
-
module Stats
|
23
|
-
|
24
|
-
# To keep max and min DRY.
|
25
|
-
def block_sorter(a, b, &block)
|
26
|
-
if block
|
27
|
-
val = yield(a, b)
|
28
|
-
elsif default_block
|
29
|
-
val = default_block.call(a, b)
|
30
|
-
else
|
31
|
-
val = a <=> b
|
32
|
-
end
|
33
|
-
end
|
34
|
-
protected :block_sorter
|
35
|
-
|
36
|
-
# Returns the max, using an optional block.
|
37
|
-
def max(&block)
|
38
|
-
self.inject do |best, e|
|
39
|
-
val = block_sorter(best, e, &block)
|
40
|
-
best = val > 0 ? best : e
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
# Returns the first index of the max value
|
45
|
-
def max_index(&block)
|
46
|
-
self.index(max(&block))
|
47
|
-
end
|
48
|
-
|
49
|
-
# Min of any number of items
|
50
|
-
def min(&block)
|
51
|
-
self.inject do |best, e|
|
52
|
-
val = block_sorter(best, e, &block)
|
53
|
-
best = val < 0 ? best : e
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
# Returns the first index of the min value
|
58
|
-
def min_index(&block)
|
59
|
-
self.index(min(&block))
|
60
|
-
end
|
61
|
-
|
62
|
-
# The block called to filter the values in the object.
|
63
|
-
def default_block
|
64
|
-
@default_stat_block
|
65
|
-
end
|
66
|
-
|
67
|
-
# Allows me to setup a block for a series of operations. Example:
|
68
|
-
# a = [1,2,3]
|
69
|
-
# a.sum # => 6.0
|
70
|
-
# a.default_block = lambda{|e| 1 / e}
|
71
|
-
# a.sum # => 1.0
|
72
|
-
def default_block=(block)
|
73
|
-
@default_stat_block = block
|
74
|
-
end
|
75
|
-
|
76
|
-
# Provides zero in the right class (Numeric or Float)
|
77
|
-
def zero
|
78
|
-
any? {|e| e.is_a?(Float)} ? 0.0 : 0
|
79
|
-
end
|
80
|
-
protected :zero
|
81
|
-
|
82
|
-
# Provides one in the right class (Numeric or Float)
|
83
|
-
def one
|
84
|
-
any? {|e| e.is_a?(Float)} ? 1.0 : 1
|
85
|
-
end
|
86
|
-
protected :one
|
87
|
-
|
88
|
-
# Adds up the list. Uses a block or default block if present.
|
89
|
-
def sum
|
90
|
-
sum = zero
|
91
|
-
if block_given?
|
92
|
-
each{|i| sum += yield(i)}
|
93
|
-
elsif default_block
|
94
|
-
each{|i| sum += default_block[*i]}
|
95
|
-
else
|
96
|
-
each{|i| sum += i}
|
97
|
-
end
|
98
|
-
sum
|
99
|
-
end
|
100
|
-
|
101
|
-
# The arithmetic mean, uses a block or default block.
|
102
|
-
def average(&block)
|
103
|
-
sum(&block)/size
|
104
|
-
end
|
105
|
-
alias :mean :average
|
106
|
-
alias :avg :average
|
107
|
-
|
108
|
-
# The variance, uses a block or default block.
|
109
|
-
def variance(&block)
|
110
|
-
m = mean(&block)
|
111
|
-
sum_of_differences = if block_given?
|
112
|
-
sum{ |i| j=yield(i); (m - j) ** 2 }
|
113
|
-
elsif default_block
|
114
|
-
sum{ |i| j=default_block[*i]; (m - j) ** 2 }
|
115
|
-
else
|
116
|
-
sum{ |i| (m - i) ** 2 }
|
117
|
-
end
|
118
|
-
sum_of_differences / (size - 1)
|
119
|
-
end
|
120
|
-
alias :var :variance
|
121
|
-
|
122
|
-
# The standard deviation. Uses a block or default block.
|
123
|
-
def standard_deviation(&block)
|
124
|
-
Math::sqrt(variance(&block))
|
125
|
-
end
|
126
|
-
alias :std :standard_deviation
|
127
|
-
|
128
|
-
# The slow way is to iterate up to the middle point. A faster way is to
|
129
|
-
# use the index, when available. If a block is supplied, always iterate
|
130
|
-
# to the middle point.
|
131
|
-
def median(ratio=0.5, &block)
|
132
|
-
return iterate_midway(ratio, &block) if block_given?
|
133
|
-
begin
|
134
|
-
mid1, mid2 = middle_two
|
135
|
-
sorted = new_sort
|
136
|
-
med1, med2 = sorted[mid1], sorted[mid2]
|
137
|
-
return med1 if med1 == med2
|
138
|
-
return med1 + ((med2 - med1) * ratio)
|
139
|
-
rescue
|
140
|
-
iterate_midway(ratio, &block)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def middle_two
|
145
|
-
mid2 = size.div(2)
|
146
|
-
mid1 = (size % 2 == 0) ? mid2 - 1 : mid2
|
147
|
-
return mid1, mid2
|
148
|
-
end
|
149
|
-
protected :middle_two
|
150
|
-
|
151
|
-
def median_position
|
152
|
-
middle_two.last
|
153
|
-
end
|
154
|
-
protected :median_position
|
155
|
-
|
156
|
-
def first_half(&block)
|
157
|
-
fh = self[0..median_position].dup
|
158
|
-
end
|
159
|
-
protected :first_half
|
160
|
-
|
161
|
-
def second_half(&block)
|
162
|
-
# Total crap, but it's the way R does things, and this will most likely
|
163
|
-
# only be used to feed R some numbers to plot, if at all.
|
164
|
-
sh = size <= 5 ? self[median_position..-1].dup : self[median_position - 1..-1].dup
|
165
|
-
end
|
166
|
-
protected :second_half
|
167
|
-
|
168
|
-
# An iterative version of median
|
169
|
-
def iterate_midway(ratio, &block)
|
170
|
-
mid1, mid2, last_value, j, sorted, sort1, sort2 = middle_two, nil, 0, new_sort, nil, nil
|
171
|
-
|
172
|
-
if block_given?
|
173
|
-
sorted.each do |i|
|
174
|
-
last_value = yield(i)
|
175
|
-
j += 1
|
176
|
-
sort1 = last_value if j == mid1
|
177
|
-
sort2 = last_value if j == mid2
|
178
|
-
break if j >= mid2
|
179
|
-
end
|
180
|
-
elsif default_block
|
181
|
-
sorted.each do |i|
|
182
|
-
last_value = default_block[*i]
|
183
|
-
j += 1
|
184
|
-
sort1 = last_value if j == mid1
|
185
|
-
sort2 = last_value if j == mid2
|
186
|
-
break if j >= mid2
|
187
|
-
end
|
188
|
-
else
|
189
|
-
sorted.each do |i|
|
190
|
-
last_value = i
|
191
|
-
sort1 = last_value if j == mid1
|
192
|
-
sort2 = last_value if j == mid2
|
193
|
-
j += 1
|
194
|
-
break if j >= mid2
|
195
|
-
end
|
196
|
-
end
|
197
|
-
return med1 if med1 == med2
|
198
|
-
return med1 + ((med2 - med1) * ratio)
|
199
|
-
end
|
200
|
-
protected :iterate_midway
|
201
|
-
|
202
|
-
# Takes the range_class and returns its map.
|
203
|
-
# Example:
|
204
|
-
# require 'mathn'
|
205
|
-
# a = [1,2,3]
|
206
|
-
# a
|
207
|
-
# range_class = FixedRange, a.min, a.max, 1/4
|
208
|
-
# a.categories
|
209
|
-
# => [1, 5/4, 3/2, 7/4, 2, 9/4, 5/2, 11/4, 3]
|
210
|
-
# For non-numeric values, returns a unique set,
|
211
|
-
# ordered if possible.
|
212
|
-
def categories
|
213
|
-
if @categories
|
214
|
-
@categories
|
215
|
-
elsif self.is_numeric?
|
216
|
-
self.range_instance.map
|
217
|
-
else
|
218
|
-
self.uniq.sort rescue self.uniq
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
def is_numeric?
|
223
|
-
self.all? {|e| e.is_a?(Numeric)}
|
224
|
-
end
|
225
|
-
|
226
|
-
# Just an array of [min, max] to comply with R uses of the work. Use
|
227
|
-
# range_as_range if you want a real Range.
|
228
|
-
def range(&block)
|
229
|
-
[min(&block), max(&block)]
|
230
|
-
end
|
231
|
-
|
232
|
-
# Useful for setting a real range class (FixedRange).
|
233
|
-
def set_range_class(klass, *args)
|
234
|
-
@range_class = klass
|
235
|
-
@range_class_args = args
|
236
|
-
self.range_class
|
237
|
-
end
|
238
|
-
|
239
|
-
# Takes a hash of arrays for categories
|
240
|
-
# If Facets happens to be loaded on the computer, this keeps the order
|
241
|
-
# of the categories straight.
|
242
|
-
def set_range(hash)
|
243
|
-
if defined?(Dictionary)
|
244
|
-
@range_hash = Dictionary.new
|
245
|
-
@range_hash.merge!(hash)
|
246
|
-
@categories = @range_hash.keys
|
247
|
-
else
|
248
|
-
@categories = hash.keys
|
249
|
-
@range_hash = hash
|
250
|
-
end
|
251
|
-
@categories
|
252
|
-
end
|
253
|
-
|
254
|
-
# The hash of lambdas that are used to categorize the enumerable.
|
255
|
-
attr_reader :range_hash
|
256
|
-
|
257
|
-
# The arguments needed to instantiate the custom-defined range class.
|
258
|
-
attr_reader :range_class_args
|
259
|
-
|
260
|
-
# Splits the values in two, <= the value and > the value.
|
261
|
-
def dichotomize(split_value, first_label, second_label)
|
262
|
-
set_range({
|
263
|
-
first_label => lambda{|e| e <= split_value},
|
264
|
-
second_label => lambda{|e| e > split_value}
|
265
|
-
})
|
266
|
-
end
|
267
|
-
|
268
|
-
# Counts each element where the block evaluates to true
|
269
|
-
# Example:
|
270
|
-
# a = [1,2,3]
|
271
|
-
# a.count_if {|e| e % 2 == 0}
|
272
|
-
def count_if(&block)
|
273
|
-
self.inject(0) do |s, e|
|
274
|
-
s += 1 if block.call(e)
|
275
|
-
s
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
|
-
# Returns a Hash or Dictionary (if available) for each category with a
|
280
|
-
# value as the set of matching values as an array.
|
281
|
-
# Because this is supposed to be lean (just enumerables), but this is an
|
282
|
-
# expensive call, I'm going to cache it and offer a parameter to reset
|
283
|
-
# the cache. So, call category_values(true) if you need to reset the
|
284
|
-
# cache.
|
285
|
-
def category_values(reset=false)
|
286
|
-
@category_values = nil if reset
|
287
|
-
return @category_values if @category_values
|
288
|
-
container = defined?(Dictionary) ? Dictionary.new : Hash.new
|
289
|
-
if self.range_hash
|
290
|
-
@category_values = self.categories.inject(container) do |cont, cat|
|
291
|
-
cont[cat] = self.find_all &self.range_hash[cat]
|
292
|
-
cont
|
293
|
-
end
|
294
|
-
else
|
295
|
-
@category_values = self.categories.inject(container) do |cont, cat|
|
296
|
-
cont[cat] = self.find_all {|e| e == cat}
|
297
|
-
cont
|
298
|
-
end
|
299
|
-
end
|
300
|
-
end
|
301
|
-
|
302
|
-
# When creating a range, what class will it be? Defaults to Range, but
|
303
|
-
# other classes are sometimes useful.
|
304
|
-
def range_class
|
305
|
-
@range_class ||= Range
|
306
|
-
end
|
307
|
-
|
308
|
-
# Actually instantiates the range, instead of producing a min and max array.
|
309
|
-
def range_as_range(&block)
|
310
|
-
if @range_class_args and not @range_class_args.empty?
|
311
|
-
self.range_class.new(*@range_class_args)
|
312
|
-
else
|
313
|
-
self.range_class.new(min(&block), max(&block))
|
314
|
-
end
|
315
|
-
end
|
316
|
-
alias :range_instance :range_as_range
|
317
|
-
|
318
|
-
# I don't pass the block to the sort, because a sort block needs to look
|
319
|
-
# something like: {|x,y| x <=> y}. To get around this, set the default
|
320
|
-
# block on the object.
|
321
|
-
def new_sort(&block)
|
322
|
-
if block_given?
|
323
|
-
map { |i| yield(i) }.sort.dup
|
324
|
-
elsif default_block
|
325
|
-
map { |i| default_block[*i] }.sort.dup
|
326
|
-
else
|
327
|
-
sort().dup
|
328
|
-
end
|
329
|
-
end
|
330
|
-
|
331
|
-
# Doesn't overwrite things like Matrix#rank
|
332
|
-
def rank(&block)
|
333
|
-
|
334
|
-
sorted = new_sort(&block)
|
335
|
-
|
336
|
-
if block_given?
|
337
|
-
map { |i| sorted.index(yield(i)) + 1 }
|
338
|
-
elsif default_block
|
339
|
-
map { |i| sorted.index(default_block[*i]) + 1 }
|
340
|
-
else
|
341
|
-
map { |i| sorted.index(i) + 1 }
|
342
|
-
end
|
343
|
-
|
344
|
-
end unless defined?(rank)
|
345
|
-
|
346
|
-
# Given values like [10,5,5,1]
|
347
|
-
# Rank should produce something like [4,2,2,1]
|
348
|
-
# And order should produce something like [4,2,3,1]
|
349
|
-
# The trick is that rank skips as many as were duplicated, so there
|
350
|
-
# could not be a 3 in the rank from the example above.
|
351
|
-
def order(&block)
|
352
|
-
hold = []
|
353
|
-
rank(&block).each do |x|
|
354
|
-
while hold.include?(x) do
|
355
|
-
x += 1
|
356
|
-
end
|
357
|
-
hold << x
|
358
|
-
end
|
359
|
-
hold
|
360
|
-
end
|
361
|
-
|
362
|
-
# First quartile: nth_split_by_m(1, 4)
|
363
|
-
# Third quartile: nth_split_by_m(3, 4)
|
364
|
-
# Median: nth_split_by_m(1, 2)
|
365
|
-
# Doesn't match R, and it's silly to try to.
|
366
|
-
# def nth_split_by_m(n, m)
|
367
|
-
# sorted = new_sort
|
368
|
-
# dividers = m - 1
|
369
|
-
# if size % m == dividers # Divides evenly
|
370
|
-
# # Because we have a 0-based list, we get the floor
|
371
|
-
# i = ((size / m.to_f) * n).floor
|
372
|
-
# j = i
|
373
|
-
# else
|
374
|
-
# # This reflects R's approach, which I don't think I agree with.
|
375
|
-
# i = (((size / m.to_f) * n) - 1)
|
376
|
-
# i = i > (size / m.to_f) ? i.floor : i.ceil
|
377
|
-
# j = i + 1
|
378
|
-
# end
|
379
|
-
# sorted[i] + ((n / m.to_f) * (sorted[j] - sorted[i]))
|
380
|
-
# end
|
381
|
-
def quantile(&block)
|
382
|
-
[
|
383
|
-
min(&block),
|
384
|
-
first_half(&block).median(0.25, &block),
|
385
|
-
median(&block),
|
386
|
-
second_half(&block).median(0.75, &block),
|
387
|
-
max(&block)
|
388
|
-
]
|
389
|
-
end
|
390
|
-
|
391
|
-
# The cummulative sum. Example:
|
392
|
-
# [1,2,3].cum_sum # => [1, 3, 6]
|
393
|
-
def cum_sum(sorted=false, &block)
|
394
|
-
sum = zero
|
395
|
-
obj = sorted ? self.new_sort : self
|
396
|
-
if block_given?
|
397
|
-
obj.map { |i| sum += yield(i) }
|
398
|
-
elsif default_block
|
399
|
-
obj.map { |i| sum += default_block[*i] }
|
400
|
-
else
|
401
|
-
obj.map { |i| sum += i }
|
402
|
-
end
|
403
|
-
end
|
404
|
-
alias :cumulative_sum :cum_sum
|
405
|
-
|
406
|
-
# The cummulative product. Example:
|
407
|
-
# [1,2,3].cum_prod # => [1.0, 2.0, 6.0]
|
408
|
-
def cum_prod(sorted=false, &block)
|
409
|
-
prod = one
|
410
|
-
obj = sorted ? self.new_sort : self
|
411
|
-
if block_given?
|
412
|
-
obj.map { |i| prod *= yield(i) }
|
413
|
-
elsif default_block
|
414
|
-
obj.map { |i| prod *= default_block[*i] }
|
415
|
-
else
|
416
|
-
obj.map { |i| prod *= i }
|
417
|
-
end
|
418
|
-
end
|
419
|
-
alias :cumulative_product :cum_prod
|
420
|
-
|
421
|
-
# Used to preprocess the list
|
422
|
-
def morph_list(&block)
|
423
|
-
if block
|
424
|
-
self.map{ |e| block.call(e) }
|
425
|
-
elsif self.default_block
|
426
|
-
self.map{ |e| self.default_block.call(e) }
|
427
|
-
else
|
428
|
-
self
|
429
|
-
end
|
430
|
-
end
|
431
|
-
protected :morph_list
|
432
|
-
|
433
|
-
# Example:
|
434
|
-
# [1,2,3,0,5].cum_max # => [1,2,3,3,5]
|
435
|
-
def cum_max(&block)
|
436
|
-
morph_list(&block).inject([]) do |list, e|
|
437
|
-
found = (list | [e]).max
|
438
|
-
list << (found ? found : e)
|
439
|
-
end
|
440
|
-
end
|
441
|
-
alias :cumulative_max :cum_max
|
442
|
-
|
443
|
-
# Example:
|
444
|
-
# [1,2,3,0,5].cum_min # => [1,1,1,0,0]
|
445
|
-
def cum_min(&block)
|
446
|
-
morph_list(&block).inject([]) do |list, e|
|
447
|
-
found = (list | [e]).min
|
448
|
-
list << (found ? found : e)
|
449
|
-
end
|
450
|
-
end
|
451
|
-
alias :cumulative_min :cum_min
|
452
|
-
|
453
|
-
# Multiplies the values:
|
454
|
-
# >> product(1,2,3)
|
455
|
-
# => 6.0
|
456
|
-
def product
|
457
|
-
self.inject(one) {|sum, a| sum *= a}
|
458
|
-
end
|
459
|
-
|
460
|
-
# There are going to be a lot more of these kinds of things, so pay
|
461
|
-
# attention.
|
462
|
-
def to_pairs(other, &block)
|
463
|
-
n = [self.size, other.size].min
|
464
|
-
(0...n).map {|i| block.call(self[i], other[i]) }
|
465
|
-
end
|
466
|
-
|
467
|
-
# Finds the tanimoto coefficient: the intersection set size / union set
|
468
|
-
# size. This is used to find the distance between two vectors.
|
469
|
-
# >> [1,2,3].cor([2,3,5])
|
470
|
-
# => 0.981980506061966
|
471
|
-
# >> [1,2,3].tanimoto_pairs([2,3,5])
|
472
|
-
# => 0.5
|
473
|
-
def tanimoto_pairs(other)
|
474
|
-
intersect(other).size / union(other).size.to_f
|
475
|
-
end
|
476
|
-
alias :tanimoto_correlation :tanimoto_pairs
|
477
|
-
|
478
|
-
# Sometimes it just helps to have things spelled out. These are all
|
479
|
-
# part of the Array class. This means, you have methods that you can't
|
480
|
-
# run on some kinds of enumerables.
|
481
|
-
|
482
|
-
# All of the left and right hand sides, excluding duplicates.
|
483
|
-
# "The union of x and y"
|
484
|
-
def union(other)
|
485
|
-
other = other.to_a unless other.is_a?(Array)
|
486
|
-
self | other
|
487
|
-
end
|
488
|
-
|
489
|
-
# What's shared on the left and right hand sides
|
490
|
-
# "The intersection of x and y"
|
491
|
-
def intersect(other)
|
492
|
-
other = other.to_a unless other.is_a?(Array)
|
493
|
-
self & other
|
494
|
-
end
|
495
|
-
|
496
|
-
# Everything on the left hand side except what's shared on the right
|
497
|
-
# hand side.
|
498
|
-
# "The relative compliment of y in x"
|
499
|
-
def compliment(other)
|
500
|
-
other = other.to_a unless other.is_a?(Array)
|
501
|
-
self - other
|
502
|
-
end
|
503
|
-
|
504
|
-
# Everything but what's shared
|
505
|
-
def exclusive_not(other)
|
506
|
-
other = other.to_a unless other.is_a?(Array)
|
507
|
-
(self | other) - (self & other)
|
508
|
-
end
|
509
|
-
|
510
|
-
# Finds the cartesian product, excluding duplicates items and self-
|
511
|
-
# referential pairs. Yields the block value if given.
|
512
|
-
def cartesian_product(other, &block)
|
513
|
-
x,y = self.uniq.dup, other.uniq.dup
|
514
|
-
pairs = x.inject([]) do |cp, i|
|
515
|
-
cp | y.map{|b| i == b ? nil : [i,b]}.compact
|
516
|
-
end
|
517
|
-
return pairs unless block_given?
|
518
|
-
pairs.map{|p| yield p.first, p.last}
|
519
|
-
end
|
520
|
-
alias :cp :cartesian_product
|
521
|
-
alias :permutations :cartesian_product
|
522
|
-
|
523
|
-
# Sigma of pairs. Returns a single float, or whatever object is sent in.
|
524
|
-
# Example: [1,2,3].sigma_pairs([4,5,6], 0) {|x, y| x + y}
|
525
|
-
# returns 21 instead of 21.0.
|
526
|
-
def sigma_pairs(other, z=zero, &block)
|
527
|
-
self.to_pairs(other,&block).inject(z) {|sum, i| sum += i}
|
528
|
-
end
|
529
|
-
|
530
|
-
# Returns the Euclidian distance between all points of a set of enumerables
|
531
|
-
def euclidian_distance(other)
|
532
|
-
Math.sqrt(self.sigma_pairs(other) {|a, b| (a - b) ** 2})
|
533
|
-
end
|
534
|
-
|
535
|
-
# Returns a random integer in the range for any number of lists. This
|
536
|
-
# is a way to get a random vector that is tenable based on the sample
|
537
|
-
# data. For example, given two sets of numbers:
|
538
|
-
#
|
539
|
-
# a = [1,2,3]; b = [8,8,8]
|
540
|
-
#
|
541
|
-
# rand_in_pair_range will return a value >= 1 and <= 8 in the first
|
542
|
-
# place, >= 2 and <= 8 in the second place, and >= 3 and <= 8 in the
|
543
|
-
# last place.
|
544
|
-
# Works for integers. Rethink this for floats. May consider setting up
|
545
|
-
# FixedRange for floats. O(n*5)
|
546
|
-
def rand_in_range(*args)
|
547
|
-
min = self.min_of_lists(*args)
|
548
|
-
max = self.max_of_lists(*args)
|
549
|
-
(0...size).inject([]) do |ary, i|
|
550
|
-
ary << rand_between(min[i], max[i])
|
551
|
-
end
|
552
|
-
end
|
553
|
-
|
554
|
-
# Finds the correlation between two enumerables.
|
555
|
-
# Example: [1,2,3].cor [2,3,5]
|
556
|
-
# returns 0.981980506061966
|
557
|
-
def correlation(other)
|
558
|
-
n = [self.size, other.size].min
|
559
|
-
sum_of_products_of_pairs = self.sigma_pairs(other) {|a, b| a * b}
|
560
|
-
self_sum = self.sum
|
561
|
-
other_sum = other.sum
|
562
|
-
sum_of_squared_self_scores = self.sum { |e| e * e }
|
563
|
-
sum_of_squared_other_scores = other.sum { |e| e * e }
|
564
|
-
|
565
|
-
numerator = (n * sum_of_products_of_pairs) - (self_sum * other_sum)
|
566
|
-
self_denominator = ((n * sum_of_squared_self_scores) - (self_sum ** 2))
|
567
|
-
other_denominator = ((n * sum_of_squared_other_scores) - (other_sum ** 2))
|
568
|
-
denominator = Math.sqrt(self_denominator * other_denominator)
|
569
|
-
return numerator / denominator
|
570
|
-
end
|
571
|
-
alias :cor :correlation
|
572
|
-
|
573
|
-
# Transposes arrays of arrays and yields a block on the value.
|
574
|
-
# The regular Array#transpose ignores blocks
|
575
|
-
def yield_transpose(*enums, &block)
|
576
|
-
enums.unshift(self)
|
577
|
-
n = enums.map{ |x| x.size}.min
|
578
|
-
block ||= lambda{|e| e}
|
579
|
-
(0...n).map { |i| block.call enums.map{ |x| x[i] } }
|
580
|
-
end
|
581
|
-
|
582
|
-
# Returns the max of two or more enumerables.
|
583
|
-
# >> [1,2,3].max_of_lists([0,5,6], [0,2,9])
|
584
|
-
# => [1, 5, 9]
|
585
|
-
def max_of_lists(*enums)
|
586
|
-
yield_transpose(*enums) {|e| e.max}
|
587
|
-
end
|
588
|
-
|
589
|
-
# Returns the min of two or more enumerables.
|
590
|
-
# >> [1,2,3].min_of_lists([4,5,6], [0,2,9])
|
591
|
-
# => [0, 2, 3]
|
592
|
-
def min_of_lists(*enums)
|
593
|
-
yield_transpose(*enums) {|e| e.min}
|
594
|
-
end
|
595
|
-
|
596
|
-
end
|
597
|
-
end
|