davidrichards-just_enumerable_stats 0.0.8 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +7 -31
- data/VERSION.yml +1 -1
- data/lib/just_enumerable_stats.rb +261 -190
- data/spec/just_enumerable_stats_spec.rb +197 -7
- data/spec/spec_helper.rb +62 -0
- metadata +1 -5
- data/lib/just_enumerable_stats/stats.rb +0 -597
- data/spec/just_enumerable_stats/stats_spec.rb +0 -534
@@ -271,7 +271,7 @@ describe "JustEnumerableStats" do
|
|
271
271
|
|
272
272
|
it "should be able to dichotomize a list" do
|
273
273
|
@a.dichotomize(2, :small, :big)
|
274
|
-
@a.categories.should eql([:small, :big])
|
274
|
+
@a.categories.map{|e| e.to_s}.sort.map{|e| e.to_sym}.should eql([:big, :small])
|
275
275
|
@a.category_values[:small].should eql([1,2])
|
276
276
|
@a.category_values[:big].should eql([3])
|
277
277
|
end
|
@@ -523,14 +523,204 @@ describe "JustEnumerableStats" do
|
|
523
523
|
a.covariance(b).should eql(0.125)
|
524
524
|
end
|
525
525
|
|
526
|
-
it "should be able to return the Pearson correlation" do
|
527
|
-
a = [1,2,3,4]
|
528
|
-
b = [3,3,4,3]
|
529
|
-
a.pearson_correlation(b).should be_close(0.193649167310371, 1.0e-15)
|
530
|
-
end
|
531
|
-
|
532
526
|
it "should be able to force the list into floats" do
|
533
527
|
[1,2,3].to_f!.should eql([1.0, 2.0, 3.0])
|
534
528
|
end
|
535
529
|
|
530
|
+
context "unobstrusive" do
|
531
|
+
before do
|
532
|
+
@a = BusyClass.new(1,2,3)
|
533
|
+
@b = [2,3,1]
|
534
|
+
end
|
535
|
+
|
536
|
+
it "should not use the native max" do
|
537
|
+
lambda{@a._jes_max}.should_not raise_error
|
538
|
+
end
|
539
|
+
|
540
|
+
it "should not use the native max_index" do
|
541
|
+
lambda{@a._jes_max_index}.should_not raise_error
|
542
|
+
end
|
543
|
+
|
544
|
+
it "should not use the native min" do
|
545
|
+
lambda{@a._jes_min}.should_not raise_error
|
546
|
+
end
|
547
|
+
|
548
|
+
it "should not use the native min_index" do
|
549
|
+
lambda{@a._jes_min_index}.should_not raise_error
|
550
|
+
end
|
551
|
+
|
552
|
+
it "should not use the native default_block" do
|
553
|
+
lambda{@a._jes_default_block}.should_not raise_error
|
554
|
+
end
|
555
|
+
|
556
|
+
it "should not use the native default_block=" do
|
557
|
+
lambda{@a._jes_default_block= lambda{|e| 1} }.should_not raise_error
|
558
|
+
end
|
559
|
+
|
560
|
+
it "should not use the native sum" do
|
561
|
+
lambda{@a._jes_sum}.should_not raise_error
|
562
|
+
end
|
563
|
+
|
564
|
+
it "should not use the native average" do
|
565
|
+
lambda{@a._jes_average}.should_not raise_error
|
566
|
+
end
|
567
|
+
|
568
|
+
it "should not use the native variance" do
|
569
|
+
lambda{@a._jes_variance}.should_not raise_error
|
570
|
+
end
|
571
|
+
|
572
|
+
it "should not use the native standard_deviation" do
|
573
|
+
lambda{@a._jes_standard_deviation}.should_not raise_error
|
574
|
+
end
|
575
|
+
|
576
|
+
it "should not use the native median" do
|
577
|
+
lambda{@a._jes_median}.should_not raise_error
|
578
|
+
end
|
579
|
+
|
580
|
+
it "should not use the native categories" do
|
581
|
+
lambda{@a._jes_categories}.should_not raise_error
|
582
|
+
end
|
583
|
+
|
584
|
+
it "should not use the native is_numeric?" do
|
585
|
+
lambda{@a._jes_is_numeric?}.should_not raise_error
|
586
|
+
end
|
587
|
+
|
588
|
+
it "should not use the native range" do
|
589
|
+
lambda{@a._jes_range}.should_not raise_error
|
590
|
+
end
|
591
|
+
|
592
|
+
it "should not use the native set_range_class" do
|
593
|
+
lambda{@a._jes_set_range_class(FixedRange)}.should_not raise_error
|
594
|
+
end
|
595
|
+
|
596
|
+
it "should not use the native set_range" do
|
597
|
+
lambda{@a._jes_set_range({:a => 1})}.should_not raise_error
|
598
|
+
end
|
599
|
+
|
600
|
+
it "should not use the native dichotomize" do
|
601
|
+
lambda{@a._jes_dichotomize(2, :small, :big)}.should_not raise_error
|
602
|
+
end
|
603
|
+
|
604
|
+
it "should not use the native count_if" do
|
605
|
+
lambda{@a._jes_count_if {|e| e == 2}}.should_not raise_error
|
606
|
+
end
|
607
|
+
|
608
|
+
it "should not use the native category_values" do
|
609
|
+
lambda{@a._jes_category_values}.should_not raise_error
|
610
|
+
end
|
611
|
+
|
612
|
+
it "should not use the native range_class" do
|
613
|
+
lambda{@a._jes_range_class}.should_not raise_error
|
614
|
+
end
|
615
|
+
|
616
|
+
it "should not use the native range_as_range" do
|
617
|
+
lambda{@a._jes_range_as_range}.should_not raise_error
|
618
|
+
end
|
619
|
+
|
620
|
+
it "should not use the native new_sort" do
|
621
|
+
lambda{@a._jes_new_sort}.should_not raise_error
|
622
|
+
end
|
623
|
+
|
624
|
+
it "should not use the native rank" do
|
625
|
+
lambda{@a._jes_rank}.should_not raise_error
|
626
|
+
end
|
627
|
+
|
628
|
+
it "should not use the native order" do
|
629
|
+
lambda{@a._jes_order}.should_not raise_error
|
630
|
+
end
|
631
|
+
|
632
|
+
it "should not use the native quantile" do
|
633
|
+
lambda{@a._jes_quantile}.should_not raise_error
|
634
|
+
end
|
635
|
+
|
636
|
+
it "should not use the native cum_sum" do
|
637
|
+
lambda{@a._jes_cum_sum}.should_not raise_error
|
638
|
+
end
|
639
|
+
|
640
|
+
it "should not use the native cum_prod" do
|
641
|
+
lambda{@a._jes_cum_prod}.should_not raise_error
|
642
|
+
end
|
643
|
+
|
644
|
+
it "should not use the native cum_max" do
|
645
|
+
lambda{@a._jes_cum_max}.should_not raise_error
|
646
|
+
end
|
647
|
+
|
648
|
+
it "should not use the native cum_min" do
|
649
|
+
lambda{@a._jes_cum_min}.should_not raise_error
|
650
|
+
end
|
651
|
+
|
652
|
+
it "should not use the native product" do
|
653
|
+
lambda{@a._jes_product}.should_not raise_error
|
654
|
+
end
|
655
|
+
|
656
|
+
it "should not use the native to_pairs" do
|
657
|
+
lambda{@a._jes_to_pairs(@b) {|a, b| a}}.should_not raise_error
|
658
|
+
end
|
659
|
+
|
660
|
+
it "should not use the native tanimoto_pairs" do
|
661
|
+
lambda{@a._jes_tanimoto_pairs(@b)}.should_not raise_error
|
662
|
+
end
|
663
|
+
|
664
|
+
it "should not use the native union" do
|
665
|
+
lambda{@a._jes_union(@b)}.should_not raise_error
|
666
|
+
end
|
667
|
+
|
668
|
+
it "should not use the native intersect" do
|
669
|
+
lambda{@a._jes_intersect(@b)}.should_not raise_error
|
670
|
+
end
|
671
|
+
|
672
|
+
it "should not use the native compliment" do
|
673
|
+
lambda{@a._jes_compliment(@b)}.should_not raise_error
|
674
|
+
end
|
675
|
+
|
676
|
+
it "should not use the native exclusive_not" do
|
677
|
+
lambda{@a._jes_exclusive_not(@b)}.should_not raise_error
|
678
|
+
end
|
679
|
+
|
680
|
+
it "should not use the native cartesian_product" do
|
681
|
+
lambda{@a._jes_cartesian_product(@b)}.should_not raise_error
|
682
|
+
end
|
683
|
+
|
684
|
+
it "should not use the native sigma_pairs" do
|
685
|
+
lambda{@a._jes_sigma_pairs(@b) {|a, b| a}}.should_not raise_error
|
686
|
+
end
|
687
|
+
|
688
|
+
it "should not use the native euclidian_distance" do
|
689
|
+
lambda{@a._jes_euclidian_distance(@b)}.should_not raise_error
|
690
|
+
end
|
691
|
+
|
692
|
+
it "should not use the native rand_in_range" do
|
693
|
+
lambda{@a._jes_rand_in_range(1, 2)}.should_not raise_error
|
694
|
+
end
|
695
|
+
|
696
|
+
it "should not use the native correlation" do
|
697
|
+
lambda{@a._jes_correlation(@b)}.should_not raise_error
|
698
|
+
end
|
699
|
+
|
700
|
+
it "should not use the native yield_transpose" do
|
701
|
+
lambda{@a._jes_yield_transpose(@b)}.should_not raise_error
|
702
|
+
end
|
703
|
+
|
704
|
+
it "should not use the native max_of_lists" do
|
705
|
+
lambda{@a._jes_max_of_lists(@b)}.should_not raise_error
|
706
|
+
end
|
707
|
+
|
708
|
+
it "should not use the native min_of_lists" do
|
709
|
+
lambda{@a._jes_min_of_lists(@b)}.should_not raise_error
|
710
|
+
end
|
711
|
+
|
712
|
+
it "should not use the native covariance" do
|
713
|
+
lambda{@a._jes_covariance(@b)}.should_not raise_error
|
714
|
+
end
|
715
|
+
|
716
|
+
it "should not use the native pearson_correlation" do
|
717
|
+
lambda{@a._jes_pearson_correlation(@b)}.should_not raise_error
|
718
|
+
end
|
719
|
+
|
720
|
+
it "should not use the native to_f!" do
|
721
|
+
lambda{@a._jes_to_f!}.should_not raise_error
|
722
|
+
end
|
723
|
+
|
724
|
+
end
|
725
|
+
|
536
726
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -6,3 +6,65 @@ require 'just_enumerable_stats'
|
|
6
6
|
Spec::Runner.configure do |config|
|
7
7
|
|
8
8
|
end
|
9
|
+
|
10
|
+
class BusyClass
|
11
|
+
include Enumerable
|
12
|
+
def initialize(*vals)
|
13
|
+
@values = vals
|
14
|
+
end
|
15
|
+
|
16
|
+
def method_missing(sym, *args, &block)
|
17
|
+
@values.send(sym, *args, &block)
|
18
|
+
end
|
19
|
+
|
20
|
+
def max(&block); raise ArgumentError, "Should not be called"; end
|
21
|
+
def max_index(&block); raise ArgumentError, "Should not be called"; end
|
22
|
+
def min(&block); raise ArgumentError, "Should not be called"; end
|
23
|
+
def min_index(&block); raise ArgumentError, "Should not be called"; end
|
24
|
+
def default_block; raise ArgumentError, "Should not be called"; end
|
25
|
+
def default_block=(block); raise ArgumentError, "Should not be called"; end
|
26
|
+
def sum; raise ArgumentError, "Should not be called"; end
|
27
|
+
def average(&block); raise ArgumentError, "Should not be called"; end
|
28
|
+
def variance(&block); raise ArgumentError, "Should not be called"; end
|
29
|
+
def standard_deviation(&block); raise ArgumentError, "Should not be called"; end
|
30
|
+
def median(ratio=0.5, &block); raise ArgumentError, "Should not be called"; end
|
31
|
+
def categories; raise ArgumentError, "Should not be called"; end
|
32
|
+
def is_numeric?; raise ArgumentError, "Should not be called"; end
|
33
|
+
def range(&block); raise ArgumentError, "Should not be called"; end
|
34
|
+
def set_range_class(klass, *args); raise ArgumentError, "Should not be called"; end
|
35
|
+
def set_range(hash); raise ArgumentError, "Should not be called"; end
|
36
|
+
def dichotomize(split_value, first_label, second_label); raise ArgumentError, "Should not be called"; end
|
37
|
+
def count_if(&block); raise ArgumentError, "Should not be called"; end
|
38
|
+
def category_values(reset=false); raise ArgumentError, "Should not be called"; end
|
39
|
+
def range_class; raise ArgumentError, "Should not be called"; end
|
40
|
+
def range_as_range(&block); raise ArgumentError, "Should not be called"; end
|
41
|
+
def new_sort(&block); raise ArgumentError, "Should not be called"; end
|
42
|
+
def rank(&block); raise ArgumentError, "Should not be called"; end
|
43
|
+
def order(&block); raise ArgumentError, "Should not be called"; end
|
44
|
+
def quantile(&block); raise ArgumentError, "Should not be called"; end
|
45
|
+
def cum_sum(sorted=false, &block); raise ArgumentError, "Should not be called"; end
|
46
|
+
def cum_prod(sorted=false, &block); raise ArgumentError, "Should not be called"; end
|
47
|
+
def cum_max(&block); raise ArgumentError, "Should not be called"; end
|
48
|
+
def cum_min(&block); raise ArgumentError, "Should not be called"; end
|
49
|
+
def product; raise ArgumentError, "Should not be called"; end
|
50
|
+
def to_pairs(other, &block); raise ArgumentError, "Should not be called"; end
|
51
|
+
def tanimoto_pairs(other); raise ArgumentError, "Should not be called"; end
|
52
|
+
def union(other); raise ArgumentError, "Should not be called"; end
|
53
|
+
def intersect(other); raise ArgumentError, "Should not be called"; end
|
54
|
+
def compliment(other); raise ArgumentError, "Should not be called"; end
|
55
|
+
def exclusive_not(other); raise ArgumentError, "Should not be called"; end
|
56
|
+
def cartesian_product(other, &block); raise ArgumentError, "Should not be called"; end
|
57
|
+
def sigma_pairs(other, z=_jes_zero, &block); raise ArgumentError, "Should not be called"; end
|
58
|
+
def euclidian_distance(other); raise ArgumentError, "Should not be called"; end
|
59
|
+
def rand_in_range(*args); raise ArgumentError, "Should not be called"; end
|
60
|
+
def correlation(other); raise ArgumentError, "Should not be called"; end
|
61
|
+
def yield_transpose(*enums, &block); raise ArgumentError, "Should not be called"; end
|
62
|
+
def max_of_lists(*enums); raise ArgumentError, "Should not be called"; end
|
63
|
+
def min_of_lists(*enums); raise ArgumentError, "Should not be called"; end
|
64
|
+
def covariance(other); raise ArgumentError, "Should not be called"; end
|
65
|
+
def pearson_correlation(other); raise ArgumentError, "Should not be called"; end
|
66
|
+
def to_f!; raise ArgumentError, "Should not be called"; end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-just_enumerable_stats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -26,12 +26,8 @@ files:
|
|
26
26
|
- VERSION.yml
|
27
27
|
- bin/jes
|
28
28
|
- lib/fixed_range.rb
|
29
|
-
- lib/just_enumerable_stats
|
30
|
-
- lib/just_enumerable_stats/stats.rb
|
31
29
|
- lib/just_enumerable_stats.rb
|
32
30
|
- spec/fixed_range_spec.rb
|
33
|
-
- spec/just_enumerable_stats
|
34
|
-
- spec/just_enumerable_stats/stats_spec.rb
|
35
31
|
- spec/just_enumerable_stats_spec.rb
|
36
32
|
- spec/spec_helper.rb
|
37
33
|
has_rdoc: true
|
@@ -1,597 +0,0 @@
|
|
1
|
-
# This is a namespaced version of the gem, in case you can create a
|
2
|
-
# container for your data and only include these methods there.
|
3
|
-
# Example:
|
4
|
-
class Object
|
5
|
-
|
6
|
-
# Simpler way to handle a random number between two values
|
7
|
-
def rand_between(a, b)
|
8
|
-
return rand_in_floats(a, b) if a.is_a?(Float) or b.is_a?(Float)
|
9
|
-
range = (a - b).abs + 1
|
10
|
-
rand(range) + [a,b].min
|
11
|
-
end
|
12
|
-
|
13
|
-
# Handles non-integers
|
14
|
-
def rand_in_floats(a, b)
|
15
|
-
range = (a - b).abs
|
16
|
-
(rand * range) + [a,b].min
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
module JustEnumerableStats #:nodoc:
|
22
|
-
module Stats
|
23
|
-
|
24
|
-
# To keep max and min DRY.
|
25
|
-
def block_sorter(a, b, &block)
|
26
|
-
if block
|
27
|
-
val = yield(a, b)
|
28
|
-
elsif default_block
|
29
|
-
val = default_block.call(a, b)
|
30
|
-
else
|
31
|
-
val = a <=> b
|
32
|
-
end
|
33
|
-
end
|
34
|
-
protected :block_sorter
|
35
|
-
|
36
|
-
# Returns the max, using an optional block.
|
37
|
-
def max(&block)
|
38
|
-
self.inject do |best, e|
|
39
|
-
val = block_sorter(best, e, &block)
|
40
|
-
best = val > 0 ? best : e
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
# Returns the first index of the max value
|
45
|
-
def max_index(&block)
|
46
|
-
self.index(max(&block))
|
47
|
-
end
|
48
|
-
|
49
|
-
# Min of any number of items
|
50
|
-
def min(&block)
|
51
|
-
self.inject do |best, e|
|
52
|
-
val = block_sorter(best, e, &block)
|
53
|
-
best = val < 0 ? best : e
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
# Returns the first index of the min value
|
58
|
-
def min_index(&block)
|
59
|
-
self.index(min(&block))
|
60
|
-
end
|
61
|
-
|
62
|
-
# The block called to filter the values in the object.
|
63
|
-
def default_block
|
64
|
-
@default_stat_block
|
65
|
-
end
|
66
|
-
|
67
|
-
# Allows me to setup a block for a series of operations. Example:
|
68
|
-
# a = [1,2,3]
|
69
|
-
# a.sum # => 6.0
|
70
|
-
# a.default_block = lambda{|e| 1 / e}
|
71
|
-
# a.sum # => 1.0
|
72
|
-
def default_block=(block)
|
73
|
-
@default_stat_block = block
|
74
|
-
end
|
75
|
-
|
76
|
-
# Provides zero in the right class (Numeric or Float)
|
77
|
-
def zero
|
78
|
-
any? {|e| e.is_a?(Float)} ? 0.0 : 0
|
79
|
-
end
|
80
|
-
protected :zero
|
81
|
-
|
82
|
-
# Provides one in the right class (Numeric or Float)
|
83
|
-
def one
|
84
|
-
any? {|e| e.is_a?(Float)} ? 1.0 : 1
|
85
|
-
end
|
86
|
-
protected :one
|
87
|
-
|
88
|
-
# Adds up the list. Uses a block or default block if present.
|
89
|
-
def sum
|
90
|
-
sum = zero
|
91
|
-
if block_given?
|
92
|
-
each{|i| sum += yield(i)}
|
93
|
-
elsif default_block
|
94
|
-
each{|i| sum += default_block[*i]}
|
95
|
-
else
|
96
|
-
each{|i| sum += i}
|
97
|
-
end
|
98
|
-
sum
|
99
|
-
end
|
100
|
-
|
101
|
-
# The arithmetic mean, uses a block or default block.
|
102
|
-
def average(&block)
|
103
|
-
sum(&block)/size
|
104
|
-
end
|
105
|
-
alias :mean :average
|
106
|
-
alias :avg :average
|
107
|
-
|
108
|
-
# The variance, uses a block or default block.
|
109
|
-
def variance(&block)
|
110
|
-
m = mean(&block)
|
111
|
-
sum_of_differences = if block_given?
|
112
|
-
sum{ |i| j=yield(i); (m - j) ** 2 }
|
113
|
-
elsif default_block
|
114
|
-
sum{ |i| j=default_block[*i]; (m - j) ** 2 }
|
115
|
-
else
|
116
|
-
sum{ |i| (m - i) ** 2 }
|
117
|
-
end
|
118
|
-
sum_of_differences / (size - 1)
|
119
|
-
end
|
120
|
-
alias :var :variance
|
121
|
-
|
122
|
-
# The standard deviation. Uses a block or default block.
|
123
|
-
def standard_deviation(&block)
|
124
|
-
Math::sqrt(variance(&block))
|
125
|
-
end
|
126
|
-
alias :std :standard_deviation
|
127
|
-
|
128
|
-
# The slow way is to iterate up to the middle point. A faster way is to
|
129
|
-
# use the index, when available. If a block is supplied, always iterate
|
130
|
-
# to the middle point.
|
131
|
-
def median(ratio=0.5, &block)
|
132
|
-
return iterate_midway(ratio, &block) if block_given?
|
133
|
-
begin
|
134
|
-
mid1, mid2 = middle_two
|
135
|
-
sorted = new_sort
|
136
|
-
med1, med2 = sorted[mid1], sorted[mid2]
|
137
|
-
return med1 if med1 == med2
|
138
|
-
return med1 + ((med2 - med1) * ratio)
|
139
|
-
rescue
|
140
|
-
iterate_midway(ratio, &block)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def middle_two
|
145
|
-
mid2 = size.div(2)
|
146
|
-
mid1 = (size % 2 == 0) ? mid2 - 1 : mid2
|
147
|
-
return mid1, mid2
|
148
|
-
end
|
149
|
-
protected :middle_two
|
150
|
-
|
151
|
-
def median_position
|
152
|
-
middle_two.last
|
153
|
-
end
|
154
|
-
protected :median_position
|
155
|
-
|
156
|
-
def first_half(&block)
|
157
|
-
fh = self[0..median_position].dup
|
158
|
-
end
|
159
|
-
protected :first_half
|
160
|
-
|
161
|
-
def second_half(&block)
|
162
|
-
# Total crap, but it's the way R does things, and this will most likely
|
163
|
-
# only be used to feed R some numbers to plot, if at all.
|
164
|
-
sh = size <= 5 ? self[median_position..-1].dup : self[median_position - 1..-1].dup
|
165
|
-
end
|
166
|
-
protected :second_half
|
167
|
-
|
168
|
-
# An iterative version of median
|
169
|
-
def iterate_midway(ratio, &block)
|
170
|
-
mid1, mid2, last_value, j, sorted, sort1, sort2 = middle_two, nil, 0, new_sort, nil, nil
|
171
|
-
|
172
|
-
if block_given?
|
173
|
-
sorted.each do |i|
|
174
|
-
last_value = yield(i)
|
175
|
-
j += 1
|
176
|
-
sort1 = last_value if j == mid1
|
177
|
-
sort2 = last_value if j == mid2
|
178
|
-
break if j >= mid2
|
179
|
-
end
|
180
|
-
elsif default_block
|
181
|
-
sorted.each do |i|
|
182
|
-
last_value = default_block[*i]
|
183
|
-
j += 1
|
184
|
-
sort1 = last_value if j == mid1
|
185
|
-
sort2 = last_value if j == mid2
|
186
|
-
break if j >= mid2
|
187
|
-
end
|
188
|
-
else
|
189
|
-
sorted.each do |i|
|
190
|
-
last_value = i
|
191
|
-
sort1 = last_value if j == mid1
|
192
|
-
sort2 = last_value if j == mid2
|
193
|
-
j += 1
|
194
|
-
break if j >= mid2
|
195
|
-
end
|
196
|
-
end
|
197
|
-
return med1 if med1 == med2
|
198
|
-
return med1 + ((med2 - med1) * ratio)
|
199
|
-
end
|
200
|
-
protected :iterate_midway
|
201
|
-
|
202
|
-
# Takes the range_class and returns its map.
|
203
|
-
# Example:
|
204
|
-
# require 'mathn'
|
205
|
-
# a = [1,2,3]
|
206
|
-
# a
|
207
|
-
# range_class = FixedRange, a.min, a.max, 1/4
|
208
|
-
# a.categories
|
209
|
-
# => [1, 5/4, 3/2, 7/4, 2, 9/4, 5/2, 11/4, 3]
|
210
|
-
# For non-numeric values, returns a unique set,
|
211
|
-
# ordered if possible.
|
212
|
-
def categories
|
213
|
-
if @categories
|
214
|
-
@categories
|
215
|
-
elsif self.is_numeric?
|
216
|
-
self.range_instance.map
|
217
|
-
else
|
218
|
-
self.uniq.sort rescue self.uniq
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
def is_numeric?
|
223
|
-
self.all? {|e| e.is_a?(Numeric)}
|
224
|
-
end
|
225
|
-
|
226
|
-
# Just an array of [min, max] to comply with R's use of the word. Use
|
227
|
-
# range_as_range if you want a real Range.
|
228
|
-
def range(&block)
|
229
|
-
[min(&block), max(&block)]
|
230
|
-
end
|
231
|
-
|
232
|
-
# Useful for setting a real range class (FixedRange).
|
233
|
-
def set_range_class(klass, *args)
|
234
|
-
@range_class = klass
|
235
|
-
@range_class_args = args
|
236
|
-
self.range_class
|
237
|
-
end
|
238
|
-
|
239
|
-
# Takes a hash of arrays for categories
|
240
|
-
# If Facets happens to be loaded on the computer, this keeps the order
|
241
|
-
# of the categories straight.
|
242
|
-
def set_range(hash)
|
243
|
-
if defined?(Dictionary)
|
244
|
-
@range_hash = Dictionary.new
|
245
|
-
@range_hash.merge!(hash)
|
246
|
-
@categories = @range_hash.keys
|
247
|
-
else
|
248
|
-
@categories = hash.keys
|
249
|
-
@range_hash = hash
|
250
|
-
end
|
251
|
-
@categories
|
252
|
-
end
|
253
|
-
|
254
|
-
# The hash of lambdas that are used to categorize the enumerable.
|
255
|
-
attr_reader :range_hash
|
256
|
-
|
257
|
-
# The arguments needed to instantiate the custom-defined range class.
|
258
|
-
attr_reader :range_class_args
|
259
|
-
|
260
|
-
# Splits the values in two, <= the value and > the value.
|
261
|
-
def dichotomize(split_value, first_label, second_label)
|
262
|
-
set_range({
|
263
|
-
first_label => lambda{|e| e <= split_value},
|
264
|
-
second_label => lambda{|e| e > split_value}
|
265
|
-
})
|
266
|
-
end
|
267
|
-
|
268
|
-
# Counts each element where the block evaluates to true
|
269
|
-
# Example:
|
270
|
-
# a = [1,2,3]
|
271
|
-
# a.count_if {|e| e % 2 == 0}
|
272
|
-
def count_if(&block)
|
273
|
-
self.inject(0) do |s, e|
|
274
|
-
s += 1 if block.call(e)
|
275
|
-
s
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
|
-
# Returns a Hash or Dictionary (if available) for each category with a
|
280
|
-
# value as the set of matching values as an array.
|
281
|
-
# Because this is supposed to be lean (just enumerables), but this is an
|
282
|
-
# expensive call, I'm going to cache it and offer a parameter to reset
|
283
|
-
# the cache. So, call category_values(true) if you need to reset the
|
284
|
-
# cache.
|
285
|
-
def category_values(reset=false)
|
286
|
-
@category_values = nil if reset
|
287
|
-
return @category_values if @category_values
|
288
|
-
container = defined?(Dictionary) ? Dictionary.new : Hash.new
|
289
|
-
if self.range_hash
|
290
|
-
@category_values = self.categories.inject(container) do |cont, cat|
|
291
|
-
cont[cat] = self.find_all &self.range_hash[cat]
|
292
|
-
cont
|
293
|
-
end
|
294
|
-
else
|
295
|
-
@category_values = self.categories.inject(container) do |cont, cat|
|
296
|
-
cont[cat] = self.find_all {|e| e == cat}
|
297
|
-
cont
|
298
|
-
end
|
299
|
-
end
|
300
|
-
end
|
301
|
-
|
302
|
-
# When creating a range, what class will it be? Defaults to Range, but
|
303
|
-
# other classes are sometimes useful.
|
304
|
-
def range_class
|
305
|
-
@range_class ||= Range
|
306
|
-
end
|
307
|
-
|
308
|
-
# Actually instantiates the range, instead of producing a min and max array.
|
309
|
-
def range_as_range(&block)
|
310
|
-
if @range_class_args and not @range_class_args.empty?
|
311
|
-
self.range_class.new(*@range_class_args)
|
312
|
-
else
|
313
|
-
self.range_class.new(min(&block), max(&block))
|
314
|
-
end
|
315
|
-
end
|
316
|
-
alias :range_instance :range_as_range
|
317
|
-
|
318
|
-
# I don't pass the block to the sort, because a sort block needs to look
|
319
|
-
# something like: {|x,y| x <=> y}. To get around this, set the default
|
320
|
-
# block on the object.
|
321
|
-
def new_sort(&block)
|
322
|
-
if block_given?
|
323
|
-
map { |i| yield(i) }.sort.dup
|
324
|
-
elsif default_block
|
325
|
-
map { |i| default_block[*i] }.sort.dup
|
326
|
-
else
|
327
|
-
sort().dup
|
328
|
-
end
|
329
|
-
end
|
330
|
-
|
331
|
-
# Doesn't overwrite things like Matrix#rank
|
332
|
-
def rank(&block)
|
333
|
-
|
334
|
-
sorted = new_sort(&block)
|
335
|
-
|
336
|
-
if block_given?
|
337
|
-
map { |i| sorted.index(yield(i)) + 1 }
|
338
|
-
elsif default_block
|
339
|
-
map { |i| sorted.index(default_block[*i]) + 1 }
|
340
|
-
else
|
341
|
-
map { |i| sorted.index(i) + 1 }
|
342
|
-
end
|
343
|
-
|
344
|
-
end unless defined?(rank)
|
345
|
-
|
346
|
-
# Given values like [10,5,5,1]
|
347
|
-
# Rank should produce something like [4,2,2,1]
|
348
|
-
# And order should produce something like [4,2,3,1]
|
349
|
-
# The trick is that rank skips as many as were duplicated, so there
|
350
|
-
# could not be a 3 in the rank from the example above.
|
351
|
-
def order(&block)
|
352
|
-
hold = []
|
353
|
-
rank(&block).each do |x|
|
354
|
-
while hold.include?(x) do
|
355
|
-
x += 1
|
356
|
-
end
|
357
|
-
hold << x
|
358
|
-
end
|
359
|
-
hold
|
360
|
-
end
|
361
|
-
|
362
|
-
# First quartile: nth_split_by_m(1, 4)
|
363
|
-
# Third quartile: nth_split_by_m(3, 4)
|
364
|
-
# Median: nth_split_by_m(1, 2)
|
365
|
-
# Doesn't match R, and it's silly to try to.
|
366
|
-
# def nth_split_by_m(n, m)
|
367
|
-
# sorted = new_sort
|
368
|
-
# dividers = m - 1
|
369
|
-
# if size % m == dividers # Divides evenly
|
370
|
-
# # Because we have a 0-based list, we get the floor
|
371
|
-
# i = ((size / m.to_f) * n).floor
|
372
|
-
# j = i
|
373
|
-
# else
|
374
|
-
# # This reflects R's approach, which I don't think I agree with.
|
375
|
-
# i = (((size / m.to_f) * n) - 1)
|
376
|
-
# i = i > (size / m.to_f) ? i.floor : i.ceil
|
377
|
-
# j = i + 1
|
378
|
-
# end
|
379
|
-
# sorted[i] + ((n / m.to_f) * (sorted[j] - sorted[i]))
|
380
|
-
# end
|
381
|
-
def quantile(&block)
|
382
|
-
[
|
383
|
-
min(&block),
|
384
|
-
first_half(&block).median(0.25, &block),
|
385
|
-
median(&block),
|
386
|
-
second_half(&block).median(0.75, &block),
|
387
|
-
max(&block)
|
388
|
-
]
|
389
|
-
end
|
390
|
-
|
391
|
-
# The cumulative sum. Example:
|
392
|
-
# [1,2,3].cum_sum # => [1, 3, 6]
|
393
|
-
def cum_sum(sorted=false, &block)
|
394
|
-
sum = zero
|
395
|
-
obj = sorted ? self.new_sort : self
|
396
|
-
if block_given?
|
397
|
-
obj.map { |i| sum += yield(i) }
|
398
|
-
elsif default_block
|
399
|
-
obj.map { |i| sum += default_block[*i] }
|
400
|
-
else
|
401
|
-
obj.map { |i| sum += i }
|
402
|
-
end
|
403
|
-
end
|
404
|
-
alias :cumulative_sum :cum_sum
|
405
|
-
|
406
|
-
# The cumulative product. Example:
|
407
|
-
# [1,2,3].cum_prod # => [1.0, 2.0, 6.0]
|
408
|
-
def cum_prod(sorted=false, &block)
|
409
|
-
prod = one
|
410
|
-
obj = sorted ? self.new_sort : self
|
411
|
-
if block_given?
|
412
|
-
obj.map { |i| prod *= yield(i) }
|
413
|
-
elsif default_block
|
414
|
-
obj.map { |i| prod *= default_block[*i] }
|
415
|
-
else
|
416
|
-
obj.map { |i| prod *= i }
|
417
|
-
end
|
418
|
-
end
|
419
|
-
alias :cumulative_product :cum_prod
|
420
|
-
|
421
|
-
# Used to preprocess the list
|
422
|
-
def morph_list(&block)
|
423
|
-
if block
|
424
|
-
self.map{ |e| block.call(e) }
|
425
|
-
elsif self.default_block
|
426
|
-
self.map{ |e| self.default_block.call(e) }
|
427
|
-
else
|
428
|
-
self
|
429
|
-
end
|
430
|
-
end
|
431
|
-
protected :morph_list
|
432
|
-
|
433
|
-
# Example:
|
434
|
-
# [1,2,3,0,5].cum_max # => [1,2,3,3,5]
|
435
|
-
def cum_max(&block)
|
436
|
-
morph_list(&block).inject([]) do |list, e|
|
437
|
-
found = (list | [e]).max
|
438
|
-
list << (found ? found : e)
|
439
|
-
end
|
440
|
-
end
|
441
|
-
alias :cumulative_max :cum_max
|
442
|
-
|
443
|
-
# Example:
|
444
|
-
# [1,2,3,0,5].cum_min # => [1,1,1,0,0]
|
445
|
-
def cum_min(&block)
|
446
|
-
morph_list(&block).inject([]) do |list, e|
|
447
|
-
found = (list | [e]).min
|
448
|
-
list << (found ? found : e)
|
449
|
-
end
|
450
|
-
end
|
451
|
-
alias :cumulative_min :cum_min
|
452
|
-
|
453
|
-
# Multiplies the values:
|
454
|
-
# >> [1,2,3].product
|
455
|
-
# => 6.0
|
456
|
-
def product
|
457
|
-
self.inject(one) {|sum, a| sum *= a}
|
458
|
-
end
|
459
|
-
|
460
|
-
# There are going to be a lot more of these kinds of things, so pay
|
461
|
-
# attention.
|
462
|
-
def to_pairs(other, &block)
|
463
|
-
n = [self.size, other.size].min
|
464
|
-
(0...n).map {|i| block.call(self[i], other[i]) }
|
465
|
-
end
|
466
|
-
|
467
|
-
# Finds the tanimoto coefficient: the intersection set size / union set
|
468
|
-
# size. This is used to find the distance between two vectors.
|
469
|
-
# >> [1,2,3].cor([2,3,5])
|
470
|
-
# => 0.981980506061966
|
471
|
-
# >> [1,2,3].tanimoto_pairs([2,3,5])
|
472
|
-
# => 0.5
|
473
|
-
def tanimoto_pairs(other)
|
474
|
-
intersect(other).size / union(other).size.to_f
|
475
|
-
end
|
476
|
-
alias :tanimoto_correlation :tanimoto_pairs
|
477
|
-
|
478
|
-
# Sometimes it just helps to have things spelled out. These are all
|
479
|
-
# part of the Array class. This means, you have methods that you can't
|
480
|
-
# run on some kinds of enumerables.
|
481
|
-
|
482
|
-
# All of the left and right hand sides, excluding duplicates.
|
483
|
-
# "The union of x and y"
|
484
|
-
def union(other)
|
485
|
-
other = other.to_a unless other.is_a?(Array)
|
486
|
-
self | other
|
487
|
-
end
|
488
|
-
|
489
|
-
# What's shared on the left and right hand sides
|
490
|
-
# "The intersection of x and y"
|
491
|
-
def intersect(other)
|
492
|
-
other = other.to_a unless other.is_a?(Array)
|
493
|
-
self & other
|
494
|
-
end
|
495
|
-
|
496
|
-
# Everything on the left hand side except what's shared on the right
|
497
|
-
# hand side.
|
498
|
-
# "The relative complement of y in x"
|
499
|
-
def compliment(other)
|
500
|
-
other = other.to_a unless other.is_a?(Array)
|
501
|
-
self - other
|
502
|
-
end
|
503
|
-
|
504
|
-
# Everything but what's shared
|
505
|
-
def exclusive_not(other)
|
506
|
-
other = other.to_a unless other.is_a?(Array)
|
507
|
-
(self | other) - (self & other)
|
508
|
-
end
|
509
|
-
|
510
|
-
# Finds the cartesian product, excluding duplicate items and self-
|
511
|
-
# referential pairs. Yields the block value if given.
|
512
|
-
def cartesian_product(other, &block)
|
513
|
-
x,y = self.uniq.dup, other.uniq.dup
|
514
|
-
pairs = x.inject([]) do |cp, i|
|
515
|
-
cp | y.map{|b| i == b ? nil : [i,b]}.compact
|
516
|
-
end
|
517
|
-
return pairs unless block_given?
|
518
|
-
pairs.map{|p| yield p.first, p.last}
|
519
|
-
end
|
520
|
-
alias :cp :cartesian_product
|
521
|
-
alias :permutations :cartesian_product
|
522
|
-
|
523
|
-
# Sigma of pairs. Returns a single float, or whatever object is sent in.
|
524
|
-
# Example: [1,2,3].sigma_pairs([4,5,6], 0) {|x, y| x + y}
|
525
|
-
# returns 21 instead of 21.0.
|
526
|
-
def sigma_pairs(other, z=zero, &block)
|
527
|
-
self.to_pairs(other,&block).inject(z) {|sum, i| sum += i}
|
528
|
-
end
|
529
|
-
|
530
|
-
# Returns the Euclidian distance between all points of a set of enumerables
|
531
|
-
def euclidian_distance(other)
|
532
|
-
Math.sqrt(self.sigma_pairs(other) {|a, b| (a - b) ** 2})
|
533
|
-
end
|
534
|
-
|
535
|
-
# Returns a random integer in the range for any number of lists. This
|
536
|
-
# is a way to get a random vector that is tenable based on the sample
|
537
|
-
# data. For example, given two sets of numbers:
|
538
|
-
#
|
539
|
-
# a = [1,2,3]; b = [8,8,8]
|
540
|
-
#
|
541
|
-
# rand_in_range will return a value >= 1 and <= 8 in the first
|
542
|
-
# place, >= 2 and <= 8 in the second place, and >= 3 and <= 8 in the
|
543
|
-
# last place.
|
544
|
-
# Works for integers. Rethink this for floats. May consider setting up
|
545
|
-
# FixedRange for floats. O(n*5)
|
546
|
-
def rand_in_range(*args)
|
547
|
-
min = self.min_of_lists(*args)
|
548
|
-
max = self.max_of_lists(*args)
|
549
|
-
(0...size).inject([]) do |ary, i|
|
550
|
-
ary << rand_between(min[i], max[i])
|
551
|
-
end
|
552
|
-
end
|
553
|
-
|
554
|
-
# Finds the correlation between two enumerables.
|
555
|
-
# Example: [1,2,3].cor [2,3,5]
|
556
|
-
# returns 0.981980506061966
|
557
|
-
def correlation(other)
|
558
|
-
n = [self.size, other.size].min
|
559
|
-
sum_of_products_of_pairs = self.sigma_pairs(other) {|a, b| a * b}
|
560
|
-
self_sum = self.sum
|
561
|
-
other_sum = other.sum
|
562
|
-
sum_of_squared_self_scores = self.sum { |e| e * e }
|
563
|
-
sum_of_squared_other_scores = other.sum { |e| e * e }
|
564
|
-
|
565
|
-
numerator = (n * sum_of_products_of_pairs) - (self_sum * other_sum)
|
566
|
-
self_denominator = ((n * sum_of_squared_self_scores) - (self_sum ** 2))
|
567
|
-
other_denominator = ((n * sum_of_squared_other_scores) - (other_sum ** 2))
|
568
|
-
denominator = Math.sqrt(self_denominator * other_denominator)
|
569
|
-
return numerator / denominator
|
570
|
-
end
|
571
|
-
alias :cor :correlation
|
572
|
-
|
573
|
-
# Transposes arrays of arrays and yields a block on the value.
|
574
|
-
# The regular Array#transpose ignores blocks
|
575
|
-
def yield_transpose(*enums, &block)
|
576
|
-
enums.unshift(self)
|
577
|
-
n = enums.map{ |x| x.size}.min
|
578
|
-
block ||= lambda{|e| e}
|
579
|
-
(0...n).map { |i| block.call enums.map{ |x| x[i] } }
|
580
|
-
end
|
581
|
-
|
582
|
-
# Returns the max of two or more enumerables.
|
583
|
-
# >> [1,2,3].max_of_lists([0,5,6], [0,2,9])
|
584
|
-
# => [1, 5, 9]
|
585
|
-
def max_of_lists(*enums)
|
586
|
-
yield_transpose(*enums) {|e| e.max}
|
587
|
-
end
|
588
|
-
|
589
|
-
# Returns the min of two or more enumerables.
|
590
|
-
# >> [1,2,3].min_of_lists([4,5,6], [0,2,9])
|
591
|
-
# => [0, 2, 3]
|
592
|
-
def min_of_lists(*enums)
|
593
|
-
yield_transpose(*enums) {|e| e.min}
|
594
|
-
end
|
595
|
-
|
596
|
-
end
|
597
|
-
end
|