ralgorithms 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
# Try to load the optional `sorting_ext` library unless the RA_PURE
# environment variable is set (presumably the native implementation of the
# sorters; RA_PURE then forces the pure-Ruby code -- confirm against the gem).
begin
  require 'sorting_ext' unless ENV["RA_PURE"]
rescue LoadError
  # The extension is optional: when it is missing or cannot be loaded we
  # simply carry on without it. Only LoadError is rescued so that
  # Interrupt, SystemExit, NoMemoryError and friends still propagate.
end
@@ -0,0 +1,74 @@
1
+ require File.join(File.dirname(File.realpath(__FILE__)), "insertion_sort")
2
+ require File.join(File.dirname(File.realpath(__FILE__)), "helper")
3
module Sorting
  # Top-down merge sort that delegates small inputs to insertion sort.
  #
  # Characteristics (author's notes):
  # * Comparison sort, based on merging
  # * Stable
  # * Time complexity: O(nlogn) best, Ө(nlogn) average, O(nlogn) worst
  # * Space complexity: O(n)
  # * Supports parallelization
  # * Better for linked lists
  # * Efficient at handling slow-to-access sequential media
  class MergeSort
    extend Helper
    TEST_DATA_SIZE = 100_000
    # Inputs shorter than this are handed to Sorting::InsertionSort.
    SIZE_FOR_INSERTION = 15

    # Returns the sorted elements of +data+.
    #
    # Inputs with fewer than SIZE_FOR_INSERTION elements are passed to
    # Sorting::InsertionSort.sort and its result is returned as-is; larger
    # inputs are split in half, each half is sorted recursively and the two
    # results are merged.
    def self.sort(data)
      return Sorting::InsertionSort.sort(data) if data.size < SIZE_FOR_INSERTION

      middle = data.size / 2
      lower_half = data[0...middle]
      upper_half = data[middle...data.size]
      merge(sort(lower_half), sort(upper_half))
    end

    # NOTE(review): a bare `protected` only affects instance methods defined
    # after it; `def self.merge` below remains publicly callable.
    protected

    # Merges two already sorted arrays into one sorted array.
    #
    # When either argument is empty the other one is returned directly (not
    # copied). On ties the element from +left+ is taken first, which keeps
    # the sort stable. A simpler variant would drain both arrays with
    # Array#shift; this index-based version leaves its arguments untouched.
    def self.merge(left, right)
      return right if left.empty?
      return left if right.empty?

      merged = []
      l = 0
      r = 0
      l_end = left.size
      r_end = right.size
      while l < l_end && r < r_end
        l_value = left[l]
        r_value = right[r]
        if l_value > r_value
          merged << r_value
          r += 1
        else
          merged << l_value
          l += 1
        end
      end
      # Exactly one side is exhausted here; the other slice is the sorted tail.
      merged.concat(left[l..-1]).concat(right[r..-1])
    end

  end
end
70
+
71
# When this file is executed directly (not required), load the shared test
# helper and run its self-test/benchmark for this file.
if $0 == __FILE__
  here = File.dirname(File.realpath(__FILE__))
  require File.join(here, "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
@@ -0,0 +1,67 @@
1
+ require File.join(File.dirname(File.realpath(__FILE__)), "insertion_sort")
2
+ require File.join(File.dirname(File.realpath(__FILE__)), "helper")
3
module Sorting
  # In-place quick sort with median-of-three pivot selection; short ranges
  # are finished by insertion sort.
  #
  # Characteristics (author's notes):
  # * Comparison sort, based on partitioning
  # * Unstable (in efficient implementations)
  # * Time complexity: O(nlogn) best, Ө(nlogn) average, O(n2) worst
  # * Space complexity: O(logn)
  # * Supports parallelization
  # * Vulnerable to "median-of-3 killer" inputs
  class QuickSort
    extend Helper
    TEST_DATA_SIZE = 100_000
    # Ranges whose (last - first) distance is below this use insertion sort.
    SIZE_FOR_INSERTION = 12

    # Sorts +data+ in place. Always returns nil.
    def self.sort!(data)
      quicksort(data)
      nil
    end

    # NOTE(review): a bare `protected` only affects instance methods defined
    # after it; the `def self.` methods below remain publicly callable.
    protected

    # Sorts data[p..r] (both bounds inclusive) in place. Returns nil.
    #
    # After each partition the method recurses into the smaller side and
    # keeps looping on the larger one, so the recursion depth stays
    # logarithmic in the range size even when the partitions are lopsided.
    def self.quicksort(data, p=0, r=data.size-1)
      while (distance = (r - p)) > 0
        # optimization by insertion sort
        if distance < SIZE_FOR_INSERTION
          Sorting::InsertionSort.sort!(data, p, r)
          return
        end
        q = partition(data, p, r)
        if q - p < r - q
          quicksort(data, p, q - 1)
          p = q + 1
        else
          quicksort(data, q + 1, r)
          r = q - 1
        end
      end
    end

    # Partitions data[p..r] around a median-of-three pivot (Lomuto style:
    # the pivot sits at index r while scanning) and returns the pivot's
    # final index. Afterwards every element left of that index is <= the
    # pivot and every element right of it is greater.
    def self.partition(data, p, r)
      median_of_3 data, p, r
      pivot = data[r]
      i = p - 1
      p.upto(r - 1) do |j|
        if data[j] <= pivot
          i += 1
          data[i], data[j] = data[j], data[i]
        end
      end
      data[i + 1], data[r] = data[r], data[i + 1]
      i + 1
    end

    # Moves the median of data[p], data[middle] and data[r] to index r.
    #
    # The median is found with plain comparisons and the elements themselves
    # are swapped. (Looking the candidates up through a Hash keyed by value
    # would write a frozen duplicate back into +data+ for String elements,
    # because Hash duplicates and freezes String keys.)
    def self.median_of_3(data, p, r)
      m = p + (r - p) / 2
      first = data[p]
      middle = data[m]
      last = data[r]
      pivot_index =
        if first < middle
          if middle < last
            m
          elsif first < last
            r
          else
            p
          end
        elsif first < last
          p
        elsif middle < last
          r
        else
          m
        end
      data[pivot_index], data[r] = data[r], data[pivot_index] if pivot_index != r
    end

  end
end
62
+
63
# When this file is executed directly (not required), load the shared test
# helper and run its self-test/benchmark for this file.
if $0 == __FILE__
  here = File.dirname(File.realpath(__FILE__))
  require File.join(here, "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
67
+
@@ -0,0 +1,33 @@
1
+ require File.join(File.dirname(File.realpath(__FILE__)), "helper")
2
module Sorting
  # In-place selection sort.
  #
  # Characteristics (author's notes):
  # * Comparison sort, based on selection
  # * Unstable
  # * Time complexity: Ω(n2) best, Ө(n2) average, O(n2) worst
  # * Space complexity: O(n) total, O(1) auxiliary
  # * Suitable for small arrays (10-20) or write-heavy situations
  # * Family: heap sort, smooth sort
  class SelectionSort
    extend Helper
    TEST_DATA_SIZE = 1000

    # Sorts +data+ in place. Always returns nil.
    #
    # For every position except the last, finds the smallest element in the
    # not-yet-sorted tail and swaps it into that position.
    def self.sort!(data)
      last = data.size - 1
      0.upto(last - 1) do |start|
        smallest = start
        (start + 1).upto(last) do |probe|
          smallest = probe if data[probe] < data[smallest]
        end
        data[start], data[smallest] = data[smallest], data[start]
      end
      nil
    end

  end
end
29
+
30
# When this file is executed directly (not required), load the shared test
# helper and run its self-test/benchmark for this file.
if $0 == __FILE__
  here = File.dirname(File.realpath(__FILE__))
  require File.join(here, "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
@@ -0,0 +1,45 @@
1
+ require File.join(File.dirname(File.realpath(__FILE__)), "helper")
2
module Sorting
  # In-place Shell sort driven by an empirical gap sequence.
  #
  # Characteristics (author's notes):
  # * Comparison sort, based on insertion
  # * Unstable
  # * Time complexity: depends on gap sequence, best known is O(n(logn)2)
  # * Space complexity: O(1)
  # * Simple, fast
  # * Empirical gaps [1750, 701, 301, 132, 57, 23, 10, 4, 1]
  class ShellSort
    extend Helper
    TEST_DATA_SIZE = 100_000
    # Multiplier used to grow the gap sequence beyond its largest known value.
    GAP_FACTOR = 2.35
    # The gap sequence is extended until its largest gap reaches size / this.
    GAP_REQUIRED_THRESHOLD = 10

    # Sorts +data+ in place. Always returns nil.
    def self.sort!(data)
      # see http://oeis.org/A102549
      gaps = [1750, 701, 301, 132, 57, 23, 10, 4, 1]
      size = data.size
      wanted_gap = size / GAP_REQUIRED_THRESHOLD
      # Prepend larger gaps until the biggest one is at least wanted_gap.
      gaps.unshift((gaps.first * GAP_FACTOR).to_i) while wanted_gap > gaps.first

      gaps.each do |gap|
        # Gapped insertion sort: each element is inserted among the elements
        # that lie a multiple of +gap+ positions before it.
        gap.upto(size - 1) do |index|
          held = data[index]
          slot = index
          while slot >= gap
            ahead = data[slot - gap]
            break unless held < ahead
            data[slot] = ahead
            slot -= gap
          end
          data[slot] = held
        end
      end
      nil
    end

  end
end
41
+
42
# When this file is executed directly (not required), load the shared test
# helper and run its self-test/benchmark for this file.
if $0 == __FILE__
  here = File.dirname(File.realpath(__FILE__))
  require File.join(here, "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
@@ -0,0 +1,172 @@
1
+ #http://www.keithschwarz.com/smoothsort/
2
+ #http://www.keithschwarz.com/interesting/code/?dir=smoothsort
3
+ require File.join(File.dirname(File.realpath(__FILE__)), "helper")
4
module Sorting
  # In-place smooth sort over a sequence of Leonardo heaps, following the
  # structure of the implementation described at
  # http://www.keithschwarz.com/smoothsort/ .
  #
  # Characteristics (author's notes):
  # * Comparison sort, based on selection
  # * Unstable
  # * Time complexity: Ω(n) best, Ө(nlogn) average, O(nlogn) worst
  # * Space complexity: O(n) total, O(1) auxiliary
  # * Complicated; slower than heap sort in the average and worst case
  class SmoothSort
    extend Helper
    TEST_DATA_SIZE = 100_000
    # Leonardo numbers L(0)..L(45); entry k is used as the size of an
    # order-k heap.
    LEONARDO_NUMBERS = [
      1, 1, 3, 5, 9, 15, 25, 41, 67, 109, 177, 287, 465, 753,
      1219, 1973, 3193, 5167, 8361, 13529, 21891, 35421, 57313, 92735,
      150049, 242785, 392835, 635621, 1028457, 1664079, 2692537,
      4356617, 7049155, 11405773, 18454929, 29860703, 48315633, 78176337,
      126491971, 204668309, 331160281, 535828591, 866988873, 1402817465,
      2269806339, 3672623805
    ]
    # Leonardo number => index in LEONARDO_NUMBERS (not used in this class).
    LEONARDO_MAPPING = Hash[LEONARDO_NUMBERS.each_with_index.to_a]
    # 32-bit mask with only bit 0 cleared. Kept for compatibility; the code
    # below clears bit 0 with `& ~1`, which does not drop higher bits.
    LAST_ZERO = 2**32 - 2

    # Mutable description of the heap sequence.
    #
    # NOTE(review): this appears to follow the encoding of the referenced
    # article: bit 0 of +bit_trees+ stands for the heap of order
    # +smallest_heap_size+ and higher bits for successively larger orders.
    class HeapHelper
      attr_accessor :bit_trees
      attr_accessor :smallest_heap_size
      def initialize
        @bit_trees = 0
        @smallest_heap_size = 0
      end
    end

    class << self

      # Sorts +data+ in place. Always returns nil.
      #
      # First builds the heap sequence over the whole array, then removes
      # the last element of the sequence repeatedly, from the highest index
      # down to index 1.
      def sort!(data)
        max_i = data.size - 1
        helper = HeapHelper.new
        build_leonardo_heap data, max_i, helper
        max_i.downto(1) do |i|
          leonardo_heap_remove(data, i, helper)
        end
        nil
      end

      protected

      # Adds data[0..last_i] one element at a time, updating +helper+ to
      # describe the heap sequence after each addition.
      def build_leonardo_heap(data, last_i, helper)
        (0..last_i).each do |i|
          if helper.bit_trees == 0 # init
            helper.smallest_heap_size = 1
            helper.bit_trees = 1
          elsif helper.bit_trees & 0b11 == 0b11 # The last two trees have adjacent order
            helper.bit_trees >>= 2
            helper.smallest_heap_size += 2
            helper.bit_trees |= 1
          elsif helper.smallest_heap_size == 1
            helper.bit_trees <<= 1
            helper.smallest_heap_size = 0
            helper.bit_trees |= 1
          else
            helper.bit_trees <<= (helper.smallest_heap_size - 1)
            helper.smallest_heap_size = 1
            helper.bit_trees |= 1
          end
          # Decide whether the heap that now ends at i is already in its
          # final shape, i.e. the remaining elements cannot merge it into a
          # larger heap.
          is_last = case helper.smallest_heap_size
                    when 0
                      i == last_i
                    when 1
                      (i == last_i) || (i + 1 == last_i && helper.bit_trees & 0b10 == 0)
                    else
                      last_i - i < LEONARDO_NUMBERS[helper.smallest_heap_size - 1] + 1
                    end
          if is_last
            # Work on a copy: rectify_heaps mutates the helper it is given.
            rectify_heaps data, i, helper.dup
          else
            rebalance_single_heap data, i, helper.smallest_heap_size
          end
        end
      end

      # Removes the root at index +i+ (the last element of the sequence)
      # and updates +helper+. A heap of order 0 or 1 is simply dropped; a
      # larger heap is split into its two child heaps, which are then
      # rectified in turn.
      def leonardo_heap_remove(data, i, helper)
        if helper.smallest_heap_size <= 1
          # Drop the heap and advance to the next heap that is present.
          begin
            helper.bit_trees >>= 1
            helper.smallest_heap_size += 1
          end while helper.bit_trees > 0 && helper.bit_trees & 1 == 0
          return
        end
        heap_size = helper.smallest_heap_size
        # Replace the removed heap (bit 0) by its two children, whose orders
        # are heap_size - 1 and heap_size - 2.
        helper.bit_trees &= ~1
        helper.bit_trees <<= 2
        helper.bit_trees |= 0b11
        helper.smallest_heap_size -= 2
        left_root = left_child(i, heap_size)
        right_root = right_child(i)

        # View of the sequence that ends at the left child heap.
        all_but_last_helper = helper.dup
        all_but_last_helper.smallest_heap_size += 1
        all_but_last_helper.bit_trees >>= 1
        rectify_heaps data, left_root, all_but_last_helper
        rectify_heaps data, right_root, helper.dup
      end

      # Sifts the value at +root+ down inside a single heap of order +size+
      # until it is not smaller than its larger child.
      def rebalance_single_heap(data, root, size)
        while size > 1
          left = left_child(root, size)
          right = right_child(root)
          if data[left] > data[right]
            larger_child = left
            child_size = size - 1
          else
            larger_child = right
            child_size = size - 2
          end
          break unless data[root] < data[larger_child]
          data[root], data[larger_child] = data[larger_child], data[root]
          root = larger_child
          size = child_size
        end
      end

      # Moves the value at root +i+ leftwards across the heap roots until it
      # is greater than the root of the preceding heap, then sifts it down
      # inside the heap where it stopped. Mutates +helper+, so callers pass
      # a copy.
      def rectify_heaps(data, i, helper)
        j = i
        last_heap_size = nil
        while true
          last_heap_size = helper.smallest_heap_size
          # The heap rooted at j starts at index 0: nothing precedes it.
          break if j == LEONARDO_NUMBERS[last_heap_size] - 1
          to_compare = j
          if last_heap_size > 1
            larger_child = larger_child(data, j, last_heap_size)
            # Compare element against element (not against the index).
            to_compare = larger_child if data[larger_child] > data[to_compare]
          end
          prior_heap = j - LEONARDO_NUMBERS[last_heap_size]
          break if data[to_compare] > data[prior_heap]
          data[prior_heap], data[j] = data[j], data[prior_heap] if data[j] < data[prior_heap]
          j = prior_heap
          # Advance the helper to the preceding heap that is present.
          begin
            helper.bit_trees >>= 1
            helper.smallest_heap_size += 1
          end while helper.bit_trees & 1 == 0
        end
        rebalance_single_heap data, j, last_heap_size
      end

      # Index of the left child's root for a heap of order +size+ at +root+.
      def left_child(root, size)
        right_child(root) - LEONARDO_NUMBERS[size - 2]
      end

      # Index of the right child's root: the slot just before +root+.
      def right_child(root)
        root - 1
      end

      # Index of whichever child root holds the larger value (the right
      # child on ties).
      def larger_child(data, root, size)
        left = left_child(root, size)
        right = right_child(root)
        data[left] > data[right] ? left : right
      end

    end

  end
end
168
+
169
# When this file is executed directly (not required), load the shared test
# helper and run its self-test/benchmark for this file.
if $0 == __FILE__
  here = File.dirname(File.realpath(__FILE__))
  require File.join(here, "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
@@ -0,0 +1,90 @@
1
+ require 'benchmark'
2
module Sorting
  # Console self-test and benchmark harness used by the sorting classes.
  class TestHelper

    # Returns the integers 1..size as an Array, shuffled unless +shuffle+
    # is false.
    def self.get_sample_data(size=10, shuffle=true)
      data = (1..size).to_a
      shuffle ? data.shuffle : data
    end

    # Sorts +data+ with +clazz+ and returns the sorted array.
    #
    # Uses the in-place +sort!+ when the class responds to it (the mutated
    # +data+ is returned); otherwise returns the result of +clazz.sort+.
    def self.sort(clazz, data)
      if clazz.respond_to? :sort!
        clazz.sort! data
      else
        data = clazz.sort data
      end
      data
    end

    # Demonstrates and benchmarks the sorter that belongs to +filename+.
    #
    # The class is derived from the file name: the part of the base name
    # before the first "_" is capitalized and suffixed with "Sort"
    # ("merge_sort.rb" => Sorting::MergeSort). Prints a small before/after
    # example, then times one run on ordered and one on shuffled data of
    # +benchmark_data_size+ elements (default: the class' TEST_DATA_SIZE).
    #
    # Raises RuntimeError when a benchmark result differs from Array#sort.
    def self.test(filename, benchmark_data_size=nil)
      data = get_sample_data
      clazz = sorter_class_for(filename)
      benchmark_data_size ||= clazz::TEST_DATA_SIZE
      return if benchmark_data_size.nil?
      is_ext = clazz.const_defined?("IS_EXT") && clazz::IS_EXT ? "(C ext)" : ""
      puts "#{clazz}#{is_ext} - before sort: #{data.inspect}"
      data = sort(clazz, data)
      puts "#{clazz}#{is_ext} - after sort: #{data.inspect}"

      benchmark_and_verify clazz, get_sample_data(benchmark_data_size, false), "ordered"
      benchmark_and_verify clazz, get_sample_data(benchmark_data_size), "shuffled"
    end

    # Benchmarks Array#sort! on ordered and on shuffled data of
    # +benchmark_data_size+ elements (default 100_000) as a baseline.
    def self.test_original_sort(benchmark_data_size=nil)
      benchmark_data_size ||= 100_000
      [["ordered", false], ["shuffled", true]].each do |description, shuffle|
        sample_data = get_sample_data benchmark_data_size, shuffle
        benchmark_data = sample_data.dup
        result = Benchmark.measure do
          benchmark_data.sort!
        end
        puts "Array.sort: #{sample_data.size} #{description} number cost"
        puts result
      end
    end

    # Returns a Hash of index => [a[index], b[index]] for every position at
    # which the two arrays differ. Positions that exist in only one of the
    # arrays are reported with nil for the missing side, so a result that is
    # too short or too long is detected as well.
    def self.diff(a, b)
      diff = {}
      [a.size, b.size].max.times do |i|
        x = a[i]
        y = b[i]
        diff[i] = [x, y] if x != y
      end
      diff
    end

    # Resolves the sorter class for +filename+ with a constant lookup
    # restricted to the Sorting namespace (no eval of file-name text).
    def self.sorter_class_for(filename)
      prefix = File.basename(filename).split("_")[0].capitalize
      Sorting.const_get("#{prefix}Sort", false)
    end
    private_class_method :sorter_class_for

    # Times one sort of a copy of +sample_data+, checks the result against
    # Array#sort and prints the timing. +description+ labels the data set.
    def self.benchmark_and_verify(clazz, sample_data, description)
      benchmark_data = sample_data.dup
      result = Benchmark.measure do
        benchmark_data = sort(clazz, benchmark_data)
        #10.times {sort(clazz, benchmark_data.dup)} #notice, you should dup sample data for multiple test
      end
      mismatches = diff(benchmark_data, sample_data.sort)
      raise "sort wrong for #{sample_data.inspect}, diff: #{mismatches.inspect}" unless mismatches.empty?
      puts "#{sample_data.size} #{description} number cost"
      puts result
    end
    private_class_method :benchmark_and_verify

  end
end
82
# When this file is executed directly: benchmark Array#sort! as a baseline,
# then require every entry of this directory whose name matches /sort/ and
# run the self-test for it, printing a blank line after each.
if $0 == __FILE__
  Sorting::TestHelper.test_original_sort(100_000)
  here = File.dirname(File.realpath(__FILE__))
  Dir.foreach(here).grep(/sort/) { |entry|
    require File.join(here, entry)
    Sorting::TestHelper.test(entry)
    puts "\n"
  }
end