ralgorithms 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/sorting/extconf.rb +8 -0
- data/ext/sorting/sorting_common.c +401 -0
- data/ext/sorting/sorting_ext.c +112 -0
- data/lib/searching/skip_list.rb +135 -0
- data/lib/sorting/bubble_sort.rb +36 -0
- data/lib/sorting/heap_sort.rb +65 -0
- data/lib/sorting/helper.rb +11 -0
- data/lib/sorting/insertion_sort.rb +73 -0
- data/lib/sorting/intro_sort.rb +72 -0
- data/lib/sorting/library_sort.rb +80 -0
- data/lib/sorting/load_ext.rb +5 -0
- data/lib/sorting/merge_sort.rb +74 -0
- data/lib/sorting/quick_sort.rb +67 -0
- data/lib/sorting/selection_sort.rb +33 -0
- data/lib/sorting/shell_sort.rb +45 -0
- data/lib/sorting/smooth_sort.rb +172 -0
- data/lib/sorting/test_helper.rb +90 -0
- data/lib/sorting/tim_sort.rb +21 -0
- metadata +21 -2
@@ -0,0 +1,74 @@
|
|
1
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "insertion_sort")
|
2
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "helper")
|
3
|
+
module Sorting
|
4
|
+
class MergeSort
|
5
|
+
extend Helper
|
6
|
+
TEST_DATA_SIZE=100_000
|
7
|
+
SIZE_FOR_INSERTION=15
|
8
|
+
|
9
|
+
# Merge sort
|
10
|
+
# Comparison sort
|
11
|
+
# Merging
|
12
|
+
# Stable
|
13
|
+
# Time complexity: Ω(nlogn), Ө(nlogn), O(nlogn)
|
14
|
+
# Space complexity: O(n)
|
15
|
+
# Support parallelization
|
16
|
+
# Better for linked list
|
17
|
+
# Efficient at handling slow-to-access sequential media
|
18
|
+
# Top-down merge sort entry point.
#
# Inputs shorter than SIZE_FOR_INSERTION are delegated to
# Sorting::InsertionSort.sort and its return value is passed straight back.
# Anything longer is cut in half, each half is sorted recursively and the two
# results are combined with +merge+.
#
# data - the Array to sort (elements are compared with `>` inside +merge+).
#
# Returns the sorted Array. For the recursive case this is the Array built by
# +merge+; whether the small-input case returns a copy depends on
# InsertionSort.sort (not visible here).
def self.sort(data)
  return Sorting::InsertionSort.sort(data) if data.size < SIZE_FOR_INSERTION

  middle = data.size / 2
  lower_half = data[0...middle]
  upper_half = data[middle...data.size]
  merge(sort(lower_half), sort(upper_half))
end
|
28
|
+
|
29
|
+
protected
|
30
|
+
|
31
|
+
# Merges two already-sorted Arrays into one sorted Array.
#
# On ties the element from +left+ is emitted first, which keeps the merge
# stable. When either input is empty the other input is returned as-is (the
# very same object, not a copy).
#
# NOTE(review): a bare `protected` does not affect methods defined with
# `def self.`, so this method is still publicly callable.
#
# left  - sorted Array.
# right - sorted Array.
#
# Returns the merged Array.
def self.merge(left, right)
  return right if left.empty?
  return left if right.empty?

  merged = []
  l = 0
  r = 0
  left_limit = left.size
  right_limit = right.size
  loop do
    head_left = left[l]
    head_right = right[r]
    if head_left > head_right
      merged << head_right
      r += 1
    else
      merged << head_left
      l += 1
    end
    # As soon as one side is used up, the rest of the other side is already
    # in order and can be appended wholesale.
    return merged.concat(right[r..-1]) if l == left_limit
    return merged.concat(left[l..-1]) if r == right_limit
  end
end
|
58
|
+
|
59
|
+
#def self.merge(left, right)
|
60
|
+
# sorted = []
|
61
|
+
# until left.empty? or right.empty?
|
62
|
+
# left.first <= right.first ? sorted << left.shift : sorted << right.shift
|
63
|
+
# end
|
64
|
+
# sorted + left + right
|
65
|
+
#end
|
66
|
+
|
67
|
+
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Self-test hook: only runs when this file is executed directly, not when it
# is required. Loads the sibling test helper and runs it against this file.
if $PROGRAM_NAME == __FILE__
  require File.join(File.dirname(File.realpath(__FILE__)), "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "insertion_sort")
|
2
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "helper")
|
3
|
+
module Sorting
|
4
|
+
class QuickSort
|
5
|
+
extend Helper
|
6
|
+
TEST_DATA_SIZE=100_000
|
7
|
+
SIZE_FOR_INSERTION=12
|
8
|
+
|
9
|
+
# Quick sort
|
10
|
+
# Comparison sort
|
11
|
+
# Partitioning
|
12
|
+
# Unstable(in efficient implementations)
|
13
|
+
# Time complexity: Ω(nlogn), Ө(nlogn), O(n^2)
|
14
|
+
# Space complexity: O(logn)
|
15
|
+
# Support parallelization
|
16
|
+
# Median-of-3 killer
|
17
|
+
# Sorts +data+ in place by running +quicksort+ over the whole Array.
#
# data - the Array to sort; it is modified directly.
#
# Returns nil.
def self.sort!(data)
  quicksort data
  nil
end
|
21
|
+
|
22
|
+
protected
|
23
|
+
|
24
|
+
# Sorts data[p..r] in place.
#
# Each round partitions the current range, recurses into the part to the
# right of the pivot and then continues the loop on the part to the left, so
# only one of the two halves costs a recursive call. Ranges spanning fewer
# than SIZE_FOR_INSERTION positions are finished by
# Sorting::InsertionSort.sort!.
#
# data - the Array being sorted.
# p    - first index of the range (default 0).
# r    - last index of the range, inclusive (default data.size - 1).
#
# Returns nil.
def self.quicksort(data, p=0, r=data.size-1)
  loop do
    span = r - p
    break unless span > 0

    if span < SIZE_FOR_INSERTION
      Sorting::InsertionSort.sort!(data, p, r)
      return
    end

    pivot_at = partition(data, p, r)
    quicksort(data, pivot_at + 1, r)
    r = pivot_at - 1
  end
end
|
36
|
+
|
37
|
+
# Partitions data[p..r] around a pivot and reports where the pivot ended up.
#
# +median_of_3+ is called first so that the pivot candidate sits in data[r].
# The range is then scanned once from the left: every element that is <= the
# pivot is swapped into the growing left-hand section. Finally the pivot is
# swapped in right after that section.
#
# data - the Array being sorted (modified in place).
# p    - first index of the range.
# r    - last index of the range, inclusive.
#
# Returns the pivot's final index.
def self.partition(data, p, r)
  median_of_3(data, p, r)
  pivot_value = data[r]
  boundary = p # first index not yet known to hold a value <= pivot
  (p...r).each do |scan|
    next unless data[scan] <= pivot_value

    data[boundary], data[scan] = data[scan], data[boundary]
    boundary += 1
  end
  data[boundary], data[r] = data[r], data[boundary]
  boundary
end
|
50
|
+
|
51
|
+
# Moves the median of data[p], data[middle] and data[r] into data[r], where
# +partition+ reads its pivot.
#
# The three candidates are compared directly by index. No Hash keyed by
# element value is built: using elements as Hash keys made Ruby store frozen
# duplicates of unfrozen String elements back into the Array, and it picked
# an extreme value instead of the median whenever two candidates were equal.
#
# data - the Array being sorted (at most two elements are swapped).
# p    - first index of the range.
# r    - last index of the range, inclusive.
#
# Returns nil.
def self.median_of_3(data, p, r)
  m = p + (r - p) / 2
  first = data[p]
  middle = data[m]
  last = data[r]

  # Test data[r] first so that ties resolve to "already in place".
  pivot_index =
    if (first <= last && last <= middle) || (middle <= last && last <= first)
      r
    elsif (first <= middle && middle <= last) || (last <= middle && middle <= first)
      m
    else
      p
    end

  data[pivot_index], data[r] = data[r], data[pivot_index] unless pivot_index == r
  nil
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# Self-test hook: only runs when this file is executed directly, not when it
# is required. Loads the sibling test helper and runs it against this file.
if $PROGRAM_NAME == __FILE__
  require File.join(File.dirname(File.realpath(__FILE__)), "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
|
67
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "helper")
|
2
|
+
module Sorting
|
3
|
+
class SelectionSort
|
4
|
+
extend Helper
|
5
|
+
TEST_DATA_SIZE=1000
|
6
|
+
|
7
|
+
# Selection sort
|
8
|
+
# Comparison sort
|
9
|
+
# Selection
|
10
|
+
# Unstable
|
11
|
+
# Time complexity: Ω(n2), Ө(n2), O(n2)
|
12
|
+
# Space complexity: O(n) total, O(1) auxiliary
|
13
|
+
# Suitable for small arrays (10-20 elements) or when writes are expensive (at most n-1 swaps)
|
14
|
+
# Family: Heap sort, Smooth sort
|
15
|
+
# Sorts +data+ in place with selection sort.
#
# For every slot except the last, the smallest element of the remaining
# unsorted tail is located and swapped into that slot. The swap is done
# unconditionally, even when the slot already holds the minimum.
#
# data - the Array to sort; elements are compared with `<`.
#
# Returns nil.
def self.sort!(data)
  last = data.size - 1
  0.upto(last - 1) do |slot|
    smallest = slot
    (slot + 1).upto(last) do |probe|
      smallest = probe if data[probe] < data[smallest]
    end
    data[slot], data[smallest] = data[smallest], data[slot]
  end
  nil
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Self-test hook: only runs when this file is executed directly, not when it
# is required. Loads the sibling test helper and runs it against this file.
if $PROGRAM_NAME == __FILE__
  require File.join(File.dirname(File.realpath(__FILE__)), "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "helper")
|
2
|
+
module Sorting
|
3
|
+
class ShellSort
|
4
|
+
extend Helper
|
5
|
+
TEST_DATA_SIZE=100_000
|
6
|
+
GAP_FACTOR = 2.35
|
7
|
+
GAP_REQUIRED_THRESHOLD = 10
|
8
|
+
|
9
|
+
# Shell sort
|
10
|
+
# Comparison sort
|
11
|
+
# Insertion
|
12
|
+
# Unstable
|
13
|
+
# Time complexity: depends on gap sequence, best known is O(n(logn)2)
|
14
|
+
# Space complexity: O(1)
|
15
|
+
# Simple, fast
|
16
|
+
# Empirical gap [1750, 701, 301, 132, 57, 23, 10, 4, 1]
|
17
|
+
# Sorts +data+ in place with Shell sort.
#
# The gap list starts from the fixed sequence 1750, 701, ..., 4, 1 (see
# http://oeis.org/A102549). While data.size / GAP_REQUIRED_THRESHOLD is larger
# than the biggest gap, a new biggest gap is prepended, computed as the
# current biggest gap times GAP_FACTOR (truncated to an Integer). For each
# gap, from largest to smallest, a gapped insertion sort is run over the
# whole Array.
#
# data - the Array to sort; elements are compared with `<`.
#
# Returns nil.
def self.sort!(data)
  size = data.size
  gaps = [1750, 701, 301, 132, 57, 23, 10, 4, 1]
  wanted_gap = size / GAP_REQUIRED_THRESHOLD
  gaps.unshift((gaps.first * GAP_FACTOR).to_i) while wanted_gap > gaps.first

  gaps.each do |gap|
    gap.upto(size - 1) do |index|
      moving = data[index]
      hole = index
      # Shift larger elements of this gap-chain up until the hole is where
      # `moving` belongs.
      while hole >= gap
        earlier = data[hole - gap]
        break unless moving < earlier

        data[hole] = earlier
        hole -= gap
      end
      data[hole] = moving
    end
  end
  nil
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Self-test hook: only runs when this file is executed directly, not when it
# is required. Loads the sibling test helper and runs it against this file.
if $PROGRAM_NAME == __FILE__
  require File.join(File.dirname(File.realpath(__FILE__)), "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#http://www.keithschwarz.com/smoothsort/
|
2
|
+
#http://www.keithschwarz.com/interesting/code/?dir=smoothsort
|
3
|
+
require File.join(File.dirname(File.realpath(__FILE__)), "helper")
|
4
|
+
module Sorting
|
5
|
+
class SmoothSort
|
6
|
+
extend Helper
|
7
|
+
TEST_DATA_SIZE=100_000
|
8
|
+
LEONARDO_NUMBERS = [
|
9
|
+
1, 1, 3, 5, 9, 15, 25, 41, 67, 109, 177, 287, 465, 753,
|
10
|
+
1219, 1973, 3193, 5167, 8361, 13529, 21891, 35421, 57313, 92735,
|
11
|
+
150049, 242785, 392835, 635621, 1028457, 1664079, 2692537,
|
12
|
+
4356617, 7049155, 11405773, 18454929, 29860703, 48315633, 78176337,
|
13
|
+
126491971, 204668309, 331160281, 535828591, 866988873, 1402817465,
|
14
|
+
2269806339, 3672623805
|
15
|
+
]
|
16
|
+
LEONARDO_MAPPING = Hash[LEONARDO_NUMBERS.each_with_index.to_a]
|
17
|
+
LAST_ZERO = 2**32 - 2
|
18
|
+
|
19
|
+
# Mutable description of the current Leonardo-heap "shape".
#
# bit_trees          - Integer bitmask of the trees present; bit 0 stands for
#                      the smallest tree.
# smallest_heap_size - order (index into LEONARDO_NUMBERS) of that smallest
#                      tree.
#
# Both fields start at 0, meaning "no trees yet".
class HeapHelper
  attr_accessor :bit_trees, :smallest_heap_size

  def initialize
    @smallest_heap_size = 0
    @bit_trees = 0
  end
end
|
27
|
+
|
28
|
+
class << self
|
29
|
+
|
30
|
+
# Smooth sort
|
31
|
+
# Comparison sort
|
32
|
+
# Selection
|
33
|
+
# Unstable
|
34
|
+
# Time complexity: Ω(n), Ө(nlogn), O(nlogn)
|
35
|
+
# Space complexity: O(n) total, O(1) auxiliary
|
36
|
+
# Complicated; slower than heap sort in the average and worst cases
|
37
|
+
# Sorts +data+ in place with smoothsort.
#
# First the whole Array is turned into a sequence of Leonardo heaps, then the
# heaps are dismantled from the last index down to index 1. Both phases share
# one HeapHelper that tracks the current heap shape.
#
# data - the Array to sort.
#
# Returns nil.
def sort!(data)
  last = data.size - 1
  shape = HeapHelper.new
  build_leonardo_heap(data, last, shape)
  last.downto(1) { |tail| leonardo_heap_remove(data, tail, shape) }
  nil
end
|
46
|
+
|
47
|
+
protected
|
48
|
+
# Builds the Leonardo-heap structure over data[0..last_i], adding one element
# per iteration and updating +helper+ to describe the resulting shape.
#
# Shape update for each new element:
#   * no trees yet                      -> a single tree of order 1
#   * two smallest trees are adjacent   -> merge them under the new element
#   * smallest tree has order 1         -> add a tree of order 0
#   * otherwise                         -> add a tree of order 1
#
# The new root is then either only sifted down inside its own tree
# (+rebalance_single_heap+) or, when the tree it sits in is judged to be in
# its final form (too few elements remain for it to be merged later), fully
# ordered against the roots to its left (+rectify_heaps+, which works on a
# copy of the shape).
#
# data   - the Array being sorted.
# last_i - index of the last element.
# helper - HeapHelper holding the shape; updated in place.
#
# Returns the Range that was iterated.
def build_leonardo_heap(data, last_i, helper)
  (0..last_i).each do |position|
    if helper.bit_trees == 0
      helper.smallest_heap_size = 1
      helper.bit_trees = 1
    elsif helper.bit_trees & 0b11 == 0b11
      helper.bit_trees = (helper.bit_trees >> 2) | 1
      helper.smallest_heap_size += 2
    elsif helper.smallest_heap_size == 1
      helper.bit_trees = (helper.bit_trees << 1) | 1
      helper.smallest_heap_size = 0
    else
      helper.bit_trees = (helper.bit_trees << (helper.smallest_heap_size - 1)) | 1
      helper.smallest_heap_size = 1
    end

    remaining = last_i - position
    final_form =
      case helper.smallest_heap_size
      when 0
        remaining == 0
      when 1
        remaining == 0 || (remaining == 1 && helper.bit_trees & 0b10 == 0)
      else
        remaining < LEONARDO_NUMBERS[helper.smallest_heap_size - 1] + 1
      end

    if final_form
      rectify_heaps(data, position, helper.dup)
    else
      rebalance_single_heap(data, position, helper.smallest_heap_size)
    end
  end
end
|
81
|
+
|
82
|
+
# Removes the root at index +i+ (the last element of the heap sequence) from
# the shape described by +helper+.
#
# * If the smallest tree has order 0 or 1 it is a single node: it is dropped
#   from the bitmask and the shape is shifted until the next existing tree
#   becomes the smallest one (or no trees are left).
# * Otherwise the smallest tree is split into its two children, which become
#   the two smallest trees. Their roots are then put in order against the
#   roots to their left with +rectify_heaps+: first the left child (using a
#   shape copy that excludes the right child), then the right child.
#
# The bit for the removed tree is cleared with `& ~1`. Masking with a fixed
# 32-bit constant would also wipe every tree above bit 31, which Ruby's
# unbounded Integers can legitimately hold for very large inputs.
#
# data   - the Array being sorted.
# i      - index of the root being removed.
# helper - HeapHelper holding the shape; updated in place.
#
# Returns nothing meaningful.
def leonardo_heap_remove(data, i, helper)
  if helper.smallest_heap_size <= 1
    loop do
      helper.bit_trees >>= 1
      helper.smallest_heap_size += 1
      break unless helper.bit_trees > 0 && helper.bit_trees & 1 == 0
    end
    return
  end

  heap_size = helper.smallest_heap_size
  # Drop the removed tree, then make room for (and set) its two children.
  helper.bit_trees = ((helper.bit_trees & ~1) << 2) | 0b11
  helper.smallest_heap_size -= 2

  left_root = left_child(i, heap_size)
  right_root = right_child(i)

  # Shape as seen from the left child: everything except the right child.
  all_but_last_helper = helper.dup
  all_but_last_helper.smallest_heap_size += 1
  all_but_last_helper.bit_trees >>= 1

  rectify_heaps(data, left_root, all_but_last_helper)
  rectify_heaps(data, right_root, helper.dup)
end
|
104
|
+
|
105
|
+
# Sifts the element at +root+ down inside one Leonardo tree until it is not
# smaller than its larger child.
#
# A tree of order +size+ has a left subtree of order size - 1 and a right
# subtree of order size - 2; trees of order 0 or 1 are single nodes, which is
# where the loop stops. When both children compare equal the right child is
# chosen.
#
# data - the Array being sorted (modified in place).
# root - index of the tree's root.
# size - order of the tree rooted at +root+.
#
# Returns nil.
def rebalance_single_heap(data, root, size)
  until size <= 1
    left = left_child(root, size)
    right = right_child(root)
    if data[left] > data[right]
      bigger = left
      bigger_size = size - 1
    else
      bigger = right
      bigger_size = size - 2
    end
    break unless data[root] < data[bigger]

    data[root], data[bigger] = data[bigger], data[root]
    root = bigger
    size = bigger_size
  end
end
|
127
|
+
|
128
|
+
# Restores the ordering between heap roots after a new root was placed at
# index +i+, then sifts the element down inside the tree it finally lands in.
#
# Starting at +i+, the current root (or its larger child, if that is bigger)
# is compared with the root of the tree immediately to the left. While that
# left root is not smaller, the current root moves one tree to the left
# (swapping when it is strictly smaller) and the shape in +helper+ is advanced
# to that tree. The walk ends at the leftmost tree or as soon as the left root
# is smaller.
#
# The larger child is compared with the element at +to_compare+, not with the
# index itself. Comparing an element with an index misjudges integer data and
# raises for non-numeric elements such as Strings.
#
# data   - the Array being sorted (modified in place).
# i      - index of the root to start from.
# helper - HeapHelper describing the shape ending at +i+; it is consumed
#          (mutated) by the walk, so callers pass a copy.
#
# Returns the result of +rebalance_single_heap+.
def rectify_heaps(data, i, helper)
  j = i
  last_heap_size = helper.smallest_heap_size
  loop do
    last_heap_size = helper.smallest_heap_size
    # The leftmost tree ends exactly at index L(order) - 1: nothing to its left.
    break if j == LEONARDO_NUMBERS[last_heap_size] - 1

    to_compare = j
    if last_heap_size > 1
      bigger_child = larger_child(data, j, last_heap_size)
      to_compare = bigger_child if data[bigger_child] > data[to_compare]
    end

    prior_heap = j - LEONARDO_NUMBERS[last_heap_size]
    break if data[to_compare] > data[prior_heap]

    data[prior_heap], data[j] = data[j], data[prior_heap] if data[j] < data[prior_heap]
    j = prior_heap

    # Advance the shape to the next tree that actually exists.
    loop do
      helper.bit_trees >>= 1
      helper.smallest_heap_size += 1
      break unless helper.bit_trees & 1 == 0
    end
  end
  rebalance_single_heap(data, j, last_heap_size)
end
|
149
|
+
|
150
|
+
# Index of the left child's root for the tree of order +size+ rooted at
# +root+.
#
# The right child sits directly before the root and spans
# LEONARDO_NUMBERS[size - 2] elements; the left child's root is the element
# just before that span.
#
# Returns the Integer index.
def left_child(root, size)
  right_span = LEONARDO_NUMBERS[size - 2]
  root - 1 - right_span
end
|
153
|
+
|
154
|
+
# Index of the right child's root: in this layout it is always the element
# stored immediately before +root+.
#
# Returns the Integer index.
def right_child(root)
  root - 1
end
|
157
|
+
|
158
|
+
# Picks whichever child root of the tree at +root+ holds the larger element.
# When both children compare equal the right child is returned.
#
# data - the Array being sorted.
# root - index of the tree's root.
# size - order of the tree rooted at +root+.
#
# Returns the Integer index of the larger child.
def larger_child(data, root, size)
  left = left_child(root, size)
  right = right_child(root)
  return left if data[left] > data[right]

  right
end
|
163
|
+
|
164
|
+
end
|
165
|
+
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
# Self-test hook: only runs when this file is executed directly, not when it
# is required. Loads the sibling test helper and runs it against this file.
if $PROGRAM_NAME == __FILE__
  require File.join(File.dirname(File.realpath(__FILE__)), "test_helper")
  Sorting::TestHelper.test(__FILE__)
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
module Sorting
|
3
|
+
class TestHelper
|
4
|
+
|
5
|
+
# Produces the integers 1..size as test input.
#
# size    - how many numbers to generate (default 10).
# shuffle - when true (the default) the numbers are returned in random order,
#           otherwise in ascending order.
#
# Returns a new Array of Integers.
def self.get_sample_data(size=10, shuffle=true)
  ascending = (1..size).to_a
  return ascending unless shuffle

  ascending.shuffle
end
|
9
|
+
|
10
|
+
# Runs the sort implemented by +clazz+ on +data+, whichever flavour it has.
#
# If +clazz+ responds to +sort!+ the Array is sorted in place and that same
# Array is returned; otherwise +clazz.sort+ is called and its result is
# returned.
#
# clazz - object (normally a sorting class) responding to sort! or sort.
# data  - the Array to sort.
#
# Returns the sorted Array.
def self.sort(clazz, data)
  return clazz.sort(data) unless clazz.respond_to?(:sort!)

  clazz.sort!(data)
  data
end
|
18
|
+
|
19
|
+
# Demonstrates, verifies and benchmarks the sorting class that belongs to
# +filename+.
#
# The class name is derived from the file's basename: the text before the
# first underscore is capitalized and suffixed with "Sort" (for example
# "quick_sort.rb" -> Sorting::QuickSort). The class is looked up with
# const_get, so no part of the filename is ever evaluated as Ruby code.
#
# A 10-element sample is printed before and after sorting. Then two timed
# runs are made, one on ascending input and one on shuffled input; each
# result is checked against Array#sort and the timing is printed.
#
# filename            - path or name of the sorting implementation's file.
# benchmark_data_size - number of elements for the timed runs; defaults to
#                       the class's TEST_DATA_SIZE. Nothing is run when that
#                       constant is nil.
#
# Returns nil.
# Raises NameError if the derived class does not exist.
# Raises RuntimeError if a sorted result differs from Array#sort.
def self.test(filename, benchmark_data_size=nil)
  data = get_sample_data
  class_name = "#{File.basename(filename).split("_")[0].capitalize}Sort"
  clazz = Sorting.const_get(class_name, false)
  benchmark_data_size ||= clazz::TEST_DATA_SIZE
  return if benchmark_data_size.nil?

  is_ext = clazz.const_defined?("IS_EXT") && clazz::IS_EXT ? "(C ext)" : ""
  puts "#{clazz}#{is_ext} - before sort: #{data.inspect}"
  data = sort(clazz, data)
  puts "#{clazz}#{is_ext} - after sort: #{data.inspect}"

  [[false, "ordered"], [true, "shuffled"]].each do |shuffle, label|
    sample_data = get_sample_data benchmark_data_size, shuffle
    # Sort a copy so the untouched sample can serve as the reference.
    benchmark_data = sample_data.dup
    result = Benchmark.measure do
      benchmark_data = sort(clazz, benchmark_data)
    end
    mismatches = diff(benchmark_data, sample_data.sort)
    raise "sort wrong for #{sample_data.inspect}, diff: #{mismatches.inspect}" unless mismatches.empty?

    puts "#{sample_data.size} #{label} number cost"
    puts result
  end
  nil
end
|
51
|
+
|
52
|
+
# Benchmarks Ruby's built-in Array#sort! as a baseline for the hand-written
# algorithms.
#
# Two timed runs are made, one on ascending input and one on shuffled input,
# and the timing of each is printed with a label naming the kind of input.
#
# benchmark_data_size - number of elements to sort (default 100_000).
#
# Returns nil.
def self.test_original_sort(benchmark_data_size=nil)
  benchmark_data_size ||= 100_000
  [[false, "ordered"], [true, "shuffled"]].each do |shuffle, label|
    sample_data = get_sample_data benchmark_data_size, shuffle
    benchmark_data = sample_data.dup
    result = Benchmark.measure { benchmark_data.sort! }
    puts "Array.sort: #{sample_data.size} #{label} number cost"
    puts result
  end
  nil
end
|
70
|
+
|
71
|
+
# Compares two Arrays position by position.
#
# Every index up to the length of the longer Array is checked, so a missing
# or extra element shows up as a difference (the shorter side reads as nil).
#
# a - first Array (typically the result under test).
# b - second Array (typically the reference result).
#
# Returns a Hash mapping each differing index to [a[index], b[index]]; the
# Hash is empty when both Arrays have the same elements in the same order.
def self.diff(a, b)
  longest = [a.size, b.size].max
  (0...longest).each_with_object({}) do |index, mismatches|
    x = a[index]
    y = b[index]
    mismatches[index] = [x, y] if x != y
  end
end
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
# Runner: only active when this file is executed directly. Benchmarks the
# built-in sort first, then requires and tests every entry of this directory
# whose name contains "sort".
if $PROGRAM_NAME == __FILE__
  Sorting::TestHelper.test_original_sort(100_000)
  here = File.dirname(File.realpath(__FILE__))
  Dir.foreach(here).grep(/sort/) do |entry|
    require File.join(here, entry)
    Sorting::TestHelper.test(entry)
    puts "\n"
  end
end
|