amorim-algorithms 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.markdown +193 -0
- data/Gemfile +9 -0
- data/Manifest +51 -0
- data/README.markdown +87 -0
- data/Rakefile +22 -0
- data/algorithms.gemspec +23 -0
- data/benchmarks/deque.rb +17 -0
- data/benchmarks/sorts.rb +34 -0
- data/benchmarks/treemaps.rb +51 -0
- data/ext/algorithms/string/extconf.rb +4 -0
- data/ext/algorithms/string/string.c +68 -0
- data/ext/containers/bst/bst.c +247 -0
- data/ext/containers/bst/extconf.rb +4 -0
- data/ext/containers/deque/deque.c +247 -0
- data/ext/containers/deque/extconf.rb +4 -0
- data/ext/containers/rbtree_map/extconf.rb +4 -0
- data/ext/containers/rbtree_map/rbtree.c +498 -0
- data/ext/containers/splaytree_map/extconf.rb +4 -0
- data/ext/containers/splaytree_map/splaytree.c +419 -0
- data/lib/algorithms.rb +66 -0
- data/lib/algorithms/search.rb +84 -0
- data/lib/algorithms/sort.rb +368 -0
- data/lib/algorithms/string.rb +9 -0
- data/lib/containers/deque.rb +171 -0
- data/lib/containers/heap.rb +499 -0
- data/lib/containers/kd_tree.rb +110 -0
- data/lib/containers/priority_queue.rb +113 -0
- data/lib/containers/queue.rb +68 -0
- data/lib/containers/rb_tree_map.rb +398 -0
- data/lib/containers/splay_tree_map.rb +269 -0
- data/lib/containers/stack.rb +67 -0
- data/lib/containers/suffix_array.rb +68 -0
- data/lib/containers/trie.rb +182 -0
- data/spec/bst_gc_mark_spec.rb +25 -0
- data/spec/bst_spec.rb +25 -0
- data/spec/deque_gc_mark_spec.rb +18 -0
- data/spec/deque_spec.rb +108 -0
- data/spec/heap_spec.rb +131 -0
- data/spec/kd_expected_out.txt +10000 -0
- data/spec/kd_test_in.txt +10000 -0
- data/spec/kd_tree_spec.rb +34 -0
- data/spec/map_gc_mark_spec.rb +29 -0
- data/spec/priority_queue_spec.rb +75 -0
- data/spec/queue_spec.rb +61 -0
- data/spec/rb_tree_map_spec.rb +123 -0
- data/spec/search_spec.rb +28 -0
- data/spec/sort_spec.rb +29 -0
- data/spec/splay_tree_map_spec.rb +106 -0
- data/spec/stack_spec.rb +60 -0
- data/spec/string_spec.rb +15 -0
- data/spec/suffix_array_spec.rb +40 -0
- data/spec/trie_spec.rb +59 -0
- metadata +108 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
This module implements search algorithms. Documentation is provided for each algorithm.
|
3
|
+
|
4
|
+
=end
|
5
|
+
module Algorithms::Search
|
6
|
+
# Binary Search: This search finds an item in log(n) time provided that the container is already sorted.
|
7
|
+
# The method returns the item if it is found, or nil if it is not. If there are duplicates, the first one
|
8
|
+
# found is returned, and this is not guaranteed to be the smallest or largest item.
|
9
|
+
#
|
10
|
+
# Complexity: O(lg N)
|
11
|
+
#
|
12
|
+
# Algorithms::Search.binary_search([1, 2, 3], 1) #=> 1
|
13
|
+
# Algorithms::Search.binary_search([1, 2, 3], 4) #=> nil
|
14
|
+
# Iterative bisection over the index window [low, high].
#
# container - sorted collection responding to #size and #[] (integer index).
# item      - value to look for; elements are compared to it with > and <.
#
# Returns the matching element taken from the container, or nil when item is
# nil or no element compares equal to it.
def self.binary_search(container, item)
  return nil if item.nil?

  low = 0
  high = container.size - 1
  until low > high
    mid = (low + high) / 2
    candidate = container[mid]
    if candidate > item
      high = mid - 1 # a match can only sit left of mid
    elsif candidate < item
      low = mid + 1  # a match can only sit right of mid
    else
      return candidate
    end
  end
  nil
end
|
31
|
+
|
32
|
+
# Knuth-Morris-Pratt substring search algorithm: Efficiently finds the starting position of a
|
33
|
+
# substring in a string. The algorithm calculates the best position to resume searching from if a failure
|
34
|
+
# occurs.
|
35
|
+
#
|
36
|
+
# The method returns the index of the starting position in the string where the substring is found. If there
|
37
|
+
# is no match, nil is returned.
|
38
|
+
#
|
39
|
+
# Complexity: O(n + k), where n is the length of the string and k is the length of the substring.
|
40
|
+
#
|
41
|
+
# Algorithms::Search.kmp_search("ABC ABCDAB ABCDABCDABDE", "ABCDABD") #=> 15
|
42
|
+
# Algorithms::Search.kmp_search("ABC ABCDAB ABCDABCDABDE", "ABCDEF") #=> nil
|
43
|
+
# Finds the first occurrence of substring inside string (Knuth-Morris-Pratt).
#
# string    - text to scan; must respond to #length and #[] (integer index).
# substring - pattern to locate; same requirements.
#
# Returns the zero-based index where the first match starts, or nil when
# either argument is nil, when there is no match, or when substring is empty.
def self.kmp_search(string, substring)
  return nil if string.nil? || substring.nil?

  # Failure table: fallback[i] is the pattern index to resume from after a
  # mismatch at pattern index i; -1 means "move on to the next text position".
  fallback = [-1, 0]
  pos = 2
  cnd = 0
  while pos < substring.length
    if substring[pos - 1] == substring[cnd]
      cnd += 1
      fallback[pos] = cnd
      pos += 1
    elsif cnd > 0
      cnd = fallback[cnd]
    else
      fallback[pos] = 0
      pos += 1
    end
  end

  start = 0   # index in string where the current candidate match begins
  matched = 0 # how many pattern characters are matched so far
  while start + matched < string.length
    if substring[matched] == string[start + matched]
      matched += 1
      return start if matched == substring.length
    else
      # Jump the candidate start forward using the table instead of rescanning.
      start = start + matched - fallback[matched]
      matched = fallback[matched] if matched > 0
    end
  end
  nil
end
|
75
|
+
|
76
|
+
# Allows kmp_search to be called as an instance method in classes that include the Search module.
|
77
|
+
#
|
78
|
+
# class String; include Algorithms::Search; end
|
79
|
+
# "ABC ABCDAB ABCDABCDABDE".kmp_search("ABCDABD") #=> 15
|
80
|
+
# Instance-level convenience for classes that include this module: runs the
# module-level KMP search with the receiver as the text to scan.
#
# substring - pattern to locate inside the receiver.
#
# Returns whatever Algorithms::Search.kmp_search returns for (self, substring).
def kmp_search(substring)
  haystack = self
  Algorithms::Search.kmp_search(haystack, substring)
end
|
83
|
+
|
84
|
+
end
|
@@ -0,0 +1,368 @@
|
|
1
|
+
require 'containers/heap' # for heapsort
|
2
|
+
|
3
|
+
=begin rdoc
|
4
|
+
This module implements sorting algorithms. Documentation is provided for each algorithm.
|
5
|
+
|
6
|
+
=end
|
7
|
+
module Algorithms::Sort
|
8
|
+
# Bubble sort: A very naive sort that keeps swapping elements until the container is sorted.
|
9
|
+
# Requirements: Needs to be able to compare elements with <=>, and the [] []= methods should
|
10
|
+
# be implemented for the container.
|
11
|
+
# Time Complexity: О(n^2)
|
12
|
+
# Space Complexity: О(n) total, O(1) auxiliary
|
13
|
+
# Stable: Yes
|
14
|
+
#
|
15
|
+
# Algorithms::Sort.bubble_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
16
|
+
# Sorts container in place by repeatedly swapping adjacent out-of-order pairs.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to <=>.
#
# Returns the same container object, now sorted ascending.
def self.bubble_sort(container)
  # After each full pass the largest remaining element has settled at the end
  # of the scanned window, so the next pass can stop one position earlier.
  unsettled = container.size - 1
  loop do
    swapped = false
    unsettled.times do |i|
      next unless (container[i] <=> container[i + 1]) == 1

      container[i], container[i + 1] = container[i + 1], container[i]
      swapped = true
    end
    break unless swapped

    unsettled -= 1
  end
  container
end
|
29
|
+
|
30
|
+
# Comb sort: A variation on bubble sort that dramatically improves performance.
|
31
|
+
# Source: http://yagni.com/combsort/
|
32
|
+
# Requirements: Needs to be able to compare elements with <=>, and the [] []= methods should
|
33
|
+
# be implemented for the container.
|
34
|
+
# Time Complexity: О(n^2)
|
35
|
+
# Space Complexity: О(n) total, O(1) auxiliary
|
36
|
+
# Stable: No
|
37
|
+
#
|
38
|
+
# Algorithms::Sort.comb_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
39
|
+
# Sorts container in place with comb sort: a bubble sort whose comparison
# distance (gap) starts large and shrinks by a factor of 10/13 each pass.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to <=>.
#
# Returns the same container object, now sorted ascending.
def self.comb_sort(container)
  gap = container.size
  loop do
    gap = gap * 10 / 13
    gap = 11 if gap == 9 || gap == 10 # steer the gap sequence through 11
    gap = 1 if gap < 1
    swapped = false
    (container.size - gap).times do |i|
      next unless (container[i] <=> container[i + gap]) == 1

      container[i], container[i + gap] = container[i + gap], container[i]
      swapped = true
    end
    # Done only once a gap-1 pass (plain bubble pass) finds nothing to swap.
    break if !swapped && gap == 1
  end
  container
end
|
57
|
+
|
58
|
+
# Selection sort: A naive sort that goes through the container and selects the smallest element,
|
59
|
+
# putting it at the beginning. Repeat until the end is reached.
|
60
|
+
# Requirements: Needs to be able to compare elements with <=>, and the [] []= methods should
|
61
|
+
# be implemented for the container.
|
62
|
+
# Time Complexity: О(n^2)
|
63
|
+
# Space Complexity: О(n) total, O(1) auxiliary
|
64
|
+
# Stable: No
|
65
|
+
#
|
66
|
+
# Algorithms::Sort.selection_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
67
|
+
# Sorts container in place: for each position, finds the smallest element in
# the remaining tail and swaps it into that position.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to <=>.
#
# Returns the same container object, now sorted ascending.
def self.selection_sort(container)
  last = container.size - 1
  0.upto(last) do |slot|
    smallest = slot
    (slot + 1).upto(last) do |probe|
      smallest = probe if (container[probe] <=> container[smallest]) == -1
    end
    next if smallest == slot # already in place; skip the no-op self-swap

    container[slot], container[smallest] = container[smallest], container[slot]
  end
  container
end
|
77
|
+
|
78
|
+
# Heap sort: Uses a heap (implemented by the Containers module) to sort the collection.
|
79
|
+
# Requirements: Needs to be able to compare elements with <=>
|
80
|
+
# Time Complexity: O(n log n)
|
81
|
+
# Space Complexity: О(n) total, O(1) auxiliary
|
82
|
+
# Stable: No
|
83
|
+
#
|
84
|
+
# Algorithms::Sort.heapsort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
85
|
+
# Builds a Containers::Heap from container and drains it into a new array.
#
# container - collection accepted by Containers::Heap.new.
#
# Returns a new Array holding the elements in the order the heap pops them;
# the result is a fresh array, not the container that was passed in.
def self.heapsort(container)
  heap = Containers::Heap.new(container)
  drained = []
  drained << heap.pop until heap.empty?
  drained
end
|
91
|
+
|
92
|
+
# Insertion sort: Elements are inserted sequentially into the right position.
|
93
|
+
# Requirements: Needs to be able to compare elements with <=>, and the [] []= methods should
|
94
|
+
# be implemented for the container.
|
95
|
+
# Time Complexity: О(n^2)
|
96
|
+
# Space Complexity: О(n) total, O(1) auxiliary
|
97
|
+
# Stable: Yes
|
98
|
+
#
|
99
|
+
# Algorithms::Sort.insertion_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
100
|
+
# Sorts container in place by inserting each element into the already-sorted
# prefix to its left.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to >.
#
# Returns the same container object, now sorted ascending.
def self.insertion_sort(container)
  return container if container.size < 2

  1.upto(container.size - 1) do |i|
    value = container[i]
    hole = i - 1
    # Shift larger elements one slot right until value's position is found.
    while hole >= 0 && container[hole] > value
      container[hole + 1] = container[hole]
      hole -= 1
    end
    container[hole + 1] = value
  end
  container
end
|
113
|
+
|
114
|
+
# Shell sort: Similar approach as insertion sort but slightly better.
|
115
|
+
# Requirements: Needs to be able to compare elements with <=>, and the [] []= methods should
|
116
|
+
# be implemented for the container.
|
117
|
+
# Time Complexity: О(n^2)
|
118
|
+
# Space Complexity: О(n) total, O(1) auxiliary
|
119
|
+
# Stable: No
|
120
|
+
#
|
121
|
+
# Algorithms::Sort.shell_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
122
|
+
# Sorts container in place with shell sort: gapped insertion sorts with a
# gap that starts at size/2 and shrinks until it reaches zero.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to >.
#
# Returns the same container object, now sorted ascending.
def self.shell_sort(container)
  gap = container.size / 2
  while gap > 0
    gap.upto(container.size - 1) do |i|
      held = container[i]
      slot = i
      # Gapped insertion: move larger elements gap positions to the right.
      while slot >= gap && container[slot - gap] > held
        container[slot] = container[slot - gap]
        slot -= gap
      end
      container[slot] = held
    end
    # Shrink by ~2.2; a gap of 2 is forced to 1 so the final pass is a plain
    # insertion sort, and a gap of 1 rounds down to 0, which ends the loop.
    gap = gap == 2 ? 1 : (gap / 2.2).round
  end
  container
end
|
138
|
+
|
139
|
+
# Quicksort: A divide-and-conquer sort that recursively partitions a container until it is sorted.
|
140
|
+
# Requirements: Container should implement #pop and include the Enumerable module.
|
141
|
+
# Time Complexity: О(n log n) average, O(n^2) worst-case
|
142
|
+
# Space Complexity: О(n) auxiliary
|
143
|
+
# Stable: No
|
144
|
+
#
|
145
|
+
# Algorithms::Sort.quicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
146
|
+
# def self.quicksort(container)
|
147
|
+
# return [] if container.empty?
|
148
|
+
#
|
149
|
+
# x, *xs = container
|
150
|
+
#
|
151
|
+
# quicksort(xs.select { |i| i < x }) + [x] + quicksort(xs.select { |i| i >= x })
|
152
|
+
# end
|
153
|
+
|
154
|
+
# Partitions data[left..right] in place around the pivot data[left].
#
# data  - collection responding to #[] and #[]= whose elements respond to <.
# left  - index of the first element of the range (also the pivot position).
# right - index of the last element of the range (inclusive).
#
# Returns the pivot's final index. Afterwards every element of the range to
# the left of that index is < pivot and every element to its right is >= pivot.
def self.partition(data, left, right)
  pivot = data[left]
  boundary = left # last index known to hold an element smaller than the pivot
  (left + 1).upto(right) do |scan|
    next unless data[scan] < pivot

    boundary += 1
    data[scan], data[boundary] = data[boundary], data[scan]
  end
  # Drop the pivot between the "smaller" run and the rest.
  data[left], data[boundary] = data[boundary], data[left]
  boundary
end
|
170
|
+
|
171
|
+
|
172
|
+
# def self.quicksort(container, left = 0, right = container.size - 1)
|
173
|
+
# if left < right
|
174
|
+
# middle = partition(container, left, right)
|
175
|
+
# quicksort(container, left, middle - 1)
|
176
|
+
# quicksort(container, middle + 1, right)
|
177
|
+
# end
|
178
|
+
# end
|
179
|
+
|
180
|
+
# Sorts container in place with an iterative (non-recursive) quicksort.
# Pending ranges are kept on two parallel stacks instead of the call stack.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to >= and <=.
#
# Returns the same container object, now sorted ascending.
def self.quicksort(container)
  starts = [0]              # inclusive start index of each pending range
  stops = [container.size]  # exclusive end index of each pending range
  depth = 0
  while depth >= 0
    l = starts[depth]
    r = stops[depth] - 1
    if l < r
      # The pivot is lifted out of slot l, leaving a hole that the two scans
      # below alternately fill from the right and from the left.
      pivot = container[l]
      while l < r
        r -= 1 while container[r] >= pivot && l < r
        if l < r
          container[l] = container[r]
          l += 1
        end
        l += 1 while container[l] <= pivot && l < r
        if l < r
          container[r] = container[l]
          r -= 1
        end
      end
      container[l] = pivot
      # Push the right part on top and shrink the current entry to the left
      # part, so the right part is processed first.
      starts[depth + 1] = l + 1
      stops[depth + 1] = stops[depth]
      stops[depth] = l
      depth += 1
    else
      depth -= 1
    end
  end
  container
end
|
213
|
+
|
214
|
+
# Mergesort: A stable divide-and-conquer sort that sorts small chunks of the container and then merges them together.
|
215
|
+
# Returns an array of the sorted elements.
|
216
|
+
# Requirements: Container should implement []
|
217
|
+
# Time Complexity: О(n log n) average and worst-case
|
218
|
+
# Space Complexity: О(n) auxiliary
|
219
|
+
# Stable: Yes
|
220
|
+
#
|
221
|
+
# Algorithms::Sort.mergesort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
222
|
+
# Recursively splits container in half, sorts each half and merges the two
# sorted halves with Algorithms::Sort.merge.
#
# container - collection responding to #size and to #[] with a Range.
#
# Returns the container itself when it has at most one element; otherwise a
# new collection produced by merge. The input is not reordered.
def self.mergesort(container)
  return container if container.size <= 1

  half = container.size / 2
  lower = container[0...half]
  upper = container[half...container.size]
  merge(mergesort(lower), mergesort(upper))
end
|
229
|
+
|
230
|
+
# Merges two already-sorted arrays into one sorted array.
#
# left  - sorted Array whose elements respond to <=.
# right - sorted Array whose elements respond to <=.
#
# Neither argument is modified. On ties the element from left comes first,
# which keeps the merge stable.
#
# Returns a new Array containing every element of left and right in order.
def self.merge(left, right)
  merged = []
  li = 0
  ri = 0
  while li < left.size && ri < right.size
    if left[li] <= right[ri]
      merged << left[li]
      li += 1
    else
      merged << right[ri]
      ri += 1
    end
  end
  # At most one side still has elements; append whatever remains.
  merged.concat(left[li..-1]).concat(right[ri..-1])
end
|
237
|
+
|
238
|
+
# Dual-Pivot Quicksort is a variation of Quicksort by Vladimir Yaroslavskiy.
|
239
|
+
# This is an implementation of the algorithm as it was found in the original
|
240
|
+
# research paper:
|
241
|
+
#
|
242
|
+
# http://iaroslavski.narod.ru/quicksort/DualPivotQuicksort.pdf
|
243
|
+
#
|
244
|
+
# Mirror:
|
245
|
+
# http://codeblab.com/wp-content/uploads/2009/09/DualPivotQuicksort.pdf
|
246
|
+
#
|
247
|
+
# "This algorithm offers O(n log(n)) performance on many data sets that cause
|
248
|
+
# other quicksorts to degrade to quadratic performance, and is typically
|
249
|
+
# faster than traditional (one-pivot) Quicksort implementations."
|
250
|
+
# -- http://download.oracle.com/javase/7/docs/api/java/util/Arrays.html
|
251
|
+
#
|
252
|
+
# The algorithm was improved by Vladimir Yaroslavskiy, Jon Bentley, and
|
253
|
+
# Joshua Bloch, and was implemented as the default sort algorithm for
|
254
|
+
# primitives in Java 7.
|
255
|
+
#
|
256
|
+
# Implementation in the Java JDK as of November, 2011:
|
257
|
+
# http://www.docjar.com/html/api/java/util/DualPivotQuicksort.java.html
|
258
|
+
#
|
259
|
+
# It is proved that for the Dual-Pivot Quicksort the average number
|
260
|
+
# of comparisons is 2*n*ln(n), the average number of swaps is
|
261
|
+
# 0.8*n*ln(n), whereas classical Quicksort algorithm has 2*n*ln(n)
|
262
|
+
# and 1*n*ln(n) respectively. This has been fully examined mathematically
|
263
|
+
# and experimentally.
|
264
|
+
#
|
265
|
+
# Requirements: Container should implement #pop and include the Enumerable module.
|
266
|
+
# Time Complexity: O(n log n) average, O(n^2) worst-case
|
267
|
+
# Space Complexity: О(n) auxiliary
|
268
|
+
#
|
269
|
+
# Stable: No
|
270
|
+
#
|
271
|
+
# Algorithms::Sort.dualpivotquicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
272
|
+
|
273
|
+
# Public entry point for the dual-pivot quicksort.
#
# container - collection responding to #size; it is handed to dualpivot,
#             which sorts it in place.
#
# Returns the container itself when it has at most one element; otherwise
# whatever dualpivot returns for the full index range with divisor 3.
def self.dualpivotquicksort(container)
  return container if container.size <= 1

  last_index = container.size - 1
  dualpivot(container, 0, last_index, 3)
end
|
277
|
+
|
278
|
+
# Sorts container[left..right] in place with Yaroslavskiy's dual-pivot
# quicksort, falling back to insertion sort for short ranges.
#
# container - collection responding to #size, #[] and #[]= whose elements
#             respond to <, > and ==.
# left      - index of the first element of the range (default 0).
# right     - index of the last element of the range, inclusive
#             (default container.size - 1).
# div       - divisor used to pick the two pivot candidates; it is increased
#             when the middle partition turns out small.
#
# Only indices left..right are touched. Returns the container.
def self.dualpivot(container, left = 0, right = container.size - 1, div = 3)
  length = right - left

  if length < 27
    # Insertion sort restricted to the requested range.
    (left + 1).upto(right) do |i|
      value = container[i]
      j = i - 1
      while j >= left && container[j] > value
        container[j + 1] = container[j]
        j -= 1
      end
      container[j + 1] = value
    end
    return container
  end

  # Pick two pivot candidates roughly a third in from each end.
  third = length / div
  m1 = left + third
  m2 = right - third
  m1 = left + 1 if m1 <= left
  m2 = right - 1 if m2 >= right

  # Park the smaller candidate at left and the larger one at right.
  if container[m1] < container[m2]
    dualpivot_swap(container, m1, left)
    dualpivot_swap(container, m2, right)
  else
    dualpivot_swap(container, m1, right)
    dualpivot_swap(container, m2, left)
  end
  pivot1 = container[left]
  pivot2 = container[right]

  # Three-way partition. Invariant while scanning with k:
  #   (left, less)   holds elements <  pivot1
  #   [less, k)      holds elements in pivot1..pivot2
  #   (great, right) holds elements >  pivot2
  less = left + 1
  great = right - 1
  k = less
  while k <= great
    if container[k] < pivot1
      dualpivot_swap(container, k, less)
      less += 1
    elsif container[k] > pivot2
      great -= 1 while k < great && container[great] > pivot2
      dualpivot_swap(container, k, great)
      great -= 1
      if container[k] < pivot1
        dualpivot_swap(container, k, less)
        less += 1
      end
    end
    k += 1
  end

  # A small middle part means the pivots were far apart; pick them closer to
  # the ends in the recursive calls.
  dist = great - less
  div += 1 if dist < 13

  # Move the pivots to their final positions.
  dualpivot_swap(container, less - 1, left)
  dualpivot_swap(container, great + 1, right)

  # Sort the outer parts.
  dualpivot(container, left, less - 2, div)
  dualpivot(container, great + 2, right, div)

  # When the middle part is large, first push elements equal to a pivot to
  # its edges so the recursive call below has less to do. The bound must be
  # re-read every iteration because great shrinks inside the loop.
  if dist > length - 13 && pivot1 != pivot2
    k = less
    while k <= great
      if container[k] == pivot1
        dualpivot_swap(container, k, less)
        less += 1
      elsif container[k] == pivot2
        dualpivot_swap(container, k, great)
        great -= 1
        if container[k] == pivot1
          dualpivot_swap(container, k, less)
          less += 1
        end
      end
      k += 1
    end
  end

  # Sort the middle part; with equal pivots it is already uniform.
  dualpivot(container, less, great, div) if pivot1 < pivot2
  container
end
|
363
|
+
|
364
|
+
# Exchanges the elements stored at indices i and j of container, in place.
#
# container - collection responding to #[] and #[]=.
# i, j      - indices of the two elements to exchange.
def self.dualpivot_swap(container, i, j)
  container[i], container[j] = container[j], container[i]
end
|
367
|
+
end
|
368
|
+
|