amorim-algorithms 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.markdown +193 -0
  3. data/Gemfile +9 -0
  4. data/Manifest +51 -0
  5. data/README.markdown +87 -0
  6. data/Rakefile +22 -0
  7. data/algorithms.gemspec +23 -0
  8. data/benchmarks/deque.rb +17 -0
  9. data/benchmarks/sorts.rb +34 -0
  10. data/benchmarks/treemaps.rb +51 -0
  11. data/ext/algorithms/string/extconf.rb +4 -0
  12. data/ext/algorithms/string/string.c +68 -0
  13. data/ext/containers/bst/bst.c +247 -0
  14. data/ext/containers/bst/extconf.rb +4 -0
  15. data/ext/containers/deque/deque.c +247 -0
  16. data/ext/containers/deque/extconf.rb +4 -0
  17. data/ext/containers/rbtree_map/extconf.rb +4 -0
  18. data/ext/containers/rbtree_map/rbtree.c +498 -0
  19. data/ext/containers/splaytree_map/extconf.rb +4 -0
  20. data/ext/containers/splaytree_map/splaytree.c +419 -0
  21. data/lib/algorithms.rb +66 -0
  22. data/lib/algorithms/search.rb +84 -0
  23. data/lib/algorithms/sort.rb +368 -0
  24. data/lib/algorithms/string.rb +9 -0
  25. data/lib/containers/deque.rb +171 -0
  26. data/lib/containers/heap.rb +499 -0
  27. data/lib/containers/kd_tree.rb +110 -0
  28. data/lib/containers/priority_queue.rb +113 -0
  29. data/lib/containers/queue.rb +68 -0
  30. data/lib/containers/rb_tree_map.rb +398 -0
  31. data/lib/containers/splay_tree_map.rb +269 -0
  32. data/lib/containers/stack.rb +67 -0
  33. data/lib/containers/suffix_array.rb +68 -0
  34. data/lib/containers/trie.rb +182 -0
  35. data/spec/bst_gc_mark_spec.rb +25 -0
  36. data/spec/bst_spec.rb +25 -0
  37. data/spec/deque_gc_mark_spec.rb +18 -0
  38. data/spec/deque_spec.rb +108 -0
  39. data/spec/heap_spec.rb +131 -0
  40. data/spec/kd_expected_out.txt +10000 -0
  41. data/spec/kd_test_in.txt +10000 -0
  42. data/spec/kd_tree_spec.rb +34 -0
  43. data/spec/map_gc_mark_spec.rb +29 -0
  44. data/spec/priority_queue_spec.rb +75 -0
  45. data/spec/queue_spec.rb +61 -0
  46. data/spec/rb_tree_map_spec.rb +123 -0
  47. data/spec/search_spec.rb +28 -0
  48. data/spec/sort_spec.rb +29 -0
  49. data/spec/splay_tree_map_spec.rb +106 -0
  50. data/spec/stack_spec.rb +60 -0
  51. data/spec/string_spec.rb +15 -0
  52. data/spec/suffix_array_spec.rb +40 -0
  53. data/spec/trie_spec.rb +59 -0
  54. metadata +108 -0
@@ -0,0 +1,84 @@
1
+ =begin rdoc
2
+ This module implements search algorithms. Documentation is provided for each algorithm.
3
+
4
+ =end
5
+ module Algorithms::Search
6
# Binary search: looks up +item+ in a container that is already sorted in
# ascending order, halving the candidate range on every probe.
#
# Returns the matching element, or nil when nothing matches or when +item+ is
# nil. With duplicates, whichever equal element the probe reaches first is
# returned; it is not guaranteed to be the first or the last of them.
#
# Requirements: the container responds to #size and #[], and its elements can
# be compared against +item+ with > and <.
# Complexity: O(lg N)
#
#   Algorithms::Search.binary_search([1, 2, 3], 1) #=> 1
#   Algorithms::Search.binary_search([1, 2, 3], 4) #=> nil
def self.binary_search(container, item)
  return nil if item.nil?

  lo = 0
  hi = container.size - 1
  until lo > hi
    middle = (lo + hi) / 2
    candidate = container[middle]
    if candidate > item
      # The probe overshot: keep searching the lower half.
      hi = middle - 1
      next
    end
    # Neither greater nor smaller means the probe hit a match.
    return candidate unless candidate < item

    lo = middle + 1
  end
  nil
end
31
+
32
# Knuth-Morris-Pratt substring search: finds the starting position of
# +substring+ inside +string+. A failure table built from the pattern tells the
# scan where to resume after a mismatch, so characters of +string+ that already
# matched are never compared again.
#
# Returns the index in +string+ where the first match starts, or nil when there
# is no match or when either argument is nil. An empty +substring+ matches at
# index 0, which is the same answer String#index gives.
#
# Requirements: both arguments respond to #length and #[].
# Complexity: O(n + k), where n is the length of the string and k is the length
# of the substring.
#
#   Algorithms::Search.kmp_search("ABC ABCDAB ABCDABCDABDE", "ABCDABD") #=> 15
#   Algorithms::Search.kmp_search("ABC ABCDAB ABCDABCDABDE", "ABCDEF") #=> nil
#   Algorithms::Search.kmp_search("ABC", "") #=> 0
def self.kmp_search(string, substring)
  return nil if string.nil? || substring.nil?
  # Without this guard the scan below compares against substring[0] == nil and
  # can never report a match for the empty pattern.
  return 0 if substring.length.zero?

  # Build the failure table: failure_table[p] is the length of the longest
  # proper prefix of the pattern that is also a suffix of substring[0...p].
  pos = 2
  cnd = 0
  failure_table = [-1, 0]
  while pos < substring.length
    if substring[pos - 1] == substring[cnd]
      failure_table[pos] = cnd + 1
      pos += 1
      cnd += 1
    elsif cnd > 0
      # Fall back to the next shorter candidate prefix and retry.
      cnd = failure_table[cnd]
    else
      failure_table[pos] = 0
      pos += 1
    end
  end

  # m is the start of the current alignment in string, i the offset within
  # the pattern that is being compared.
  m = i = 0
  while m + i < string.length
    if substring[i] == string[m + i]
      i += 1
      return m if i == substring.length
    else
      # Shift the alignment by what the table allows; failure_table[0] is -1,
      # so a mismatch on the first character advances m by exactly one.
      m = m + i - failure_table[i]
      i = failure_table[i] if i > 0
    end
  end
  nil
end
75
+
76
# Instance-level convenience for classes that mix in the Search module: the
# receiver is the text that gets searched, and the work is delegated to
# Algorithms::Search.kmp_search.
#
#   class String; include Algorithms::Search; end
#   "ABC ABCDAB ABCDABCDABDE".kmp_search("ABCDABD") #=> 15
def kmp_search(substring)
  haystack = self
  ::Algorithms::Search.kmp_search(haystack, substring)
end
83
+
84
+ end
@@ -0,0 +1,368 @@
1
+ require 'containers/heap' # for heapsort
2
+
3
+ =begin rdoc
4
+ This module implements sorting algorithms. Documentation is provided for each algorithm.
5
+
6
+ =end
7
+ module Algorithms::Sort
8
# Bubble sort: a very naive sort that sweeps over the container, swapping
# adjacent out-of-order neighbours, until a full sweep makes no swap.
# The container is sorted in place and returned.
# Requirements: elements must be comparable with <=>, and the container must
# implement size, [] and []=.
# Time Complexity: O(n^2)
# Space Complexity: O(n) total, O(1) auxiliary
# Stable: Yes
#
#   Algorithms::Sort.bubble_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.bubble_sort(container)
  dirty = true
  while dirty
    dirty = false
    0.upto(container.size - 2) do |idx|
      nxt = idx + 1
      next unless (container[idx] <=> container[nxt]) == 1

      container[idx], container[nxt] = container[nxt], container[idx]
      dirty = true
    end
  end
  container
end
29
+
30
# Comb sort: a variation on bubble sort that compares elements a shrinking
# "gap" apart, which moves far-out-of-place elements quickly before the final
# gap-1 passes finish the job. The container is sorted in place and returned.
# Source: http://yagni.com/combsort/
# Requirements: elements must be comparable with <=>, and the container must
# implement size, [] and []=.
# Time Complexity: O(n^2)
# Space Complexity: O(n) total, O(1) auxiliary
# Stable: No (swaps across a gap can reorder equal elements)
#
#   Algorithms::Sort.comb_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.comb_sort(container)
  gap = container.size
  loop do
    # Shrink the gap by a factor of 1.3 using integer arithmetic.
    gap = gap * 10 / 13
    # The "Combsort11" tweak: gaps of 9 or 10 are replaced by 11.
    gap = 11 if gap == 9 || gap == 10
    gap = 1 if gap < 1
    swapped = false
    (container.size - gap).times do |i|
      if (container[i] <=> container[i + gap]) == 1
        container[i], container[i + gap] = container[i + gap], container[i]
        swapped = true
      end
    end
    # Done only once a plain adjacent pass (gap 1) needed no swap.
    break if gap == 1 && !swapped
  end
  container
end
57
+
58
# Selection sort: a naive sort that repeatedly selects the smallest element of
# the unsorted tail and swaps it to the front of that tail. The container is
# sorted in place and returned.
# Requirements: elements must be comparable with <=>, and the container must
# implement size, [] and []=.
# Time Complexity: O(n^2)
# Space Complexity: O(n) total, O(1) auxiliary
# Stable: No (the swap can carry an element past others equal to it,
#         e.g. [2a, 2b, 1] becomes [1, 2b, 2a])
#
#   Algorithms::Sort.selection_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.selection_sort(container)
  last = container.size - 1
  # The final position needs no pass: once everything before it is placed,
  # the remaining element is already the largest.
  0.upto(last - 1) do |i|
    min = i
    (i + 1).upto(last) do |j|
      min = j if (container[j] <=> container[min]) == -1
    end
    # Skip the pointless self-swap when the element is already in place.
    next if min == i

    container[i], container[min] = container[min], container[i]
  end
  container
end
77
+
78
# Heap sort: loads the collection into a Containers::Heap and pops elements
# off one by one into a new array, which is returned. The result is a fresh
# Array rather than a re-ordering of +container+.
# Requirements: elements must be comparable with <=>.
# Time Complexity: O(n log n), assuming Containers::Heap pops in O(log n)
# Space Complexity: O(n) auxiliary (the heap plus the result array)
# Stable: No (a heap does not preserve the relative order of equal elements)
#
#   Algorithms::Sort.heapsort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.heapsort(container)
  heap = Containers::Heap.new(container)
  sorted = []
  # The output order is whatever order the heap pops in.
  sorted << heap.pop until heap.empty?
  sorted
end
91
+
92
# Insertion sort: walks the container left to right and slides each element
# back into its place among the already-sorted elements before it. The
# container is sorted in place and returned.
# Requirements: elements must be comparable with >, and the container must
# implement size, [] and []=.
# Time Complexity: O(n^2)
# Space Complexity: O(n) total, O(1) auxiliary
# Stable: Yes
#
#   Algorithms::Sort.insertion_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.insertion_sort(container)
  return container if container.size < 2

  1.upto(container.size - 1) do |pos|
    pending = container[pos]
    slot = pos
    # Shift larger predecessors one step right until the gap reaches the
    # place where the pending element belongs.
    while slot > 0 && container[slot - 1] > pending
      container[slot] = container[slot - 1]
      slot -= 1
    end
    container[slot] = pending
  end
  container
end
113
+
114
# Shell sort: an insertion sort run over elements that are a shrinking gap
# apart, finishing with an ordinary gap-1 insertion sort. The container is
# sorted in place and returned.
# Requirements: elements must be comparable with >, and the container must
# implement size, [] and []=.
# Time Complexity: O(n^2)
# Space Complexity: O(n) total, O(1) auxiliary
# Stable: No (gapped moves can reorder equal elements)
#
#   Algorithms::Sort.shell_sort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.shell_sort(container)
  gap = container.size / 2
  while gap > 0
    gap.upto(container.size - 1) do |i|
      held = container[i]
      slot = i
      # Gapped insertion: move larger elements up by one gap at a time.
      while slot >= gap && container[slot - gap] > held
        container[slot] = container[slot - gap]
        slot -= gap
      end
      container[slot] = held
    end
    # Divide the gap by 2.2; the special case keeps 2 from rounding to 1 via
    # the general rule's path and guarantees a final pass with gap 1, after
    # which (1 / 2.2).round == 0 ends the loop.
    gap = gap == 2 ? 1 : (gap / 2.2).round
  end
  container
end
138
+
139
+ # Quicksort: A divide-and-conquer sort that recursively partitions a container until it is sorted.
140
+ # Requirements: Container should implement #pop and include the Enumerable module.
141
+ # Time Complexity: О(n log n) average, O(n^2) worst-case
142
+ # Space Complexity: О(n) auxiliary
143
+ # Stable: No
144
+ #
145
+ # Algorithms::Sort.quicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
146
+ # def self.quicksort(container)
147
+ # return [] if container.empty?
148
+ #
149
+ # x, *xs = container
150
+ #
151
+ # quicksort(xs.select { |i| i < x }) + [x] + quicksort(xs.select { |i| i >= x })
152
+ # end
153
+
154
# Partitions data[left..right] in place around the pivot data[left] (Lomuto
# scheme) and returns the pivot's final index.
#
# After the call, every element of the range before the returned index is
# smaller than the pivot and every element after it is greater than or equal
# to the pivot. Elements outside left..right are not touched.
#
# data  - container implementing [] and []= whose elements support <
# left  - index of the first element of the range (also the pivot position)
# right - index of the last element of the range
def self.partition(data, left, right)
  pivot = data[left]
  # boundary is the last index known to hold an element smaller than the pivot.
  boundary = left

  (left + 1).upto(right) do |scan|
    next unless data[scan] < pivot

    boundary += 1
    data[scan], data[boundary] = data[boundary], data[scan]
  end

  # Drop the pivot between the "smaller" and the "not smaller" groups.
  data[left], data[boundary] = data[boundary], data[left]
  boundary
end
170
+
171
+
172
+ # def self.quicksort(container, left = 0, right = container.size - 1)
173
+ # if left < right
174
+ # middle = partition(container, left, right)
175
+ # quicksort(container, left, middle - 1)
176
+ # quicksort(container, middle + 1, right)
177
+ # end
178
+ # end
179
+
180
# Quicksort, iterative form: instead of recursing, pending ranges are kept on
# two explicit stacks (+starts+ holds the first index of each range, +stops+
# holds one past its last index). The first element of a range is used as the
# pivot. The container is sorted in place and returned.
# Requirements: elements must be comparable with >= and <=, and the container
# must implement size, [] and []=.
# Time Complexity: O(n log n) average, O(n^2) worst-case
# Stable: No
#
#   Algorithms::Sort.quicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.quicksort(container)
  starts = [0]
  stops = [container.size]
  depth = 0
  until depth < 0
    lo = starts[depth]
    hi = stops[depth] - 1
    unless lo < hi
      # Ranges of zero or one element are already sorted: pop the stack.
      depth -= 1
      next
    end

    # Lift the pivot out, leaving a "hole" at lo that the two scans below
    # fill alternately from the right and from the left.
    pivot = container[lo]
    while lo < hi
      hi -= 1 while container[hi] >= pivot && lo < hi
      if lo < hi
        container[lo] = container[hi]
        lo += 1
      end
      lo += 1 while container[lo] <= pivot && lo < hi
      if lo < hi
        container[hi] = container[lo]
        hi -= 1
      end
    end
    container[lo] = pivot

    # Push the part right of the pivot; the current entry shrinks to the
    # part left of the pivot and is handled after the pushed one.
    starts[depth + 1] = lo + 1
    stops[depth + 1] = stops[depth]
    stops[depth] = lo
    depth += 1
  end
  container
end
213
+
214
# Mergesort: a stable divide-and-conquer sort. The container is cut in half,
# each half is sorted recursively, and the two sorted halves are merged.
# Returns an array of the sorted elements; a container of at most one element
# is returned as is.
# Requirements: Container should implement size and [] with a range
# Time Complexity: O(n log n) average and worst-case
# Space Complexity: O(n) auxiliary
# Stable: Yes
#
#   Algorithms::Sort.mergesort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.mergesort(container)
  count = container.size
  return container if count <= 1

  half = count / 2
  halves = [container[0...half], container[half...count]]
  merge(*halves.map { |part| mergesort(part) })
end
229
+
230
# Merges two arrays that are each already sorted in ascending order into one
# new sorted array. On ties the element from +left+ is taken first, which is
# what keeps mergesort stable. Neither argument is modified.
#
# left, right - sorted Arrays whose elements can be compared with <=
#
# Returns a new Array holding every element of both inputs in sorted order.
def self.merge(left, right)
  merged = []
  li = 0
  ri = 0
  while li < left.size && ri < right.size
    if left[li] <= right[ri]
      merged << left[li]
      li += 1
    else
      merged << right[ri]
      ri += 1
    end
  end
  # At most one side still has elements; a slice starting at size is empty.
  merged.concat(left[li..-1]).concat(right[ri..-1])
end
237
+
238
# Dual-Pivot Quicksort is a variation of Quicksort by Vladimir Yaroslavskiy.
# This is an implementation of the algorithm as it was found in the original
# research paper:
#
# http://iaroslavski.narod.ru/quicksort/DualPivotQuicksort.pdf
#
# Mirror:
# http://codeblab.com/wp-content/uploads/2009/09/DualPivotQuicksort.pdf
#
# "This algorithm offers O(n log(n)) performance on many data sets that cause
# other quicksorts to degrade to quadratic performance, and is typically
# faster than traditional (one-pivot) Quicksort implementations."
# -- http://download.oracle.com/javase/7/docs/api/java/util/Arrays.html
#
# The algorithm was improved by Vladimir Yaroslavskiy, Jon Bentley, and
# Joshua Bloch, and was implemented as the default sort algorithm for
# primitives in Java 7.
#
# Implementation in the Java JDK as of November, 2011:
# http://www.docjar.com/html/api/java/util/DualPivotQuicksort.java.html
#
# It is proved that for the Dual-Pivot Quicksort the average number
# of comparisons is 2*n*ln(n), the average number of swaps is
# 0.8*n*ln(n), whereas classical Quicksort algorithm has 2*n*ln(n)
# and 1*n*ln(n) respectively. This has been fully examined mathematically
# and experimentally.
#
# This entry point returns a container of at most one element untouched and
# otherwise hands the whole index range to dualpivot with the initial
# divisor 3.
#
# Requirements: elements must be comparable with <, >, <= and ==, and the
# container must implement size, [] and []=.
# Time Complexity: O(n log n) average, O(n^2) worst-case
# Space Complexity: O(1) auxiliary apart from the recursion stack
#
# Stable: No
#
#   Algorithms::Sort.dualpivotquicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
def self.dualpivotquicksort(container)
  return container if container.size <= 1

  dualpivot(container, 0, container.size - 1, 3)
end
277
+
278
# Sorts container[left..right] in place with Yaroslavskiy's dual-pivot
# quicksort and returns the container. Elements outside left..right are never
# touched.
#
# Ranges spanning fewer than 27 positions are finished with an insertion sort
# confined to the range. Larger ranges are split around two pivots into three
# parts (smaller than pivot1, between the pivots, greater than pivot2), each
# of which is sorted recursively.
#
# container - container implementing [] and []= (and size, for the default
#             +right+) whose elements support <, >, <= and ==
# left      - index of the first element of the range
# right     - index of the last element of the range
# div       - divisor that decides how far from the ends the two pivot
#             candidates are taken; it grows when the middle part is small
def self.dualpivot(container, left = 0, right = container.size - 1, div = 3)
  length = right - left
  if length < 27
    # Insertion sort for a tiny range, restricted to left..right.
    (left + 1).upto(right) do |i|
      value = container[i]
      j = i - 1
      while j >= left && container[j] > value
        container[j + 1] = container[j]
        j -= 1
      end
      container[j + 1] = value
    end
    return container
  end

  # Pick two pivot candidates ("medians") a third of the way in from each
  # end and move them to the ends so that container[left] <= container[right].
  third = length / div
  m1 = left + third
  m2 = right - third
  m1 = left + 1 if m1 <= left
  m2 = right - 1 if m2 >= right
  if container[m1] < container[m2]
    dualpivot_swap(container, m1, left)
    dualpivot_swap(container, m2, right)
  else
    dualpivot_swap(container, m1, right)
    dualpivot_swap(container, m2, left)
  end
  pivot1 = container[left]
  pivot2 = container[right]

  # Three-way partition. Invariant while scanning with k:
  #   left+1 .. less-1   holds elements <  pivot1
  #   less   .. k-1      holds elements in pivot1..pivot2
  #   great+1 .. right-1 holds elements >  pivot2
  less = left + 1
  great = right - 1
  k = less
  while k <= great
    if container[k] < pivot1
      # Swap with the current boundary, THEN advance it (post-increment).
      dualpivot_swap(container, k, less)
      less += 1
    elsif container[k] > pivot2
      great -= 1 while k < great && container[great] > pivot2
      dualpivot_swap(container, k, great)
      great -= 1
      if container[k] < pivot1
        dualpivot_swap(container, k, less)
        less += 1
      end
    end
    k += 1
  end

  # A small middle part means the pivots were close together: take the
  # candidates from nearer the ends next time.
  dist = great - less
  div += 1 if dist < 13

  # Move the pivots from the ends into their final positions.
  dualpivot_swap(container, less - 1, left)
  dualpivot_swap(container, great + 1, right)

  # Sort the outer parts.
  dualpivot(container, left, less - 2, div)
  dualpivot(container, great + 2, right, div)

  # When the middle part is very large, first pull elements equal to either
  # pivot out to its edges so the recursion below has less to do. The bounds
  # move during the scan, so the loop condition must be re-checked each step.
  if dist > length - 13 && pivot1 != pivot2
    k = less
    while k <= great
      if container[k] == pivot1
        dualpivot_swap(container, k, less)
        less += 1
      elsif container[k] == pivot2
        dualpivot_swap(container, k, great)
        great -= 1
        if container[k] == pivot1
          dualpivot_swap(container, k, less)
          less += 1
        end
      end
      k += 1
    end
  end

  # Sort the middle part; with equal pivots it holds only equal elements.
  dualpivot(container, less, great, div) if pivot1 < pivot2
  container
end
363
+
364
# Exchanges the elements at indices +i+ and +j+ of +container+ in place.
# Returns a two-element array holding the values now stored at +i+ and +j+,
# in that order.
def self.dualpivot_swap(container, i, j)
  pair = [container[j], container[i]]
  container[i] = pair.first
  container[j] = pair.last
  pair
end
367
+ end
368
+