algorithms 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/{History.txt → CHANGELOG.markdown} +25 -0
- data/Gemfile +9 -0
- data/Manifest +16 -6
- data/README.markdown +40 -66
- data/Rakefile +16 -25
- data/algorithms.gemspec +14 -24
- data/benchmarks/treemaps.rb +25 -10
- data/ext/algorithms/string/extconf.rb +4 -0
- data/ext/algorithms/string/string.c +68 -0
- data/ext/containers/bst/bst.c +247 -0
- data/ext/containers/bst/extconf.rb +4 -0
- data/ext/containers/deque/deque.c +3 -3
- data/ext/containers/rbtree_map/rbtree.c +43 -18
- data/ext/containers/splaytree_map/splaytree.c +154 -105
- data/lib/algorithms.rb +5 -6
- data/lib/algorithms/sort.rb +130 -0
- data/lib/algorithms/string.rb +9 -0
- data/lib/containers/heap.rb +16 -0
- data/lib/containers/kd_tree.rb +40 -17
- data/lib/containers/trie.rb +1 -1
- data/spec/bst_gc_mark_spec.rb +25 -0
- data/spec/bst_spec.rb +25 -0
- data/spec/deque_gc_mark_spec.rb +1 -1
- data/spec/deque_spec.rb +20 -20
- data/spec/heap_spec.rb +28 -23
- data/spec/kd_expected_out.txt +10000 -0
- data/spec/kd_test_in.txt +10000 -0
- data/spec/kd_tree_spec.rb +31 -1
- data/spec/{rb_tree_map_gc_mark_spec.rb → map_gc_mark_spec.rb} +10 -6
- data/spec/priority_queue_spec.rb +20 -20
- data/spec/queue_spec.rb +10 -10
- data/spec/rb_tree_map_spec.rb +25 -25
- data/spec/search_spec.rb +9 -9
- data/spec/sort_spec.rb +6 -5
- data/spec/splay_tree_map_spec.rb +21 -17
- data/spec/stack_spec.rb +10 -10
- data/spec/string_spec.rb +15 -0
- data/spec/suffix_array_spec.rb +17 -17
- data/spec/trie_spec.rb +16 -16
- metadata +49 -62
data/lib/algorithms.rb
CHANGED
@@ -29,6 +29,7 @@
|
|
29
29
|
* Splay Trees - Containers::SplayTreeMap
|
30
30
|
* Tries - Containers::Trie
|
31
31
|
* Suffix Array - Containers::SuffixArray
|
32
|
+
* kd Tree - Containers::KDTree
|
32
33
|
|
33
34
|
* Search algorithms
|
34
35
|
- Binary Search - Algorithms::Search.binary_search
|
@@ -42,19 +43,17 @@
|
|
42
43
|
- Shell sort - Algorithms::Sort.shell_sort
|
43
44
|
- Quicksort - Algorithms::Sort.quicksort
|
44
45
|
- Mergesort - Algorithms::Sort.mergesort
|
46
|
+
- Dual-Pivot Quicksort - Algorithms::Sort.dualpivotquicksort
|
47
|
+
* String algorithms
|
48
|
+
- Levenshtein distance - Algorithms::String.levenshtein_dist
|
45
49
|
=end
|
46
50
|
|
47
51
|
module Algorithms; end
|
48
52
|
module Containers; end
|
49
53
|
|
50
|
-
begin
|
51
|
-
require 'CBst'
|
52
|
-
Containers::Bst = Containers::CBst
|
53
|
-
rescue LoadError # C Version could not be found
|
54
|
-
end
|
55
|
-
|
56
54
|
require 'algorithms/search'
|
57
55
|
require 'algorithms/sort'
|
56
|
+
require 'algorithms/string'
|
58
57
|
require 'containers/heap'
|
59
58
|
require 'containers/stack'
|
60
59
|
require 'containers/deque'
|
data/lib/algorithms/sort.rb
CHANGED
@@ -235,4 +235,134 @@ module Algorithms::Sort
|
|
235
235
|
sorted + left + right
|
236
236
|
end
|
237
237
|
|
238
|
+
# Dual-Pivot Quicksort is a variation of Quicksort by Vladimir Yaroslavskiy.
|
239
|
+
# This is an implementation of the algorithm as it was found in the original
|
240
|
+
# research paper:
|
241
|
+
#
|
242
|
+
# http://iaroslavski.narod.ru/quicksort/DualPivotQuicksort.pdf
|
243
|
+
#
|
244
|
+
# Mirror:
|
245
|
+
# http://codeblab.com/wp-content/uploads/2009/09/DualPivotQuicksort.pdf
|
246
|
+
#
|
247
|
+
# "This algorithm offers O(n log(n)) performance on many data sets that cause
|
248
|
+
# other quicksorts to degrade to quadratic performance, and is typically
|
249
|
+
# faster than traditional (one-pivot) Quicksort implementations."
|
250
|
+
# -- http://download.oracle.com/javase/7/docs/api/java/util/Arrays.html
|
251
|
+
#
|
252
|
+
# The algorithm was improved by Vladimir Yaroslavskiy, Jon Bentley, and
|
253
|
+
# Joshua Bloch, and was implemented as the default sort algorithm for
|
254
|
+
# primitives in Java 7.
|
255
|
+
#
|
256
|
+
# Implementation in the Java JDK as of November, 2011:
|
257
|
+
# http://www.docjar.com/html/api/java/util/DualPivotQuicksort.java.html
|
258
|
+
#
|
259
|
+
# It is proved that for the Dual-Pivot Quicksort the average number
|
260
|
+
# of comparisons is 2*n*ln(n), the average number of swaps is
|
261
|
+
# 0.8*n*ln(n), whereas classical Quicksort algorithm has 2*n*ln(n)
|
262
|
+
# and 1*n*ln(n) respectively. This has been fully examined mathematically
|
263
|
+
# and experimentally.
|
264
|
+
#
|
265
|
+
# Requirements: Container should implement #size, #[] and #[]= and include the Enumerable module.
|
266
|
+
# Time Complexity: O(n log n) average, O(n^2) worst-case
|
267
|
+
# Space Complexity: O(n) auxiliary
|
268
|
+
#
|
269
|
+
# Stable: No
|
270
|
+
#
|
271
|
+
# Algorithms::Sort.dualpivotquicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
272
|
+
|
273
|
+
def self.dualpivotquicksort(container)
|
274
|
+
return container if container.size <= 1
|
275
|
+
dualpivot(container, 0, container.size-1, 3)
|
276
|
+
end
|
277
|
+
|
278
|
+
def self.dualpivot(container, left=0, right=container.size-1, div=3)
|
279
|
+
length = right - left
|
280
|
+
if length < 27 # insertion sort for tiny array
|
281
|
+
container.each_with_index do |data,i|
|
282
|
+
j = i - 1
|
283
|
+
while j >= 0
|
284
|
+
break if container[j] <= data
|
285
|
+
container[j + 1] = container[j]
|
286
|
+
j = j - 1
|
287
|
+
end
|
288
|
+
container[j + 1] = data
|
289
|
+
end
|
290
|
+
else # full dual-pivot quicksort
|
291
|
+
third = length / div
|
292
|
+
# medians
|
293
|
+
m1 = left + third
|
294
|
+
m2 = right - third
|
295
|
+
if m1 <= left
|
296
|
+
m1 = left + 1
|
297
|
+
end
|
298
|
+
if m2 >= right
|
299
|
+
m2 = right - 1
|
300
|
+
end
|
301
|
+
if container[m1] < container[m2]
|
302
|
+
dualpivot_swap(container, m1, left)
|
303
|
+
dualpivot_swap(container, m2, right)
|
304
|
+
else
|
305
|
+
dualpivot_swap(container, m1, right)
|
306
|
+
dualpivot_swap(container, m2, left)
|
307
|
+
end
|
308
|
+
# pivots
|
309
|
+
pivot1 = container[left]
|
310
|
+
pivot2 = container[right]
|
311
|
+
# pointers
|
312
|
+
less = left + 1
|
313
|
+
great = right - 1
|
314
|
+
# sorting
|
315
|
+
k = less
|
316
|
+
while k <= great
|
317
|
+
if container[k] < pivot1
|
318
|
+
dualpivot_swap(container, k, less += 1)
|
319
|
+
elsif container[k] > pivot2
|
320
|
+
while k < great && container[great] > pivot2
|
321
|
+
great -= 1
|
322
|
+
end
|
323
|
+
dualpivot_swap(container, k, great -= 1)
|
324
|
+
if container[k] < pivot1
|
325
|
+
dualpivot_swap(container, k, less += 1)
|
326
|
+
end
|
327
|
+
end
|
328
|
+
k += 1
|
329
|
+
end
|
330
|
+
# swaps
|
331
|
+
dist = great - less
|
332
|
+
if dist < 13
|
333
|
+
div += 1
|
334
|
+
end
|
335
|
+
dualpivot_swap(container, less-1, left)
|
336
|
+
dualpivot_swap(container, great+1, right)
|
337
|
+
# subarrays
|
338
|
+
dualpivot(container, left, less-2, div)
|
339
|
+
dualpivot(container, great+2, right, div)
|
340
|
+
# equal elements
|
341
|
+
if dist > length - 13 && pivot1 != pivot2
|
342
|
+
for k in less..great do
|
343
|
+
if container[k] == pivot1
|
344
|
+
dualpivot_swap(container, k, less)
|
345
|
+
less += 1
|
346
|
+
elsif container[k] == pivot2
|
347
|
+
dualpivot_swap(container, k, great)
|
348
|
+
great -= 1
|
349
|
+
if container[k] == pivot1
|
350
|
+
dualpivot_swap(container, k, less)
|
351
|
+
less += 1
|
352
|
+
end
|
353
|
+
end
|
354
|
+
end
|
355
|
+
end
|
356
|
+
# subarray
|
357
|
+
if pivot1 < pivot2
|
358
|
+
dualpivot(container, less, great, div)
|
359
|
+
end
|
360
|
+
container
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
def self.dualpivot_swap(container, i, j)
|
365
|
+
container[i], container[j] = container[j], container[i]
|
366
|
+
end
|
238
367
|
end
|
368
|
+
|
data/lib/containers/heap.rb
CHANGED
@@ -117,6 +117,22 @@ class Containers::Heap
|
|
117
117
|
@next && @next.value
|
118
118
|
end
|
119
119
|
|
120
|
+
# call-seq:
|
121
|
+
# next_key -> key
|
122
|
+
# next_key -> nil
|
123
|
+
#
|
124
|
+
# Returns the key associated with the next item in heap order, but does not remove the value.
|
125
|
+
#
|
126
|
+
# Complexity: O(1)
|
127
|
+
#
|
128
|
+
# minheap = MinHeap.new
|
129
|
+
# minheap.push(1, :a)
|
130
|
+
# minheap.next_key #=> 1
|
131
|
+
#
|
132
|
+
def next_key
|
133
|
+
@next && @next.key
|
134
|
+
end
|
135
|
+
|
120
136
|
# call-seq:
|
121
137
|
# clear -> nil
|
122
138
|
#
|
data/lib/containers/kd_tree.rb
CHANGED
@@ -1,28 +1,55 @@
|
|
1
1
|
=begin rdoc
|
2
2
|
|
3
|
-
A kd-tree allows
|
4
|
-
|
3
|
+
A kd-tree is a binary tree that allows one to store points (of any space dimension: 2D, 3D, etc).
|
4
|
+
The structure of the resulting tree makes it so that large portions of the tree are pruned
|
5
|
+
during queries.
|
6
|
+
|
7
|
+
One very good use of the tree is to allow nearest neighbor searching. Let's say you have a number
|
8
|
+
of points in 2D space, and you want to find the nearest 2 points from a specific point:
|
9
|
+
|
10
|
+
First, put the points into the tree:
|
11
|
+
|
12
|
+
kdtree = Containers::KDTree.new( {0 => [4, 3], 1 => [3, 4], 2 => [-1, 2], 3 => [6, 4],
|
13
|
+
4 => [3, -5], 5 => [-2, -5] })
|
14
|
+
|
15
|
+
Then, query on the tree:
|
16
|
+
|
17
|
+
puts kdtree.find_nearest([0, 0], 2) => [[5, 2], [9, 1]]
|
18
|
+
|
19
|
+
The result is an array of [distance, id] pairs. There seems to be a bug in this version.
|
20
|
+
|
21
|
+
Note that the point queried on does not have to exist in the tree. However, if it does exist,
|
22
|
+
it will be returned.
|
5
23
|
|
6
24
|
=end
|
7
25
|
|
8
26
|
class Containers::KDTree
|
9
27
|
Node = Struct.new(:id, :coords, :left, :right)
|
10
28
|
|
29
|
+
# Points is a hash of id => [coord, coord] pairs.
|
11
30
|
def initialize(points)
|
12
|
-
|
31
|
+
raise "must pass in a hash" unless points.kind_of?(Hash)
|
32
|
+
@dimensions = points[ points.keys.first ].size
|
33
|
+
@root = build_tree(points.to_a)
|
34
|
+
@nearest = []
|
35
|
+
end
|
36
|
+
|
37
|
+
# Find k closest points to given coordinates
|
38
|
+
def find_nearest(target, k_nearest)
|
13
39
|
@nearest = []
|
40
|
+
nearest(@root, target, k_nearest, 0)
|
14
41
|
end
|
15
42
|
|
16
|
-
#
|
43
|
+
# points is an array
|
17
44
|
def build_tree(points, depth=0)
|
18
45
|
return if points.empty?
|
19
|
-
|
20
|
-
axis = depth %
|
21
|
-
|
22
|
-
points.sort! { |a, b| a[
|
46
|
+
|
47
|
+
axis = depth % @dimensions
|
48
|
+
|
49
|
+
points.sort! { |a, b| a.last[axis] <=> b.last[axis] }
|
23
50
|
median = points.size / 2
|
24
|
-
|
25
|
-
node = Node.new(points[median]
|
51
|
+
|
52
|
+
node = Node.new(points[median].first, points[median].last, nil, nil)
|
26
53
|
node.left = build_tree(points[0...median], depth+1)
|
27
54
|
node.right = build_tree(points[median+1..-1], depth+1)
|
28
55
|
node
|
@@ -48,15 +75,11 @@ class Containers::KDTree
|
|
48
75
|
end
|
49
76
|
nearest
|
50
77
|
end
|
78
|
+
private :check_nearest
|
51
79
|
|
52
|
-
#
|
53
|
-
def find_nearest(target, k_nearest)
|
54
|
-
@nearest = []
|
55
|
-
nearest(@root, target, k_nearest, 0)
|
56
|
-
end
|
57
|
-
|
80
|
+
# Recursively find nearest coordinates, going down the appropriate branch as needed
|
58
81
|
def nearest(node, target, k_nearest, depth)
|
59
|
-
axis = depth %
|
82
|
+
axis = depth % @dimensions
|
60
83
|
|
61
84
|
if node.left.nil? && node.right.nil? # Leaf node
|
62
85
|
@nearest = check_nearest(@nearest, node, target, k_nearest)
|
data/lib/containers/trie.rb
CHANGED
data/spec/bst_gc_mark_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
|
2
|
+
# require 'algorithms'
|
3
|
+
#
|
4
|
+
# if defined? Containers::CBst
|
5
|
+
# describe "CBst" do
|
6
|
+
# it "should mark ruby object references" do
|
7
|
+
# anon_key_class = Class.new do
|
8
|
+
# attr :value
|
9
|
+
# def initialize(x); @value = x; end
|
10
|
+
# def <=>(other); value <=> other.value; end
|
11
|
+
# end
|
12
|
+
# anon_val_class = Class.new
|
13
|
+
# @bst = Containers::CBst.new
|
14
|
+
# 100.times { |x| @bst.push(anon_key_class.new(x), anon_val_class.new) }
|
15
|
+
# # Mark and sweep
|
16
|
+
# ObjectSpace.garbage_collect
|
17
|
+
# # Check if any instances were swept
|
18
|
+
# count = 0
|
19
|
+
# ObjectSpace.each_object(anon_key_class) { |x| count += 1 }
|
20
|
+
# count.should eql(100)
|
21
|
+
# ObjectSpace.each_object(anon_val_class) { |x| count += 1 }
|
22
|
+
# count.should eql(200)
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
# end
|
data/spec/bst_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
|
2
|
+
# require "algorithms"
|
3
|
+
#
|
4
|
+
# begin
|
5
|
+
# Containers::CBst
|
6
|
+
# describe "binary search tree" do
|
7
|
+
# it "should let user push new elements with key" do
|
8
|
+
# @bst = Containers::CBst.new
|
9
|
+
# 100.times { |x| @bst.push(x, "hello : #{x}") }
|
10
|
+
# @bst.size.should eql(100)
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# it "should allow users to delete elements" do
|
14
|
+
# @bst = Containers::CBst.new
|
15
|
+
# @bst.push(10, "hello world")
|
16
|
+
# @bst.push(11, "hello world")
|
17
|
+
# @bst.delete(11)
|
18
|
+
# @bst.size.should eql(1)
|
19
|
+
# @bst.delete(10)
|
20
|
+
# @bst.size.should eql(0)
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# end
|
24
|
+
# rescue Exception
|
25
|
+
# end
|
data/spec/deque_gc_mark_spec.rb
CHANGED
data/spec/deque_spec.rb
CHANGED
@@ -1,77 +1,77 @@
|
|
1
1
|
$: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
|
2
2
|
require 'algorithms'
|
3
3
|
|
4
|
-
|
4
|
+
shared_examples "(empty deque)" do
|
5
5
|
it "should return nil when popping objects" do
|
6
|
-
@deque.pop_front.
|
7
|
-
@deque.pop_back.
|
6
|
+
expect(@deque.pop_front).to be_nil
|
7
|
+
expect(@deque.pop_back).to be_nil
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should return a size of 1 when sent #push_front" do
|
11
11
|
@deque.push_front(1)
|
12
|
-
@deque.size.
|
12
|
+
expect(@deque.size).to eql(1)
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should return a size of 1 when sent #push_back" do
|
16
16
|
@deque.push_back(1)
|
17
|
-
@deque.size.
|
17
|
+
expect(@deque.size).to eql(1)
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should return nil when sent #front and #back" do
|
21
|
-
@deque.front.
|
22
|
-
@deque.back.
|
21
|
+
expect(@deque.front).to be_nil
|
22
|
+
expect(@deque.back).to be_nil
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should be empty" do
|
26
|
-
@deque.
|
26
|
+
expect(@deque).to be_empty
|
27
27
|
end
|
28
28
|
|
29
29
|
it "should raise ArgumentError if passed more than one argument" do
|
30
|
-
|
30
|
+
expect { @deque.class.send("new", Time.now, []) }.to raise_error(ArgumentError)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
|
34
|
+
shared_examples "(non-empty deque)" do
|
35
35
|
before(:each) do
|
36
36
|
@deque.push_back(10)
|
37
37
|
@deque.push_back("10")
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should return last pushed object with pop_back" do
|
41
|
-
@deque.pop_back.
|
42
|
-
@deque.pop_back.
|
41
|
+
expect(@deque.pop_back).to eql("10")
|
42
|
+
expect(@deque.pop_back).to eql(10)
|
43
43
|
end
|
44
44
|
|
45
45
|
it "should return first pushed object with pop_front" do
|
46
|
-
@deque.pop_front.
|
47
|
-
@deque.pop_front.
|
46
|
+
expect(@deque.pop_front).to eql(10)
|
47
|
+
expect(@deque.pop_front).to eql("10")
|
48
48
|
end
|
49
49
|
|
50
50
|
it "should return a size greater than 0" do
|
51
|
-
@deque.size.
|
51
|
+
expect(@deque.size).to eql(2)
|
52
52
|
end
|
53
53
|
|
54
54
|
it "should not be empty" do
|
55
|
-
@deque.
|
55
|
+
expect(@deque).not_to be_empty
|
56
56
|
end
|
57
57
|
|
58
58
|
it "should iterate in LIFO order with #each_backward" do
|
59
59
|
arr = []
|
60
60
|
@deque.each_backward { |obj| arr << obj }
|
61
|
-
arr.
|
61
|
+
expect(arr).to eql(["10", 10])
|
62
62
|
end
|
63
63
|
|
64
64
|
it "should iterate in FIFO order with #each_forward" do
|
65
65
|
arr = []
|
66
66
|
@deque.each_forward { |obj| arr << obj }
|
67
|
-
arr.
|
67
|
+
expect(arr).to eql([10, "10"])
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should return nil after everything's popped" do
|
71
71
|
@deque.pop_back
|
72
72
|
@deque.pop_back
|
73
|
-
@deque.pop_back.
|
74
|
-
@deque.front.
|
73
|
+
expect(@deque.pop_back).to be_nil
|
74
|
+
expect(@deque.front).to be_nil
|
75
75
|
end
|
76
76
|
end
|
77
77
|
|