algorithms 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/{History.txt → CHANGELOG.markdown} +25 -0
- data/Gemfile +9 -0
- data/Manifest +16 -6
- data/README.markdown +40 -66
- data/Rakefile +16 -25
- data/algorithms.gemspec +14 -24
- data/benchmarks/treemaps.rb +25 -10
- data/ext/algorithms/string/extconf.rb +4 -0
- data/ext/algorithms/string/string.c +68 -0
- data/ext/containers/bst/bst.c +247 -0
- data/ext/containers/bst/extconf.rb +4 -0
- data/ext/containers/deque/deque.c +3 -3
- data/ext/containers/rbtree_map/rbtree.c +43 -18
- data/ext/containers/splaytree_map/splaytree.c +154 -105
- data/lib/algorithms.rb +5 -6
- data/lib/algorithms/sort.rb +130 -0
- data/lib/algorithms/string.rb +9 -0
- data/lib/containers/heap.rb +16 -0
- data/lib/containers/kd_tree.rb +40 -17
- data/lib/containers/trie.rb +1 -1
- data/spec/bst_gc_mark_spec.rb +25 -0
- data/spec/bst_spec.rb +25 -0
- data/spec/deque_gc_mark_spec.rb +1 -1
- data/spec/deque_spec.rb +20 -20
- data/spec/heap_spec.rb +28 -23
- data/spec/kd_expected_out.txt +10000 -0
- data/spec/kd_test_in.txt +10000 -0
- data/spec/kd_tree_spec.rb +31 -1
- data/spec/{rb_tree_map_gc_mark_spec.rb → map_gc_mark_spec.rb} +10 -6
- data/spec/priority_queue_spec.rb +20 -20
- data/spec/queue_spec.rb +10 -10
- data/spec/rb_tree_map_spec.rb +25 -25
- data/spec/search_spec.rb +9 -9
- data/spec/sort_spec.rb +6 -5
- data/spec/splay_tree_map_spec.rb +21 -17
- data/spec/stack_spec.rb +10 -10
- data/spec/string_spec.rb +15 -0
- data/spec/suffix_array_spec.rb +17 -17
- data/spec/trie_spec.rb +16 -16
- metadata +49 -62
data/lib/algorithms.rb
CHANGED
@@ -29,6 +29,7 @@
|
|
29
29
|
* Splay Trees - Containers::SplayTreeMap
|
30
30
|
* Tries - Containers::Trie
|
31
31
|
* Suffix Array - Containers::SuffixArray
|
32
|
+
* kd Tree - Containers::KDTree
|
32
33
|
|
33
34
|
* Search algorithms
|
34
35
|
- Binary Search - Algorithms::Search.binary_search
|
@@ -42,19 +43,17 @@
|
|
42
43
|
- Shell sort - Algorithms::Sort.shell_sort
|
43
44
|
- Quicksort - Algorithms::Sort.quicksort
|
44
45
|
- Mergesort - Algorithms::Sort.mergesort
|
46
|
+
- Dual-Pivot Quicksort - Algorithms::Sort.dualpivotquicksort
|
47
|
+
* String algorithms
|
48
|
+
- Levenshtein distance - Algorithms::String.levenshtein_dist
|
45
49
|
=end
|
46
50
|
|
47
51
|
module Algorithms; end
|
48
52
|
module Containers; end
|
49
53
|
|
50
|
-
begin
|
51
|
-
require 'CBst'
|
52
|
-
Containers::Bst = Containers::CBst
|
53
|
-
rescue LoadError # C Version could not be found
|
54
|
-
end
|
55
|
-
|
56
54
|
require 'algorithms/search'
|
57
55
|
require 'algorithms/sort'
|
56
|
+
require 'algorithms/string'
|
58
57
|
require 'containers/heap'
|
59
58
|
require 'containers/stack'
|
60
59
|
require 'containers/deque'
|
data/lib/algorithms/sort.rb
CHANGED
@@ -235,4 +235,134 @@ module Algorithms::Sort
|
|
235
235
|
sorted + left + right
|
236
236
|
end
|
237
237
|
|
238
|
+
# Dual-Pivot Quicksort is a variation of Quicksort by Vladimir Yaroslavskiy.
|
239
|
+
# This is an implementation of the algorithm as it was found in the original
|
240
|
+
# research paper:
|
241
|
+
#
|
242
|
+
# http://iaroslavski.narod.ru/quicksort/DualPivotQuicksort.pdf
|
243
|
+
#
|
244
|
+
# Mirror:
|
245
|
+
# http://codeblab.com/wp-content/uploads/2009/09/DualPivotQuicksort.pdf
|
246
|
+
#
|
247
|
+
# "This algorithm offers O(n log(n)) performance on many data sets that cause
|
248
|
+
# other quicksorts to degrade to quadratic performance, and is typically
|
249
|
+
# faster than traditional (one-pivot) Quicksort implementations."
|
250
|
+
# -- http://download.oracle.com/javase/7/docs/api/java/util/Arrays.html
|
251
|
+
#
|
252
|
+
# The algorithm was improved by Vladimir Yaroslavskiy, Jon Bentley, and
|
253
|
+
# Joshua Bloch, and was implemented as the default sort algorithm for
|
254
|
+
# primitives in Java 7.
|
255
|
+
#
|
256
|
+
# Implementation in the Java JDK as of November, 2011:
|
257
|
+
# http://www.docjar.com/html/api/java/util/DualPivotQuicksort.java.html
|
258
|
+
#
|
259
|
+
# It is proved that for the Dual-Pivot Quicksort the average number
|
260
|
+
# of comparisons is 2*n*ln(n), the average number of swaps is
|
261
|
+
# 0.8*n*ln(n), whereas classical Quicksort algorithm has 2*n*ln(n)
|
262
|
+
# and 1*n*ln(n) respectively. This has been fully examined mathematically
|
263
|
+
# and experimentally.
|
264
|
+
#
|
265
|
+
# Requirements: Container should implement #size, #[] and #[]= and include the Enumerable module.
|
266
|
+
# Time Complexity: О(n log n) average, О(n^2) worst-case
|
267
|
+
# Space Complexity: О(n) auxiliary
|
268
|
+
#
|
269
|
+
# Stable: No
|
270
|
+
#
|
271
|
+
# Algorithms::Sort.dualpivotquicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
|
272
|
+
|
273
|
+
def self.dualpivotquicksort(container)
|
274
|
+
return container if container.size <= 1
|
275
|
+
dualpivot(container, 0, container.size-1, 3)
|
276
|
+
end
|
277
|
+
|
278
|
+
def self.dualpivot(container, left=0, right=container.size-1, div=3)
|
279
|
+
length = right - left
|
280
|
+
if length < 27 # insertion sort for tiny array
|
281
|
+
container.each_with_index do |data,i|
|
282
|
+
j = i - 1
|
283
|
+
while j >= 0
|
284
|
+
break if container[j] <= data
|
285
|
+
container[j + 1] = container[j]
|
286
|
+
j = j - 1
|
287
|
+
end
|
288
|
+
container[j + 1] = data
|
289
|
+
end
|
290
|
+
else # full dual-pivot quicksort
|
291
|
+
third = length / div
|
292
|
+
# medians
|
293
|
+
m1 = left + third
|
294
|
+
m2 = right - third
|
295
|
+
if m1 <= left
|
296
|
+
m1 = left + 1
|
297
|
+
end
|
298
|
+
if m2 >= right
|
299
|
+
m2 = right - 1
|
300
|
+
end
|
301
|
+
if container[m1] < container[m2]
|
302
|
+
dualpivot_swap(container, m1, left)
|
303
|
+
dualpivot_swap(container, m2, right)
|
304
|
+
else
|
305
|
+
dualpivot_swap(container, m1, right)
|
306
|
+
dualpivot_swap(container, m2, left)
|
307
|
+
end
|
308
|
+
# pivots
|
309
|
+
pivot1 = container[left]
|
310
|
+
pivot2 = container[right]
|
311
|
+
# pointers
|
312
|
+
less = left + 1
|
313
|
+
great = right - 1
|
314
|
+
# sorting
|
315
|
+
k = less
|
316
|
+
while k <= great
|
317
|
+
if container[k] < pivot1
|
318
|
+
dualpivot_swap(container, k, less += 1)
|
319
|
+
elsif container[k] > pivot2
|
320
|
+
while k < great && container[great] > pivot2
|
321
|
+
great -= 1
|
322
|
+
end
|
323
|
+
dualpivot_swap(container, k, great -= 1)
|
324
|
+
if container[k] < pivot1
|
325
|
+
dualpivot_swap(container, k, less += 1)
|
326
|
+
end
|
327
|
+
end
|
328
|
+
k += 1
|
329
|
+
end
|
330
|
+
# swaps
|
331
|
+
dist = great - less
|
332
|
+
if dist < 13
|
333
|
+
div += 1
|
334
|
+
end
|
335
|
+
dualpivot_swap(container, less-1, left)
|
336
|
+
dualpivot_swap(container, great+1, right)
|
337
|
+
# subarrays
|
338
|
+
dualpivot(container, left, less-2, div)
|
339
|
+
dualpivot(container, great+2, right, div)
|
340
|
+
# equal elements
|
341
|
+
if dist > length - 13 && pivot1 != pivot2
|
342
|
+
for k in less..great do
|
343
|
+
if container[k] == pivot1
|
344
|
+
dualpivot_swap(container, k, less)
|
345
|
+
less += 1
|
346
|
+
elsif container[k] == pivot2
|
347
|
+
dualpivot_swap(container, k, great)
|
348
|
+
great -= 1
|
349
|
+
if container[k] == pivot1
|
350
|
+
dualpivot_swap(container, k, less)
|
351
|
+
less += 1
|
352
|
+
end
|
353
|
+
end
|
354
|
+
end
|
355
|
+
end
|
356
|
+
# subarray
|
357
|
+
if pivot1 < pivot2
|
358
|
+
dualpivot(container, less, great, div)
|
359
|
+
end
|
360
|
+
container
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
def self.dualpivot_swap(container, i, j)
|
365
|
+
container[i], container[j] = container[j], container[i]
|
366
|
+
end
|
238
367
|
end
|
368
|
+
|
data/lib/containers/heap.rb
CHANGED
@@ -117,6 +117,22 @@ class Containers::Heap
|
|
117
117
|
@next && @next.value
|
118
118
|
end
|
119
119
|
|
120
|
+
# call-seq:
|
121
|
+
# next_key -> key
|
122
|
+
# next_key -> nil
|
123
|
+
#
|
124
|
+
# Returns the key associated with the next item in heap order, but does not remove the value.
|
125
|
+
#
|
126
|
+
# Complexity: O(1)
|
127
|
+
#
|
128
|
+
# minheap = MinHeap.new
|
129
|
+
# minheap.push(1, :a)
|
130
|
+
# minheap.next_key #=> 1
|
131
|
+
#
|
132
|
+
def next_key
|
133
|
+
@next && @next.key
|
134
|
+
end
|
135
|
+
|
120
136
|
# call-seq:
|
121
137
|
# clear -> nil
|
122
138
|
#
|
data/lib/containers/kd_tree.rb
CHANGED
@@ -1,28 +1,55 @@
|
|
1
1
|
=begin rdoc
|
2
2
|
|
3
|
-
A kd-tree allows
|
4
|
-
|
3
|
+
A kd-tree is a binary tree that allows one to store points (of any space dimension: 2D, 3D, etc).
|
4
|
+
The structure of the resulting tree makes it so that large portions of the tree are pruned
|
5
|
+
during queries.
|
6
|
+
|
7
|
+
One very good use of the tree is to allow nearest neighbor searching. Let's say you have a number
|
8
|
+
of points in 2D space, and you want to find the nearest 2 points from a specific point:
|
9
|
+
|
10
|
+
First, put the points into the tree:
|
11
|
+
|
12
|
+
kdtree = Containers::KDTree.new( {0 => [4, 3], 1 => [3, 4], 2 => [-1, 2], 3 => [6, 4],
|
13
|
+
4 => [3, -5], 5 => [-2, -5] })
|
14
|
+
|
15
|
+
Then, query on the tree:
|
16
|
+
|
17
|
+
puts kdtree.find_nearest([0, 0], 2) => [[5, 2], [9, 1]]
|
18
|
+
|
19
|
+
The result is an array of [distance, id] pairs. There seems to be a bug in this version.
|
20
|
+
|
21
|
+
Note that the point queried on does not have to exist in the tree. However, if it does exist,
|
22
|
+
it will be returned.
|
5
23
|
|
6
24
|
=end
|
7
25
|
|
8
26
|
class Containers::KDTree
|
9
27
|
Node = Struct.new(:id, :coords, :left, :right)
|
10
28
|
|
29
|
+
# Points is a hash of id => [coord, coord] pairs.
|
11
30
|
def initialize(points)
|
12
|
-
|
31
|
+
raise "must pass in a hash" unless points.kind_of?(Hash)
|
32
|
+
@dimensions = points[ points.keys.first ].size
|
33
|
+
@root = build_tree(points.to_a)
|
34
|
+
@nearest = []
|
35
|
+
end
|
36
|
+
|
37
|
+
# Find k closest points to given coordinates
|
38
|
+
def find_nearest(target, k_nearest)
|
13
39
|
@nearest = []
|
40
|
+
nearest(@root, target, k_nearest, 0)
|
14
41
|
end
|
15
42
|
|
16
|
-
#
|
43
|
+
# points is an array
|
17
44
|
def build_tree(points, depth=0)
|
18
45
|
return if points.empty?
|
19
|
-
|
20
|
-
axis = depth %
|
21
|
-
|
22
|
-
points.sort! { |a, b| a[
|
46
|
+
|
47
|
+
axis = depth % @dimensions
|
48
|
+
|
49
|
+
points.sort! { |a, b| a.last[axis] <=> b.last[axis] }
|
23
50
|
median = points.size / 2
|
24
|
-
|
25
|
-
node = Node.new(points[median]
|
51
|
+
|
52
|
+
node = Node.new(points[median].first, points[median].last, nil, nil)
|
26
53
|
node.left = build_tree(points[0...median], depth+1)
|
27
54
|
node.right = build_tree(points[median+1..-1], depth+1)
|
28
55
|
node
|
@@ -48,15 +75,11 @@ class Containers::KDTree
|
|
48
75
|
end
|
49
76
|
nearest
|
50
77
|
end
|
78
|
+
private :check_nearest
|
51
79
|
|
52
|
-
#
|
53
|
-
def find_nearest(target, k_nearest)
|
54
|
-
@nearest = []
|
55
|
-
nearest(@root, target, k_nearest, 0)
|
56
|
-
end
|
57
|
-
|
80
|
+
# Recursively find nearest coordinates, going down the appropriate branch as needed
|
58
81
|
def nearest(node, target, k_nearest, depth)
|
59
|
-
axis = depth %
|
82
|
+
axis = depth % @dimensions
|
60
83
|
|
61
84
|
if node.left.nil? && node.right.nil? # Leaf node
|
62
85
|
@nearest = check_nearest(@nearest, node, target, k_nearest)
|
data/lib/containers/trie.rb
CHANGED
data/spec/bst_gc_mark_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
|
2
|
+
# require 'algorithms'
|
3
|
+
#
|
4
|
+
# if defined? Containers::CBst
|
5
|
+
# describe "CBst" do
|
6
|
+
# it "should mark ruby object references" do
|
7
|
+
# anon_key_class = Class.new do
|
8
|
+
# attr :value
|
9
|
+
# def initialize(x); @value = x; end
|
10
|
+
# def <=>(other); value <=> other.value; end
|
11
|
+
# end
|
12
|
+
# anon_val_class = Class.new
|
13
|
+
# @bst = Containers::CBst.new
|
14
|
+
# 100.times { |x| @bst.push(anon_key_class.new(x), anon_val_class.new) }
|
15
|
+
# # Mark and sweep
|
16
|
+
# ObjectSpace.garbage_collect
|
17
|
+
# # Check if any instances were swept
|
18
|
+
# count = 0
|
19
|
+
# ObjectSpace.each_object(anon_key_class) { |x| count += 1 }
|
20
|
+
# count.should eql(100)
|
21
|
+
# ObjectSpace.each_object(anon_val_class) { |x| count += 1 }
|
22
|
+
# count.should eql(200)
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
# end
|
data/spec/bst_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
|
2
|
+
# require "algorithms"
|
3
|
+
#
|
4
|
+
# begin
|
5
|
+
# Containers::CBst
|
6
|
+
# describe "binary search tree" do
|
7
|
+
# it "should let user push new elements with key" do
|
8
|
+
# @bst = Containers::CBst.new
|
9
|
+
# 100.times { |x| @bst.push(x, "hello : #{x}") }
|
10
|
+
# @bst.size.should eql(100)
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# it "should allow users to delete elements" do
|
14
|
+
# @bst = Containers::CBst.new
|
15
|
+
# @bst.push(10, "hello world")
|
16
|
+
# @bst.push(11, "hello world")
|
17
|
+
# @bst.delete(11)
|
18
|
+
# @bst.size.should eql(1)
|
19
|
+
# @bst.delete(10)
|
20
|
+
# @bst.size.should eql(0)
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# end
|
24
|
+
# rescue Exception
|
25
|
+
# end
|
data/spec/deque_gc_mark_spec.rb
CHANGED
data/spec/deque_spec.rb
CHANGED
@@ -1,77 +1,77 @@
|
|
1
1
|
$: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
|
2
2
|
require 'algorithms'
|
3
3
|
|
4
|
-
|
4
|
+
shared_examples "(empty deque)" do
|
5
5
|
it "should return nil when popping objects" do
|
6
|
-
@deque.pop_front.
|
7
|
-
@deque.pop_back.
|
6
|
+
expect(@deque.pop_front).to be_nil
|
7
|
+
expect(@deque.pop_back).to be_nil
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should return a size of 1 when sent #push_front" do
|
11
11
|
@deque.push_front(1)
|
12
|
-
@deque.size.
|
12
|
+
expect(@deque.size).to eql(1)
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should return a size of 1 when sent #push_back" do
|
16
16
|
@deque.push_back(1)
|
17
|
-
@deque.size.
|
17
|
+
expect(@deque.size).to eql(1)
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should return nil when sent #front and #back" do
|
21
|
-
@deque.front.
|
22
|
-
@deque.back.
|
21
|
+
expect(@deque.front).to be_nil
|
22
|
+
expect(@deque.back).to be_nil
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should be empty" do
|
26
|
-
@deque.
|
26
|
+
expect(@deque).to be_empty
|
27
27
|
end
|
28
28
|
|
29
29
|
it "should raise ArgumentError if passed more than one argument" do
|
30
|
-
|
30
|
+
expect { @deque.class.send("new", Time.now, []) }.to raise_error(ArgumentError)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
|
34
|
+
shared_examples "(non-empty deque)" do
|
35
35
|
before(:each) do
|
36
36
|
@deque.push_back(10)
|
37
37
|
@deque.push_back("10")
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should return last pushed object with pop_back" do
|
41
|
-
@deque.pop_back.
|
42
|
-
@deque.pop_back.
|
41
|
+
expect(@deque.pop_back).to eql("10")
|
42
|
+
expect(@deque.pop_back).to eql(10)
|
43
43
|
end
|
44
44
|
|
45
45
|
it "should return first pushed object with pop_front" do
|
46
|
-
@deque.pop_front.
|
47
|
-
@deque.pop_front.
|
46
|
+
expect(@deque.pop_front).to eql(10)
|
47
|
+
expect(@deque.pop_front).to eql("10")
|
48
48
|
end
|
49
49
|
|
50
50
|
it "should return a size greater than 0" do
|
51
|
-
@deque.size.
|
51
|
+
expect(@deque.size).to eql(2)
|
52
52
|
end
|
53
53
|
|
54
54
|
it "should not be empty" do
|
55
|
-
@deque.
|
55
|
+
expect(@deque).not_to be_empty
|
56
56
|
end
|
57
57
|
|
58
58
|
it "should iterate in LIFO order with #each_backward" do
|
59
59
|
arr = []
|
60
60
|
@deque.each_backward { |obj| arr << obj }
|
61
|
-
arr.
|
61
|
+
expect(arr).to eql(["10", 10])
|
62
62
|
end
|
63
63
|
|
64
64
|
it "should iterate in FIFO order with #each_forward" do
|
65
65
|
arr = []
|
66
66
|
@deque.each_forward { |obj| arr << obj }
|
67
|
-
arr.
|
67
|
+
expect(arr).to eql([10, "10"])
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should return nil after everything's popped" do
|
71
71
|
@deque.pop_back
|
72
72
|
@deque.pop_back
|
73
|
-
@deque.pop_back.
|
74
|
-
@deque.front.
|
73
|
+
expect(@deque.pop_back).to be_nil
|
74
|
+
expect(@deque.front).to be_nil
|
75
75
|
end
|
76
76
|
end
|
77
77
|
|