algorithms 0.2.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/algorithms.rb CHANGED
@@ -29,6 +29,7 @@
29
29
  * Splay Trees - Containers::SplayTreeMap
30
30
  * Tries - Containers::Trie
31
31
  * Suffix Array - Containers::SuffixArray
32
+ * kd Tree - Containers::KDTree
32
33
 
33
34
  * Search algorithms
34
35
  - Binary Search - Algorithms::Search.binary_search
@@ -42,19 +43,17 @@
42
43
  - Shell sort - Algorithms::Sort.shell_sort
43
44
  - Quicksort - Algorithms::Sort.quicksort
44
45
  - Mergesort - Algorithms::Sort.mergesort
46
+ - Dual-Pivot Quicksort - Algorithms::Sort.dualpivotquicksort
47
+ * String algorithms
48
+ - Levenshtein distance - Algorithms::String.levenshtein_dist
45
49
  =end
46
50
 
47
51
  module Algorithms; end
48
52
  module Containers; end
49
53
 
50
- begin
51
- require 'CBst'
52
- Containers::Bst = Containers::CBst
53
- rescue LoadError # C Version could not be found
54
- end
55
-
56
54
  require 'algorithms/search'
57
55
  require 'algorithms/sort'
56
+ require 'algorithms/string'
58
57
  require 'containers/heap'
59
58
  require 'containers/stack'
60
59
  require 'containers/deque'
@@ -235,4 +235,134 @@ module Algorithms::Sort
235
235
  sorted + left + right
236
236
  end
237
237
 
238
# Dual-Pivot Quicksort is a variation of Quicksort by Vladimir Yaroslavskiy.
# This is an implementation of the algorithm as it was found in the original
# research paper:
#
# http://iaroslavski.narod.ru/quicksort/DualPivotQuicksort.pdf
#
# Mirror:
# http://codeblab.com/wp-content/uploads/2009/09/DualPivotQuicksort.pdf
#
# "This algorithm offers O(n log(n)) performance on many data sets that cause
# other quicksorts to degrade to quadratic performance, and is typically
# faster than traditional (one-pivot) Quicksort implementations."
#   -- http://download.oracle.com/javase/7/docs/api/java/util/Arrays.html
#
# The algorithm was improved by Vladimir Yaroslavskiy, Jon Bentley, and
# Joshua Bloch, and was implemented as the default sort algorithm for
# primitives in Java 7.
#
# Implementation in the Java JDK as of November, 2011:
# http://www.docjar.com/html/api/java/util/DualPivotQuicksort.java.html
#
# It is proved that for the Dual-Pivot Quicksort the average number
# of comparisons is 2*n*ln(n), the average number of swaps is
# 0.8*n*ln(n), whereas classical Quicksort algorithm has 2*n*ln(n)
# and 1*n*ln(n) respectively. This has been fully examined mathematically
# and experimentally.
#
# Requirements: Container should implement #size, #[] and #[]=, and its
#               elements should be mutually comparable (<, >, <=, ==).
# Time Complexity: O(n log n) average, O(n^2) worst-case
# Space Complexity: sorts in place; only the recursion stack is auxiliary
#
# Stable: No
#
# The container is sorted in place and the same object is returned.
#
#   Algorithms::Sort.dualpivotquicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]

def self.dualpivotquicksort(container)
  return container if container.size <= 1
  dualpivot(container, 0, container.size - 1, 3)
end

# Sorts container[left..right] in place and returns the container.
# Elements outside that inclusive index range are never touched.
#
# container:: indexable collection to sort
# left::      first index of the range (default 0)
# right::     last index of the range (default container.size-1)
# div::       divisor used to choose the two pivot candidates; it grows when a
#             partition turns out to be badly balanced
def self.dualpivot(container, left=0, right=container.size-1, div=3)
  length = right - left
  # Tiny (or empty) ranges are cheaper to finish with insertion sort.
  return dualpivot_insertion_sort(container, left, right) if length < 27

  third = length / div
  # "medians": pivot candidates, clamped so they stay strictly inside the range
  m1 = left + third
  m2 = right - third
  m1 = left + 1 if m1 <= left
  m2 = right - 1 if m2 >= right
  # Park the smaller candidate at the left edge and the larger at the right edge.
  if container[m1] < container[m2]
    dualpivot_swap(container, m1, left)
    dualpivot_swap(container, m2, right)
  else
    dualpivot_swap(container, m1, right)
    dualpivot_swap(container, m2, left)
  end
  pivot1 = container[left]
  pivot2 = container[right]

  # Three-way partition. Invariant while scanning with k:
  #   container[left+1...less]   <  pivot1
  #   container[less...k]        in pivot1..pivot2
  #   container[great+1...right] >  pivot2
  less = left + 1
  great = right - 1
  k = less
  while k <= great
    if container[k] < pivot1
      # Swap with the current slot, THEN advance (post-increment semantics).
      dualpivot_swap(container, k, less)
      less += 1
    elsif container[k] > pivot2
      great -= 1 while k < great && container[great] > pivot2
      dualpivot_swap(container, k, great)
      great -= 1
      # The element swapped in from the right has not been classified yet.
      if container[k] < pivot1
        dualpivot_swap(container, k, less)
        less += 1
      end
    end
    k += 1
  end

  # A narrow middle part means the pivots were poorly spread; pick candidates
  # closer to the edges in the recursive calls.
  dist = great - less
  div += 1 if dist < 13

  # Move both pivots to their final positions.
  dualpivot_swap(container, less - 1, left)
  dualpivot_swap(container, great + 1, right)

  # Outer parts.
  dualpivot(container, left, less - 2, div)
  dualpivot(container, great + 2, right, div)

  # When the middle part is very large, peel off elements equal to either pivot
  # so the remaining middle range shrinks. `great` is re-read on every pass
  # because it moves while scanning.
  if dist > length - 13 && pivot1 != pivot2
    k = less
    while k <= great
      if container[k] == pivot1
        dualpivot_swap(container, k, less)
        less += 1
      elsif container[k] == pivot2
        dualpivot_swap(container, k, great)
        great -= 1
        if container[k] == pivot1
          dualpivot_swap(container, k, less)
          less += 1
        end
      end
      k += 1
    end
  end

  # Middle part; when both pivots are equal it holds only copies of that value.
  dualpivot(container, less, great, div) if pivot1 < pivot2
  container
end

# Insertion sort restricted to container[left..right]; returns the container.
# A range with right <= left is left untouched.
def self.dualpivot_insertion_sort(container, left, right)
  (left + 1).upto(right) do |i|
    data = container[i]
    j = i - 1
    while j >= left
      break if container[j] <= data
      container[j + 1] = container[j]
      j -= 1
    end
    container[j + 1] = data
  end
  container
end

# Exchanges the elements at indices i and j.
def self.dualpivot_swap(container, i, j)
  container[i], container[j] = container[j], container[i]
end
238
367
  end
368
+
@@ -0,0 +1,9 @@
1
+ =begin rdoc
2
+ This module implements string algorithms. Documentation is provided for each algorithm.
3
+
4
+ =end
5
+
6
+ begin
7
+ require 'CString'
8
+ rescue LoadError
9
+ end
@@ -117,6 +117,22 @@ class Containers::Heap
117
117
  @next && @next.value
118
118
  end
119
119
 
120
+ # call-seq:
121
+ # next_key -> key
122
+ # next_key -> nil
123
+ #
124
+ # Returns the key associated with the next item in heap order, but does not remove the value.
125
+ #
126
+ # Complexity: O(1)
127
+ #
128
+ # minheap = MinHeap.new
129
+ # minheap.push(1, :a)
130
+ # minheap.next_key #=> 1
131
+ #
132
def next_key
  # Read-only peek: nothing is popped or modified here.
  upcoming = @next
  # With no upcoming node, hand back the falsy @next itself (nil on an empty heap).
  upcoming ? upcoming.key : upcoming
end
135
+
120
136
  # call-seq:
121
137
  # clear -> nil
122
138
  #
@@ -1,28 +1,55 @@
1
1
  =begin rdoc
2
2
 
3
- A kd-tree allows searching of points in multi-dimensional space, increasing
4
- efficiency for nearest-neighbor searching in particular.
3
+ A kd-tree is a binary tree that allows one to store points (of any space dimension: 2D, 3D, etc).
4
+ The structure of the resulting tree makes it so that large portions of the tree are pruned
5
+ during queries.
6
+
7
+ One very good use of the tree is to allow nearest neighbor searching. Let's say you have a number
8
+ of points in 2D space, and you want to find the nearest 2 points from a specific point:
9
+
10
+ First, put the points into the tree:
11
+
12
+ kdtree = Containers::KDTree.new( {0 => [4, 3], 1 => [3, 4], 2 => [-1, 2], 3 => [6, 4],
13
+ 4 => [3, -5], 5 => [-2, -5] })
14
+
15
+ Then, query on the tree:
16
+
17
+ puts kdtree.find_nearest([0, 0], 2) => [[5, 2], [9, 1]]
18
+
19
+ The result is an array of [distance, id] pairs. There seems to be a bug in this version.
20
+
21
+ Note that the point queried on does not have to exist in the tree. However, if it does exist,
22
+ it will be returned.
5
23
 
6
24
  =end
7
25
 
8
26
  class Containers::KDTree
9
27
  Node = Struct.new(:id, :coords, :left, :right)
10
28
 
29
+ # Points is a hash of id => [coord, coord] pairs.
11
30
  def initialize(points)
12
- @root = build_tree(points)
31
+ raise "must pass in a hash" unless points.kind_of?(Hash)
32
+ @dimensions = points[ points.keys.first ].size
33
+ @root = build_tree(points.to_a)
34
+ @nearest = []
35
+ end
36
+
37
+ # Find k closest points to given coordinates
38
+ def find_nearest(target, k_nearest)
13
39
  @nearest = []
40
+ nearest(@root, target, k_nearest, 0)
14
41
  end
15
42
 
16
- # Build a kd-tree
43
+ # points is an array of [id, coords] pairs (the result of Hash#to_a)
17
44
  def build_tree(points, depth=0)
18
45
  return if points.empty?
19
-
20
- axis = depth % 2
21
-
22
- points.sort! { |a, b| a[1][axis] <=> b[1][axis] }
46
+
47
+ axis = depth % @dimensions
48
+
49
+ points.sort! { |a, b| a.last[axis] <=> b.last[axis] }
23
50
  median = points.size / 2
24
-
25
- node = Node.new(points[median][0], points[median][1], nil, nil)
51
+
52
+ node = Node.new(points[median].first, points[median].last, nil, nil)
26
53
  node.left = build_tree(points[0...median], depth+1)
27
54
  node.right = build_tree(points[median+1..-1], depth+1)
28
55
  node
@@ -48,15 +75,11 @@ class Containers::KDTree
48
75
  end
49
76
  nearest
50
77
  end
78
+ private :check_nearest
51
79
 
52
- # Find k closest points to given coordinates
53
- def find_nearest(target, k_nearest)
54
- @nearest = []
55
- nearest(@root, target, k_nearest, 0)
56
- end
57
-
80
+ # Recursively find nearest coordinates, going down the appropriate branch as needed
58
81
  def nearest(node, target, k_nearest, depth)
59
- axis = depth % 2
82
+ axis = depth % @dimensions
60
83
 
61
84
  if node.left.nil? && node.right.nil? # Leaf node
62
85
  @nearest = check_nearest(@nearest, node, target, k_nearest)
@@ -13,7 +13,7 @@ class Containers::Trie
13
13
  #
14
14
  # t = Containers::Trie.new
15
15
  # t["hello"] = "world"
16
- # t["hello] #=> "world"
16
+ # t["hello"] #=> "world"
17
17
  def initialize
18
18
  @root = nil
19
19
  end
@@ -0,0 +1,25 @@
1
+ # $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2
+ # require 'algorithms'
3
+ #
4
+ # if defined? Containers::CBst
5
+ # describe "CBst" do
6
+ # it "should mark ruby object references" do
7
+ # anon_key_class = Class.new do
8
+ # attr :value
9
+ # def initialize(x); @value = x; end
10
+ # def <=>(other); value <=> other.value; end
11
+ # end
12
+ # anon_val_class = Class.new
13
+ # @bst = Containers::CBst.new
14
+ # 100.times { |x| @bst.push(anon_key_class.new(x), anon_val_class.new) }
15
+ # # Mark and sweep
16
+ # ObjectSpace.garbage_collect
17
+ # # Check if any instances were swept
18
+ # count = 0
19
+ # ObjectSpace.each_object(anon_key_class) { |x| count += 1 }
20
+ # count.should eql(100)
21
+ # ObjectSpace.each_object(anon_val_class) { |x| count += 1 }
22
+ # count.should eql(200)
23
+ # end
24
+ # end
25
+ # end
data/spec/bst_spec.rb ADDED
@@ -0,0 +1,25 @@
1
+ # $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2
+ # require "algorithms"
3
+ #
4
+ # begin
5
+ # Containers::CBst
6
+ # describe "binary search tree" do
7
+ # it "should let user push new elements with key" do
8
+ # @bst = Containers::CBst.new
9
+ # 100.times { |x| @bst.push(x, "hello : #{x}") }
10
+ # @bst.size.should eql(100)
11
+ # end
12
+ #
13
+ # it "should allow users to delete elements" do
14
+ # @bst = Containers::CBst.new
15
+ # @bst.push(10, "hello world")
16
+ # @bst.push(11, "hello world")
17
+ # @bst.delete(11)
18
+ # @bst.size.should eql(1)
19
+ # @bst.delete(10)
20
+ # @bst.size.should eql(0)
21
+ # end
22
+ #
23
+ # end
24
+ # rescue Exception
25
+ # end
@@ -12,7 +12,7 @@ if defined? Containers::CDeque
12
12
  # Check if any instances were swept
13
13
  count = 0
14
14
  ObjectSpace.each_object(anon_class) { |x| count += 1 }
15
- count.should eql(100)
15
+ expect(count).to eql(100)
16
16
  end
17
17
  end
18
18
  end
data/spec/deque_spec.rb CHANGED
@@ -1,77 +1,77 @@
1
1
  $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2
2
  require 'algorithms'
3
3
 
4
- describe "(empty deque)", :shared => true do
4
+ shared_examples "(empty deque)" do
5
5
  it "should return nil when popping objects" do
6
- @deque.pop_front.should be_nil
7
- @deque.pop_back.should be_nil
6
+ expect(@deque.pop_front).to be_nil
7
+ expect(@deque.pop_back).to be_nil
8
8
  end
9
9
 
10
10
  it "should return a size of 1 when sent #push_front" do
11
11
  @deque.push_front(1)
12
- @deque.size.should eql(1)
12
+ expect(@deque.size).to eql(1)
13
13
  end
14
14
 
15
15
  it "should return a size of 1 when sent #push_back" do
16
16
  @deque.push_back(1)
17
- @deque.size.should eql(1)
17
+ expect(@deque.size).to eql(1)
18
18
  end
19
19
 
20
20
  it "should return nil when sent #front and #back" do
21
- @deque.front.should be_nil
22
- @deque.back.should be_nil
21
+ expect(@deque.front).to be_nil
22
+ expect(@deque.back).to be_nil
23
23
  end
24
24
 
25
25
  it "should be empty" do
26
- @deque.should be_empty
26
+ expect(@deque).to be_empty
27
27
  end
28
28
 
29
29
  it "should raise ArgumentError if passed more than one argument" do
30
- lambda { @deque.class.send("new", Time.now, []) }.should raise_error
30
+ expect { @deque.class.send("new", Time.now, []) }.to raise_error(ArgumentError)
31
31
  end
32
32
  end
33
33
 
34
- describe "(non-empty deque)", :shared => true do
34
+ shared_examples "(non-empty deque)" do
35
35
  before(:each) do
36
36
  @deque.push_back(10)
37
37
  @deque.push_back("10")
38
38
  end
39
39
 
40
40
  it "should return last pushed object with pop_back" do
41
- @deque.pop_back.should eql("10")
42
- @deque.pop_back.should eql(10)
41
+ expect(@deque.pop_back).to eql("10")
42
+ expect(@deque.pop_back).to eql(10)
43
43
  end
44
44
 
45
45
  it "should return first pushed object with pop_front" do
46
- @deque.pop_front.should eql(10)
47
- @deque.pop_front.should eql("10")
46
+ expect(@deque.pop_front).to eql(10)
47
+ expect(@deque.pop_front).to eql("10")
48
48
  end
49
49
 
50
50
  it "should return a size greater than 0" do
51
- @deque.size.should eql(2)
51
+ expect(@deque.size).to eql(2)
52
52
  end
53
53
 
54
54
  it "should not be empty" do
55
- @deque.should_not be_empty
55
+ expect(@deque).not_to be_empty
56
56
  end
57
57
 
58
58
  it "should iterate in LIFO order with #each_backward" do
59
59
  arr = []
60
60
  @deque.each_backward { |obj| arr << obj }
61
- arr.should eql(["10", 10])
61
+ expect(arr).to eql(["10", 10])
62
62
  end
63
63
 
64
64
  it "should iterate in FIFO order with #each_forward" do
65
65
  arr = []
66
66
  @deque.each_forward { |obj| arr << obj }
67
- arr.should eql([10, "10"])
67
+ expect(arr).to eql([10, "10"])
68
68
  end
69
69
 
70
70
  it "should return nil after everything's popped" do
71
71
  @deque.pop_back
72
72
  @deque.pop_back
73
- @deque.pop_back.should be_nil
74
- @deque.front.should be_nil
73
+ expect(@deque.pop_back).to be_nil
74
+ expect(@deque.front).to be_nil
75
75
  end
76
76
  end
77
77