algorithms 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/algorithms.rb CHANGED
@@ -29,6 +29,7 @@
29
29
  * Splay Trees - Containers::SplayTreeMap
30
30
  * Tries - Containers::Trie
31
31
  * Suffix Array - Containers::SuffixArray
32
+ * kd Tree - Containers::KDTree
32
33
 
33
34
  * Search algorithms
34
35
  - Binary Search - Algorithms::Search.binary_search
@@ -42,19 +43,17 @@
42
43
  - Shell sort - Algorithms::Sort.shell_sort
43
44
  - Quicksort - Algorithms::Sort.quicksort
44
45
  - Mergesort - Algorithms::Sort.mergesort
46
+ - Dual-Pivot Quicksort - Algorithms::Sort.dualpivotquicksort
47
+ * String algorithms
48
+ - Levenshtein distance - Algorithms::String.levenshtein_dist
45
49
  =end
46
50
 
47
51
  module Algorithms; end
48
52
  module Containers; end
49
53
 
50
- begin
51
- require 'CBst'
52
- Containers::Bst = Containers::CBst
53
- rescue LoadError # C Version could not be found
54
- end
55
-
56
54
  require 'algorithms/search'
57
55
  require 'algorithms/sort'
56
+ require 'algorithms/string'
58
57
  require 'containers/heap'
59
58
  require 'containers/stack'
60
59
  require 'containers/deque'
@@ -235,4 +235,134 @@ module Algorithms::Sort
235
235
  sorted + left + right
236
236
  end
237
237
 
238
+ # Dual-Pivot Quicksort is a variation of Quicksort by Vladimir Yaroslavskiy.
239
+ # This is an implementation of the algorithm as it was found in the original
240
+ # research paper:
241
+ #
242
+ # http://iaroslavski.narod.ru/quicksort/DualPivotQuicksort.pdf
243
+ #
244
+ # Mirror:
245
+ # http://codeblab.com/wp-content/uploads/2009/09/DualPivotQuicksort.pdf
246
+ #
247
+ # "This algorithm offers O(n log(n)) performance on many data sets that cause
248
+ # other quicksorts to degrade to quadratic performance, and is typically
249
+ # faster than traditional (one-pivot) Quicksort implementations."
250
+ # -- http://download.oracle.com/javase/7/docs/api/java/util/Arrays.html
251
+ #
252
+ # The algorithm was improved by Vladimir Yaroslavskiy, Jon Bentley, and
253
+ # Joshua Bloch, and was implemented as the default sort algorithm for
254
+ # primitives in Java 7.
255
+ #
256
+ # Implementation in the Java JDK as of November, 2011:
257
+ # http://www.docjar.com/html/api/java/util/DualPivotQuicksort.java.html
258
+ #
259
+ # It is proved that for the Dual-Pivot Quicksort the average number
260
+ # of comparisons is 2*n*ln(n), the average number of swaps is
261
+ # 0.8*n*ln(n), whereas classical Quicksort algorithm has 2*n*ln(n)
262
+ # and 1*n*ln(n) respectively. This has been fully examined mathematically
263
+ # and experimentally.
264
+ #
265
+ # Requirements: Container should implement #size, #[] and #[]=, and include the Enumerable module.
266
+ # Time Complexity: O(n log n) average, O(n^2) worst-case
267
+ # Space Complexity: О(n) auxiliary
268
+ #
269
+ # Stable: No
270
+ #
271
+ # Algorithms::Sort.dualpivotquicksort [5, 4, 3, 1, 2] => [1, 2, 3, 4, 5]
272
+
273
+ def self.dualpivotquicksort(container)
274
+ return container if container.size <= 1
275
+ dualpivot(container, 0, container.size-1, 3)
276
+ end
277
+
278
+ def self.dualpivot(container, left=0, right=container.size-1, div=3)
279
+ length = right - left
280
+ if length < 27 # insertion sort for tiny array
281
+ container.each_with_index do |data,i|
282
+ j = i - 1
283
+ while j >= 0
284
+ break if container[j] <= data
285
+ container[j + 1] = container[j]
286
+ j = j - 1
287
+ end
288
+ container[j + 1] = data
289
+ end
290
+ else # full dual-pivot quicksort
291
+ third = length / div
292
+ # medians
293
+ m1 = left + third
294
+ m2 = right - third
295
+ if m1 <= left
296
+ m1 = left + 1
297
+ end
298
+ if m2 >= right
299
+ m2 = right - 1
300
+ end
301
+ if container[m1] < container[m2]
302
+ dualpivot_swap(container, m1, left)
303
+ dualpivot_swap(container, m2, right)
304
+ else
305
+ dualpivot_swap(container, m1, right)
306
+ dualpivot_swap(container, m2, left)
307
+ end
308
+ # pivots
309
+ pivot1 = container[left]
310
+ pivot2 = container[right]
311
+ # pointers
312
+ less = left + 1
313
+ great = right - 1
314
+ # sorting
315
+ k = less
316
+ while k <= great
317
+ if container[k] < pivot1
318
+ dualpivot_swap(container, k, less += 1)
319
+ elsif container[k] > pivot2
320
+ while k < great && container[great] > pivot2
321
+ great -= 1
322
+ end
323
+ dualpivot_swap(container, k, great -= 1)
324
+ if container[k] < pivot1
325
+ dualpivot_swap(container, k, less += 1)
326
+ end
327
+ end
328
+ k += 1
329
+ end
330
+ # swaps
331
+ dist = great - less
332
+ if dist < 13
333
+ div += 1
334
+ end
335
+ dualpivot_swap(container, less-1, left)
336
+ dualpivot_swap(container, great+1, right)
337
+ # subarrays
338
+ dualpivot(container, left, less-2, div)
339
+ dualpivot(container, great+2, right, div)
340
+ # equal elements
341
+ if dist > length - 13 && pivot1 != pivot2
342
+ for k in less..great do
343
+ if container[k] == pivot1
344
+ dualpivot_swap(container, k, less)
345
+ less += 1
346
+ elsif container[k] == pivot2
347
+ dualpivot_swap(container, k, great)
348
+ great -= 1
349
+ if container[k] == pivot1
350
+ dualpivot_swap(container, k, less)
351
+ less += 1
352
+ end
353
+ end
354
+ end
355
+ end
356
+ # subarray
357
+ if pivot1 < pivot2
358
+ dualpivot(container, less, great, div)
359
+ end
360
+ container
361
+ end
362
+ end
363
+
364
+ def self.dualpivot_swap(container, i, j)
365
+ container[i], container[j] = container[j], container[i]
366
+ end
238
367
  end
368
+
@@ -0,0 +1,9 @@
1
+ =begin rdoc
2
+ This module implements string algorithms. Documentation is provided for each algorithm.
3
+
4
+ =end
5
+
6
+ begin
7
+ require 'CString'
8
+ rescue LoadError
9
+ end
@@ -117,6 +117,22 @@ class Containers::Heap
117
117
  @next && @next.value
118
118
  end
119
119
 
120
+ # call-seq:
121
+ # next_key -> key
122
+ # next_key -> nil
123
+ #
124
+ # Returns the key associated with the next item in heap order, but does not remove the value.
125
+ #
126
+ # Complexity: O(1)
127
+ #
128
+ # minheap = MinHeap.new
129
+ # minheap.push(1, :a)
130
+ # minheap.next_key #=> 1
131
+ #
132
+ def next_key
133
+ @next && @next.key
134
+ end
135
+
120
136
  # call-seq:
121
137
  # clear -> nil
122
138
  #
@@ -1,28 +1,55 @@
1
1
  =begin rdoc
2
2
 
3
- A kd-tree allows searching of points in multi-dimensional space, increasing
4
- efficiency for nearest-neighbor searching in particular.
3
+ A kd-tree is a binary tree that allows one to store points (of any space dimension: 2D, 3D, etc).
4
+ The structure of the resulting tree makes it so that large portions of the tree are pruned
5
+ during queries.
6
+
7
+ One very good use of the tree is to allow nearest neighbor searching. Let's say you have a number
8
+ of points in 2D space, and you want to find the nearest 2 points from a specific point:
9
+
10
+ First, put the points into the tree:
11
+
12
+ kdtree = Containers::KDTree.new( {0 => [4, 3], 1 => [3, 4], 2 => [-1, 2], 3 => [6, 4],
13
+ 4 => [3, -5], 5 => [-2, -5] })
14
+
15
+ Then, query on the tree:
16
+
17
+ puts kdtree.find_nearest([0, 0], 2) => [[5, 2], [9, 1]]
18
+
19
+ The result is an array of [distance, id] pairs. There seems to be a bug in this version.
20
+
21
+ Note that the point queried on does not have to exist in the tree. However, if it does exist,
22
+ it will be returned.
5
23
 
6
24
  =end
7
25
 
8
26
  class Containers::KDTree
9
27
  Node = Struct.new(:id, :coords, :left, :right)
10
28
 
29
+ # Points is a hash of id => [coord, coord] pairs.
11
30
  def initialize(points)
12
- @root = build_tree(points)
31
+ raise "must pass in a hash" unless points.kind_of?(Hash)
32
+ @dimensions = points[ points.keys.first ].size
33
+ @root = build_tree(points.to_a)
34
+ @nearest = []
35
+ end
36
+
37
+ # Find k closest points to given coordinates
38
+ def find_nearest(target, k_nearest)
13
39
  @nearest = []
40
+ nearest(@root, target, k_nearest, 0)
14
41
  end
15
42
 
16
- # Build a kd-tree
43
+ # points is an array
17
44
  def build_tree(points, depth=0)
18
45
  return if points.empty?
19
-
20
- axis = depth % 2
21
-
22
- points.sort! { |a, b| a[1][axis] <=> b[1][axis] }
46
+
47
+ axis = depth % @dimensions
48
+
49
+ points.sort! { |a, b| a.last[axis] <=> b.last[axis] }
23
50
  median = points.size / 2
24
-
25
- node = Node.new(points[median][0], points[median][1], nil, nil)
51
+
52
+ node = Node.new(points[median].first, points[median].last, nil, nil)
26
53
  node.left = build_tree(points[0...median], depth+1)
27
54
  node.right = build_tree(points[median+1..-1], depth+1)
28
55
  node
@@ -48,15 +75,11 @@ class Containers::KDTree
48
75
  end
49
76
  nearest
50
77
  end
78
+ private :check_nearest
51
79
 
52
- # Find k closest points to given coordinates
53
- def find_nearest(target, k_nearest)
54
- @nearest = []
55
- nearest(@root, target, k_nearest, 0)
56
- end
57
-
80
+ # Recursively find nearest coordinates, going down the appropriate branch as needed
58
81
  def nearest(node, target, k_nearest, depth)
59
- axis = depth % 2
82
+ axis = depth % @dimensions
60
83
 
61
84
  if node.left.nil? && node.right.nil? # Leaf node
62
85
  @nearest = check_nearest(@nearest, node, target, k_nearest)
@@ -13,7 +13,7 @@ class Containers::Trie
13
13
  #
14
14
  # t = Containers::Trie.new
15
15
  # t["hello"] = "world"
16
- # t["hello] #=> "world"
16
+ # t["hello"] #=> "world"
17
17
  def initialize
18
18
  @root = nil
19
19
  end
@@ -0,0 +1,25 @@
1
+ # $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2
+ # require 'algorithms'
3
+ #
4
+ # if defined? Containers::CBst
5
+ # describe "CBst" do
6
+ # it "should mark ruby object references" do
7
+ # anon_key_class = Class.new do
8
+ # attr :value
9
+ # def initialize(x); @value = x; end
10
+ # def <=>(other); value <=> other.value; end
11
+ # end
12
+ # anon_val_class = Class.new
13
+ # @bst = Containers::CBst.new
14
+ # 100.times { |x| @bst.push(anon_key_class.new(x), anon_val_class.new) }
15
+ # # Mark and sweep
16
+ # ObjectSpace.garbage_collect
17
+ # # Check if any instances were swept
18
+ # count = 0
19
+ # ObjectSpace.each_object(anon_key_class) { |x| count += 1 }
20
+ # count.should eql(100)
21
+ # ObjectSpace.each_object(anon_val_class) { |x| count += 1 }
22
+ # count.should eql(200)
23
+ # end
24
+ # end
25
+ # end
data/spec/bst_spec.rb ADDED
@@ -0,0 +1,25 @@
1
+ # $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2
+ # require "algorithms"
3
+ #
4
+ # begin
5
+ # Containers::CBst
6
+ # describe "binary search tree" do
7
+ # it "should let user push new elements with key" do
8
+ # @bst = Containers::CBst.new
9
+ # 100.times { |x| @bst.push(x, "hello : #{x}") }
10
+ # @bst.size.should eql(100)
11
+ # end
12
+ #
13
+ # it "should allow users to delete elements" do
14
+ # @bst = Containers::CBst.new
15
+ # @bst.push(10, "hello world")
16
+ # @bst.push(11, "hello world")
17
+ # @bst.delete(11)
18
+ # @bst.size.should eql(1)
19
+ # @bst.delete(10)
20
+ # @bst.size.should eql(0)
21
+ # end
22
+ #
23
+ # end
24
+ # rescue Exception
25
+ # end
@@ -12,7 +12,7 @@ if defined? Containers::CDeque
12
12
  # Check if any instances were swept
13
13
  count = 0
14
14
  ObjectSpace.each_object(anon_class) { |x| count += 1 }
15
- count.should eql(100)
15
+ expect(count).to eql(100)
16
16
  end
17
17
  end
18
18
  end
data/spec/deque_spec.rb CHANGED
@@ -1,77 +1,77 @@
1
1
  $: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2
2
  require 'algorithms'
3
3
 
4
- describe "(empty deque)", :shared => true do
4
+ shared_examples "(empty deque)" do
5
5
  it "should return nil when popping objects" do
6
- @deque.pop_front.should be_nil
7
- @deque.pop_back.should be_nil
6
+ expect(@deque.pop_front).to be_nil
7
+ expect(@deque.pop_back).to be_nil
8
8
  end
9
9
 
10
10
  it "should return a size of 1 when sent #push_front" do
11
11
  @deque.push_front(1)
12
- @deque.size.should eql(1)
12
+ expect(@deque.size).to eql(1)
13
13
  end
14
14
 
15
15
  it "should return a size of 1 when sent #push_back" do
16
16
  @deque.push_back(1)
17
- @deque.size.should eql(1)
17
+ expect(@deque.size).to eql(1)
18
18
  end
19
19
 
20
20
  it "should return nil when sent #front and #back" do
21
- @deque.front.should be_nil
22
- @deque.back.should be_nil
21
+ expect(@deque.front).to be_nil
22
+ expect(@deque.back).to be_nil
23
23
  end
24
24
 
25
25
  it "should be empty" do
26
- @deque.should be_empty
26
+ expect(@deque).to be_empty
27
27
  end
28
28
 
29
29
  it "should raise ArgumentError if passed more than one argument" do
30
- lambda { @deque.class.send("new", Time.now, []) }.should raise_error
30
+ expect { @deque.class.send("new", Time.now, []) }.to raise_error(ArgumentError)
31
31
  end
32
32
  end
33
33
 
34
- describe "(non-empty deque)", :shared => true do
34
+ shared_examples "(non-empty deque)" do
35
35
  before(:each) do
36
36
  @deque.push_back(10)
37
37
  @deque.push_back("10")
38
38
  end
39
39
 
40
40
  it "should return last pushed object with pop_back" do
41
- @deque.pop_back.should eql("10")
42
- @deque.pop_back.should eql(10)
41
+ expect(@deque.pop_back).to eql("10")
42
+ expect(@deque.pop_back).to eql(10)
43
43
  end
44
44
 
45
45
  it "should return first pushed object with pop_front" do
46
- @deque.pop_front.should eql(10)
47
- @deque.pop_front.should eql("10")
46
+ expect(@deque.pop_front).to eql(10)
47
+ expect(@deque.pop_front).to eql("10")
48
48
  end
49
49
 
50
50
  it "should return a size greater than 0" do
51
- @deque.size.should eql(2)
51
+ expect(@deque.size).to eql(2)
52
52
  end
53
53
 
54
54
  it "should not be empty" do
55
- @deque.should_not be_empty
55
+ expect(@deque).not_to be_empty
56
56
  end
57
57
 
58
58
  it "should iterate in LIFO order with #each_backward" do
59
59
  arr = []
60
60
  @deque.each_backward { |obj| arr << obj }
61
- arr.should eql(["10", 10])
61
+ expect(arr).to eql(["10", 10])
62
62
  end
63
63
 
64
64
  it "should iterate in FIFO order with #each_forward" do
65
65
  arr = []
66
66
  @deque.each_forward { |obj| arr << obj }
67
- arr.should eql([10, "10"])
67
+ expect(arr).to eql([10, "10"])
68
68
  end
69
69
 
70
70
  it "should return nil after everything's popped" do
71
71
  @deque.pop_back
72
72
  @deque.pop_back
73
- @deque.pop_back.should be_nil
74
- @deque.front.should be_nil
73
+ expect(@deque.pop_back).to be_nil
74
+ expect(@deque.front).to be_nil
75
75
  end
76
76
  end
77
77