burrows_wheeler 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0a169f09d5ef17035eaa0718fd9040391242c760
4
+ data.tar.gz: dd7c0ae92daa3380414d19db5cd44af4470c7557
5
+ SHA512:
6
+ metadata.gz: b23520d72182c2dcbf1c41b2d8a59a5eaef6c353f91e99ff4aac566988ebbbeb738b3d3d8cda5d392d8962df7d32e4ae709e50e32bd6a0e2863f49ab84a23557
7
+ data.tar.gz: b5c2d14d467fbda359cb5e13a9e10a37b2f83c42243c133a8f3747694eff77ecf29ed63d7ac8a6aaf47f6184d37d46927c75cc8aaf9a151f83b373fdd98e3814
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.0
4
+ - 2.0.0
5
+ - 1.9.3
6
+ script: "rake test"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in burrows_wheeler.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 M.Magomedov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ [**Burrows-Wheeler transform**](http://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform)
2
+
3
+ [![Build Status](https://travis-ci.org/ethylamide/burrows_wheeler.svg)](https://travis-ci.org/ethylamide/burrows_wheeler)
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'burrows_wheeler'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install burrows_wheeler
18
+
19
+ ## Usage
20
+
21
+ Typical use-case looks something like this:
22
+
23
+ ```ruby
24
+ require 'burrows_wheeler/transform'
25
+ inp = StringIO.new('ABRACADABRA!')
26
+ File.open('/tmp/file.bwt', 'wb') do |out|
27
+ BurrowsWheeler::Transform.encode(inp, out)
28
+ end
29
+ ```
30
+
31
+ ```ruby
32
+ File.open('/tmp/file.bwt') do |inp|
33
+ out = StringIO.new
34
+ BurrowsWheeler::Transform.decode(inp, out)
35
+ puts out.string
36
+ end
37
+ ```
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it ( https://github.com/ethylamide/burrows_wheeler/fork )
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Defines the `test` task: runs every test/test_*.rb file with test/ on the
# load path so that `require 'helper'` resolves.
Rake::TestTask.new do |task|
  # Rake::TestTask#libs already defaults to ['lib'], so only the extra
  # directory is appended, as a plain String rather than a nested Array.
  task.libs << 'test'
  task.pattern = 'test/test_*.rb'
end

# Running bare `rake` executes the test suite.
task default: :test
10
+
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'burrows_wheeler/version'
5
+
6
# Gem manifest for burrows_wheeler. The file list is taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name          = "burrows_wheeler"
  spec.version       = BurrowsWheeler::VERSION
  spec.authors       = ["M.Magomedov"]
  spec.email         = ["mmagomedoff@gmail.com"]
  spec.summary       = %q{Burrows-Wheeler transformation.}
  spec.description   = %q{Burrows-Wheeler transformation. Encoding and decoding.}
  # Was an empty string; use the repository URL that the README links to.
  spec.homepage      = "https://github.com/ethylamide/burrows_wheeler"
  spec.license       = "MIT"

  # NUL-separated listing copes with file names that contain whitespace.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,4 @@
1
+ require 'burrows_wheeler/version'
2
+
3
# Root namespace of the gem. This entry file only loads the version constant;
# the module body is intentionally empty here.
+ module BurrowsWheeler
4
+ end
@@ -0,0 +1,52 @@
1
module BurrowsWheeler
  # A rotated, read-only view over a string.
  #
  # Nothing is copied on construction: position +i+ of the view maps to
  # position <tt>(i + shift) % length</tt> of the underlying string.
  #
  #   CircularString.new('ABRACADABRA!', 5).to_s
  #   # => "ADABRA!ABRAC"
  class CircularString
    include Comparable

    attr_reader :string
    attr_reader :shift

    # string - the underlying String being viewed.
    # shift  - Integer offset of the view's first character within +string+.
    def initialize(string, shift)
      @string = string
      @shift = shift
    end

    # Returns the character at +index+ of the rotated view, or nil when
    # +index+ lies outside 0...length (negative indices are not supported).
    def [](index)
      return nil if index < 0 || index >= length

      @string[(index + @shift) % length]
    end

    # Returns the last character of the rotated view (nil for an empty string).
    def last
      self[length - 1]
    end

    # Returns a new CircularString over the slice start...finish of the
    # underlying (unrotated) string, keeping the same shift. Because indexing
    # applies the shift modulo the length, the shift wraps around the shorter
    # slice.
    def subsequence(start, finish)
      CircularString.new(@string[start, finish - start], @shift)
    end

    # Compares two rotated views character by character; when one is a prefix
    # of the other, the shorter one sorts first.
    #
    # Returns -1, 0 or 1, or nil when +other+ cannot be indexed and measured,
    # which is what Comparable expects for incomparable operands.
    def <=>(other)
      return nil unless other.respond_to?(:length) && other.respond_to?(:[])

      common = [length, other.length].min
      common.times do |i|
        mine, theirs = self[i], other[i]
        return mine <=> theirs unless mine == theirs
      end

      length <=> other.length
    end

    # Returns the length of the underlying string.
    def length
      @string.length
    end

    # Returns the rotated view as a new String.
    #
    # Built from two slices instead of appending one character at a time,
    # which copied the growing result on every step.
    def to_s
      return '' if length.zero?

      # Normalise the shift so that values >= length (or negative) wrap around
      # the same way #[] does.
      pivot = @shift % length
      @string[pivot..-1] + @string[0...pivot]
    end
  end
end
@@ -0,0 +1,15 @@
1
# Forwardable lives in the standard library but is not loaded by default;
# without this require, `extend Forwardable` raises NameError unless some
# other library has already loaded it.
require 'forwardable'

module BurrowsWheeler
  # The sorted order of all rotations ("circular suffixes") of a string.
  #
  # Element +i+ is the shift of the rotation that ends up at position +i+
  # once all rotations are sorted. For 'ABRACADABRA!' the order is
  # [11, 10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2].
  class CircularSuffixArray
    extend Forwardable

    # Read access is forwarded to the underlying Array of shifts.
    def_delegators :@indices, :length, :[], :map, :each, :to_a

    # string - the String whose rotations are sorted. CircularString must be
    #          loaded by the time this is called.
    def initialize(string)
      rotations = Array.new(string.length) do |shift|
        CircularString.new(string, shift)
      end

      @indices = rotations.sort.map(&:shift)
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'burrows_wheeler/circular_string'
2
+ require 'burrows_wheeler/circular_suffix_array'
3
+
4
module BurrowsWheeler
  # Burrows-Wheeler transform over IO-like objects.
  #
  # Stream layout: an 8-byte unsigned little-endian integer holding the row
  # of the sorted rotation matrix that contains the original string, followed
  # by the last column of that matrix.
  class Transform
    # Explicit little-endian so the stream does not depend on the byte order
    # of the machine that wrote it (plain 'Q' is native-endian).
    HEADER_FORMAT = 'Q<'.freeze
    # Size in bytes of the packed row index.
    HEADER_SIZE = 8

    class << self
      # Reads all of +inp+, transforms it and writes the result to +out+.
      #
      # inp - object responding to #read.
      # out - object responding to #write and #flush.
      #
      # For empty input the row index stays -1, which packs as all-ones, and
      # no symbols follow. Returns whatever out.flush returns.
      def encode(inp, out)
        str = inp.read
        csa = CircularSuffixArray.new(str)
        first = -1
        symbols = []

        str.length.times do |i|
          idx = csa[i]
          # The last character of the rotation starting at idx is the one
          # just before idx; idx == 0 wraps to the end through str[-1].
          symbols << str[idx - 1]
          first = i if idx.zero?
        end

        out.write([first].pack(HEADER_FORMAT))
        out.write(symbols.join)

        out.flush
      end

      # Reads a stream produced by .encode from +inp+ and writes the original
      # text to +out+.
      #
      # inp - object responding to #read.
      # out - object responding to #write and #flush.
      #
      # Raises ArgumentError when the 8-byte header is missing or truncated.
      # Returns whatever out.flush returns.
      def decode(inp, out)
        header = inp.read(HEADER_SIZE)
        if header.nil? || header.bytesize < HEADER_SIZE
          raise ArgumentError, 'truncated input: missing 8-byte row index header'
        end

        row = header.unpack(HEADER_FORMAT).first
        data = inp.read.split('')
        column = data.sort

        # Position in the sorted column of the first occurrence of each symbol.
        slot = {}
        column.each_with_index { |char, pos| slot[char] ||= pos }

        # nxt[j] is the row of the last column holding the same occurrence of
        # the symbol found at row j of the sorted column.
        nxt = Array.new(data.length)
        data.each_with_index do |char, i|
          nxt[slot[char]] = i
          slot[char] += 1
        end

        data.length.times do
          out.write(column[row])
          row = nxt[row]
        end

        out.flush
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module BurrowsWheeler
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
data/test/helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'minitest/autorun'
4
+ require 'minitest/unit'
@@ -0,0 +1,32 @@
1
+ require 'helper'
2
+ require 'burrows_wheeler/circular_string'
3
+
4
module BurrowsWheeler
  # Unit tests for CircularString, using 'ABRACADABRA!' rotated by 5,
  # i.e. the view "ADABRA!ABRAC".
  class TestCircularString < MiniTest::Unit::TestCase
    def setup
      @circular_string = CircularString.new('ABRACADABRA!', 5)
    end

    def test_indexing
      assert_equal '!', @circular_string[6]
    end

    def test_length
      assert_equal 12, @circular_string.length
    end

    def test_to_string_transformation
      assert_equal 'ADABRA!ABRAC', @circular_string.to_s
    end

    def test_comparing
      other = CircularString.new('ABRACADABRA!', 11)
      assert_operator @circular_string, :>, other
    end

    def test_subsequence
      sub = @circular_string.subsequence(0, 6)
      # assert_equal takes (expected, actual); the arguments used to be
      # swapped, which mislabels the values in the failure message.
      assert_equal 'AABRAC', sub.to_s
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+ require 'burrows_wheeler/circular_string'
3
+ require 'burrows_wheeler/circular_suffix_array'
4
+
5
module BurrowsWheeler
  # Checks the sorted rotation order computed for 'ABRACADABRA!'.
  class TestCircularSuffixArray < MiniTest::Unit::TestCase
    def test_length
      assert_equal(12, suffix_array.length)
    end

    def test_indexing
      assert_equal(0, suffix_array[3])
    end

    def test_array
      expected = [11, 10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]
      assert_equal(expected, suffix_array.to_a)
    end

    private

    # Subject under test, built once per test instance.
    def suffix_array
      @circular_suffix_array ||= CircularSuffixArray.new('ABRACADABRA!')
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'helper'
2
+ require 'burrows_wheeler/transform'
3
+
4
module BurrowsWheeler
  # Round-trip fixtures for Transform: 'ABRACADABRA!' and its encoded form
  # (8-byte row index 3 followed by the last column "ARD!RCAAAABB").
  class TestTransform < MiniTest::Unit::TestCase
    def test_encode
      sink = StringIO.new
      BurrowsWheeler::Transform.encode(StringIO.new('ABRACADABRA!'), sink)

      expected = "\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000ARD!RCAAAABB"
      assert_equal(expected, sink.string)
    end

    def test_decode
      source = StringIO.new("\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000ARD!RCAAAABB")
      sink = StringIO.new
      BurrowsWheeler::Transform.decode(source, sink)

      assert_equal('ABRACADABRA!', sink.string)
    end
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: burrows_wheeler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M.Magomedov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Burrows-Wheeler transformation. Encoding and decoding.
42
+ email:
43
+ - mmagomedoff@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - ".travis.yml"
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - burrows_wheeler.gemspec
55
+ - lib/burrows_wheeler.rb
56
+ - lib/burrows_wheeler/circular_string.rb
57
+ - lib/burrows_wheeler/circular_suffix_array.rb
58
+ - lib/burrows_wheeler/transform.rb
59
+ - lib/burrows_wheeler/version.rb
60
+ - test/helper.rb
61
+ - test/test_circular_string.rb
62
+ - test/test_circular_suffix_array.rb
63
+ - test/test_transform.rb
64
+ homepage: ''
65
+ licenses:
66
+ - MIT
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 2.2.2
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Burrows-Wheeler transformation.
88
+ test_files:
89
+ - test/helper.rb
90
+ - test/test_circular_string.rb
91
+ - test/test_circular_suffix_array.rb
92
+ - test/test_transform.rb