bio-maf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio-maf.rb +12 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +181 -0
data/.document
ADDED
data/.simplecov
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
SimpleCov.start
|
data/.travis.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Travis CI configuration for bio-maf.
language: ruby
before_install:
  # Alternative JDK selection, currently disabled:
  #- sudo update-java-alternatives -s java-1.7.0-openjdk-i386
  - sudo update-java-alternatives -s java-1.6.0-openjdk
  # Presumably installs the Kyoto Cabinet native library (script not shown here).
  - sudo ./travis-ci/install_kc
# Skip the :development gem group (docs / man-page tooling) on CI.
bundler_args: --without development
script: "bundle exec rake test"
after_script:
  - ./travis-ci/report_errors
rvm:
  - 1.9.3
  - jruby-19mode # JRuby in 1.9 mode
  - rbx-19mode
matrix:
  # Rubinius failures are reported but do not fail the overall build.
  allow_failures:
    - rvm: rbx-19mode
|
data/.yardopts
ADDED
data/DEVELOPMENT.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Development guide
|
2
|
+
|
3
|
+
Here are notes on less obvious aspects of the development process for
|
4
|
+
this library.
|
5
|
+
|
6
|
+
## kyotocabinet-java
|
7
|
+
|
8
|
+
Running `bio-maf` on JRuby requires the [kyotocabinet-java][] gem, a
|
9
|
+
wrapper around the Kyoto Cabinet Java interface providing a Ruby API
|
10
|
+
compatible with the standard Kyoto Cabinet Ruby API.
|
11
|
+
|
12
|
+
[kyotocabinet-java]: https://github.com/csw/kyotocabinet-java
|
13
|
+
|
14
|
+
## Man pages
|
15
|
+
|
16
|
+
Man pages are developed with [ronn][] and live in `man/`; see
|
17
|
+
[maf_index.1.ronn][] for an example. The generated man pages,
|
18
|
+
e.g. `maf_index.1`, are added to Git for [gem-man][] support.
|
19
|
+
|
20
|
+
[ronn]: https://github.com/rtomayko/ronn
|
21
|
+
[gem-man]: https://github.com/defunkt/gem-man
|
22
|
+
[maf_index.1.ronn]: https://github.com/csw/bioruby-maf/blob/master/man/maf_index.1.ronn
|
23
|
+
|
24
|
+
HTML and roff versions are built with:
|
25
|
+
|
26
|
+
$ rake man
|
27
|
+
|
28
|
+
The HTML versions are published through Octopress to Github Pages,
|
29
|
+
e.g. <http://csw.github.com/bioruby-maf/man/maf_index.1.html>. This is
|
30
|
+
a separate step, and necessarily dependent on the local filesystem
|
31
|
+
layout. Specifically, there must be an `octopress` directory at the
|
32
|
+
same level as `bioruby-maf`, containing a checked-out copy of
|
33
|
+
<https://github.com/csw/bioruby-maf-blog>. Then, to publish the man
|
34
|
+
pages, run:
|
35
|
+
|
36
|
+
$ rake man:publish
|
37
|
+
|
38
|
+
After this, in that Octopress instance, run:
|
39
|
+
|
40
|
+
$ rake deploy
|
data/Gemfile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Fetch gems over TLS; plain http:// sources can be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.

# Runtime dependencies are declared in the gemspec.
gemspec

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rdoc", "~> 3.12"
  gem "simplecov", "~> 0.6.4", :platforms => :mri
  gem "yard", "~> 0.8.1"
  gem "kramdown", "~> 0.13.6"
  gem "redcarpet", "~> 2.1.1", :platforms => :mri
  gem "ronn", "~> 0.7.3", :platforms => :mri
  gem "sinatra", "~> 1.3.2" # for ronn --server
end

# Gems needed to run the spec and cucumber suites.
group :test do
  gem "bundler", ">= 1.0.0"
  gem "rake", ">= 0.9"
  gem "cucumber", ">= 0"
  gem "rspec", "~> 2.10.0"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 csw
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
# bio-maf
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/csw/bioruby-maf.png)](http://travis-ci.org/csw/bioruby-maf)
|
4
|
+
|
5
|
+
This is a plugin for [BioRuby](http://bioruby.open-bio.org/) adding
|
6
|
+
support for the
|
7
|
+
[Multiple Alignment Format](http://genome.ucsc.edu/FAQ/FAQformat#format5)
|
8
|
+
(MAF), used in bioinformatics to store whole-genome sets of multiple
|
9
|
+
sequence alignments.
|
10
|
+
|
11
|
+
Ultimately it will provide indexed and sequential access to MAF data,
|
12
|
+
as well as performing various manipulations on it and writing modified
|
13
|
+
MAF files. So far, it supports sequential parsing and indexed queries (see Usage below).
|
14
|
+
|
15
|
+
For more information, see the
|
16
|
+
[project wiki](https://github.com/csw/bioruby-maf/wiki).
|
17
|
+
|
18
|
+
Developer documentation generated with YARD is available at
|
19
|
+
[rubydoc.info](http://rubydoc.info/github/csw/bioruby-maf/).
|
20
|
+
|
21
|
+
This is being developed by Clayton Wheeler as
|
22
|
+
[part of](http://www.bioruby.org/wiki/Google_Summer_of_Code) the
|
23
|
+
Google Summer of Code 2012, under the auspices of the Open
|
24
|
+
Bioinformatics Foundation. The development
|
25
|
+
[blog](http://csw.github.com/bioruby-maf/) may be of interest.
|
26
|
+
|
27
|
+
## Dependencies
|
28
|
+
|
29
|
+
[Kyoto Cabinet][] is a database library, required for building MAF
|
30
|
+
indexes. Install the core library in the appropriate way for your
|
31
|
+
platform, as documented [here][].
|
32
|
+
|
33
|
+
[Kyoto Cabinet]: http://fallabs.com/kyotocabinet/
|
34
|
+
[here]: https://github.com/csw/bioruby-maf/wiki/Kyoto-Cabinet
|
35
|
+
|
36
|
+
If you're using MRI, the [kyotocabinet-ruby][] gem will be used to
|
37
|
+
interact with Kyoto Cabinet. For best performance, however, you should
|
38
|
+
really consider using JRuby. On JRuby, the [kyotocabinet-java][] gem
|
39
|
+
will be used instead; this builds a Java library using JNI to call
|
40
|
+
into Kyoto Cabinet. Please file a [bug report][] if you encounter
|
41
|
+
problems building or using this gem, which is still fairly new.
|
42
|
+
|
43
|
+
[kyotocabinet-ruby]: https://rubygems.org/gems/kyotocabinet-ruby
|
44
|
+
[kyotocabinet-java]: https://github.com/csw/kyotocabinet-java
|
45
|
+
[bug report]: https://github.com/csw/kyotocabinet-java/issues
|
46
|
+
|
47
|
+
|
48
|
+
## Installation
|
49
|
+
|
50
|
+
$ gem install bio-maf
|
51
|
+
|
52
|
+
## Usage
|
53
|
+
|
54
|
+
### Create an index on a MAF file
|
55
|
+
|
56
|
+
Much of the functionality of this library relies on an index. You can
|
57
|
+
create one with [maf_index(1)][], like so:
|
58
|
+
|
59
|
+
[maf_index(1)]: http://csw.github.com/bioruby-maf/man/maf_index.1.html
|
60
|
+
|
61
|
+
|
62
|
+
$ maf_index test/data/mm8_chr7_tiny.maf /tmp/mm8_chr7_tiny.kct
|
63
|
+
|
64
|
+
Or programmatically:
|
65
|
+
|
66
|
+
require 'bio-maf'
|
67
|
+
parser = Bio::MAF::Parser.new("test/data/mm8_chr7_tiny.maf")
|
68
|
+
idx = Bio::MAF::KyotoIndex.build(parser, "/tmp/mm8_chr7_tiny.kct")
|
69
|
+
|
70
|
+
### Extract blocks from an indexed MAF file, by genomic interval
|
71
|
+
|
72
|
+
Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/mm8_chr7_tiny.maf).
|
73
|
+
|
74
|
+
|
75
|
+
require 'bio-maf'
|
76
|
+
parser = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
77
|
+
idx = Bio::MAF::KyotoIndex.open('test/data/mm8_chr7_tiny.kct')
|
78
|
+
|
79
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
80
|
+
idx.find(q, parser).each do |block|
|
81
|
+
ref_seq = block.sequences[0]
|
82
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
83
|
+
end
|
84
|
+
|
85
|
+
# => Matched block at 80082592, 121 bases
|
86
|
+
# => Matched block at 80082713, 54 bases
|
87
|
+
|
88
|
+
### Filter species returned in alignment blocks
|
89
|
+
|
90
|
+
require 'bio-maf'
|
91
|
+
parser = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
92
|
+
idx = Bio::MAF::KyotoIndex.open('test/data/mm8_chr7_tiny.kct')
|
93
|
+
|
94
|
+
parser.sequence_filter = { :only_species => %w(hg18 mm8 rheMac2) }
|
95
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
96
|
+
blocks = idx.find(q, parser)
|
97
|
+
block = blocks.first
|
98
|
+
puts "Block has #{block.sequences.size} sequences."
|
99
|
+
|
100
|
+
# => Block has 3 sequences.
|
101
|
+
|
102
|
+
### Extract blocks matching certain conditions
|
103
|
+
|
104
|
+
See also the [Cucumber feature][] and [step definitions][] for this.
|
105
|
+
|
106
|
+
[Cucumber feature]: https://github.com/csw/bioruby-maf/blob/master/features/maf-querying.feature
|
107
|
+
[step definitions]: https://github.com/csw/bioruby-maf/blob/master/features/step_definitions/query_steps.rb
|
108
|
+
|
109
|
+
#### Match only blocks with all specified species
|
110
|
+
|
111
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082471, 80082730)]
|
112
|
+
filter = { :with_all_species => %w(panTro2 loxAfr1) }
|
113
|
+
n_blocks = idx.find(q, parser, filter).count
|
114
|
+
# => 1
|
115
|
+
|
116
|
+
#### Match only blocks with a certain number of sequences
|
117
|
+
|
118
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082767, 80083008)]
|
119
|
+
filter = { :at_least_n_sequences => 6 }
|
120
|
+
n_blocks = idx.find(q, parser, filter).count
|
121
|
+
# => 1
|
122
|
+
|
123
|
+
#### Match only blocks within a text size range
|
124
|
+
|
125
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 0, 80100000)]
|
126
|
+
filter = { :min_size => 72, :max_size => 160 }
|
127
|
+
n_blocks = idx.find(q, parser, filter).count
|
128
|
+
# => 3
|
129
|
+
|
130
|
+
### Process each block in a MAF file
|
131
|
+
|
132
|
+
require 'bio-maf'
|
133
|
+
p = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
134
|
+
puts "MAF version: #{p.header.version}"
|
135
|
+
# => MAF version: 1
|
136
|
+
|
137
|
+
p.parse_blocks.each do |block|
|
138
|
+
block.sequences.each do |seq|
|
139
|
+
do_something(seq)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
### Parse empty ('e') lines
|
144
|
+
|
145
|
+
Refer to [`chr22_ieq.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/chr22_ieq.maf).
|
146
|
+
|
147
|
+
require 'bio-maf'
|
148
|
+
p = Bio::MAF::Parser.new('test/data/chr22_ieq.maf',
|
149
|
+
:parse_empty => false)
|
150
|
+
block = p.parse_block
|
151
|
+
block.sequences.size
|
152
|
+
# => 3
|
153
|
+
|
154
|
+
p = Bio::MAF::Parser.new('test/data/chr22_ieq.maf',
|
155
|
+
:parse_empty => true)
|
156
|
+
block = p.parse_block
|
157
|
+
block.sequences.size
|
158
|
+
# => 4
|
159
|
+
block.sequences.find { |s| s.empty? }
|
160
|
+
# => #<Bio::MAF::EmptySequence:0x007fe1f39882d0
|
161
|
+
# @source="turTru1.scaffold_109008", @start=25049,
|
162
|
+
# @size=1601, @strand=:+, @src_size=50103, @text=nil,
|
163
|
+
# @status="I">
|
164
|
+
|
165
|
+
|
166
|
+
### Command line tools
|
167
|
+
|
168
|
+
Man pages for command line tools:
|
169
|
+
|
170
|
+
* [`maf_index(1)`](http://csw.github.com/bioruby-maf/man/maf_index.1.html)
|
171
|
+
* [`maf_to_fasta(1)`](http://csw.github.com/bioruby-maf/man/maf_to_fasta.1.html)
|
172
|
+
|
173
|
+
### Other documentation
|
174
|
+
|
175
|
+
Also see the [API documentation][]. For more code examples see the
|
176
|
+
[RSpec][] and [Cucumber][] test files in the source tree.
|
177
|
+
|
178
|
+
[API documentation]: http://rubydoc.info/github/csw/bioruby-maf/
|
179
|
+
[RSpec]: https://github.com/csw/bioruby-maf/tree/master/spec/bio/maf
|
180
|
+
[Cucumber]: https://github.com/csw/bioruby-maf/tree/master/features
|
181
|
+
|
182
|
+
Also, the scripts in the
|
183
|
+
[bin](https://github.com/csw/bioruby-maf/tree/master/bin) directory
|
184
|
+
provide good worked examples of how to use the existing parsing API.
|
185
|
+
|
186
|
+
## Project home page
|
187
|
+
|
188
|
+
For information on the source tree, documentation, examples, issues
|
189
|
+
and how to contribute, see
|
190
|
+
|
191
|
+
<http://github.com/csw/bioruby-maf>
|
192
|
+
|
193
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
194
|
+
|
195
|
+
## Cite
|
196
|
+
|
197
|
+
If you use this software, please cite one of
|
198
|
+
|
199
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
200
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
201
|
+
|
202
|
+
## Biogems.info
|
203
|
+
|
204
|
+
This Biogem will be published at [#bio-maf](http://biogems.info/index.html#bio-maf)
|
205
|
+
|
206
|
+
## Copyright
|
207
|
+
|
208
|
+
Copyright (c) 2012 Clayton Wheeler. See LICENSE.txt for further details.
|
209
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# encoding: utf-8

# Build tasks for bio-maf: gem packaging, specs, cucumber features,
# man pages (when ronn is installed) and RDoc.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'
require 'rubygems/package_task'

# The gemspec is shared with the man-page task below (date and author).
$gemspec = Gem::Specification.load("bio-maf.gemspec")
Gem::PackageTask.new($gemspec) { |pkg| }

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

require 'cucumber/rake/task'
Cucumber::Rake::Task.new do |features|
end

# `rake test` (also the default task) runs the specs, then the features.
task :test => [ :spec, :cucumber ]
task :default => :test

#### Man pages
# (borrowed from matthewtodd/shoe)
# ronn is optional; the man tasks are only defined when it can be loaded.
ronn_avail = begin
               require 'ronn'
               true
             rescue LoadError
               false
             end

if ronn_avail
  RONN_FILES = Rake::FileList["man/*.?.ronn"]

  desc "Generate man pages"
  task :man do
    file_spec = RONN_FILES.join(' ')
    sh "ronn --roff --html --style toc --date #{$gemspec.date.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='#{$gemspec.author}' #{file_spec}"
  end

  namespace :man do
    desc "Publish man pages to Octopress source dir"
    task :publish do
      # Copies the generated HTML into a sibling ../octopress checkout.
      RONN_FILES.map { |path| path.sub(/\.ronn$/, '.html') }.each do |man|
        cp man, "../octopress/source/man/#{File.basename(man)}"
      end
    end
  end
  # Publishing always regenerates the pages first.
  task 'man:publish' => :man

  namespace :ronn do
    task :server do
      sh "ronn --server #{RONN_FILES.join(' ')}"
    end
  end
end # if ronn_avail

#### RDoc (not currently used)

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up
  # inside the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-maf #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Micro-benchmark comparing three ways of dispatching on the first
# character of a MAF line:
#   * `case` on a one-character String (line[0])
#   * `case` on the first byte (line.getbyte(0))
#   * an if/elsif chain on the first byte
# Each variant is run `n` times against the same sample 's' line.

require 'benchmark'

n = 10000000
line = 's tupBel1.scaffold_3803.1-85889 33686 61 + 85889 ttcaggaagggggcccaaaacgcttgagtggtcagctctta-ttttgcgtttactggatggg'

Benchmark.bmbm do |x|
  x.report("case with strings") do
    n.times do
      i = 0
      case line[0]
      when 's'
        i += 1
      when 'i', 'e', 'q', '#', nil
        next
      else
        raise "foo"
      end
    end
  end
  # Byte values of the line-type characters, computed once up front so the
  # byte-based variants compare plain integers.
  S = 's'.getbyte(0)
  I = 'i'.getbyte(0)
  E = 'e'.getbyte(0)
  Q = 'q'.getbyte(0)
  COMMENT = '#'.getbyte(0)
  x.report("case with bytes") do
    n.times do
      i = 0
      case line.getbyte(0)
      when S
        i += 1
      when I, E, Q, COMMENT, nil
        next
      else
        raise "foo"
      end
    end
  end
  x.report("if/else with bytes") do
    n.times do
      i = 0
      b = line.getbyte(0)
      if b == S
        i += 1
      # Array has no #contain?; #include? is the membership test. The
      # original call would raise NoMethodError for any non-'s' line.
      elsif [I, E, Q, COMMENT, nil].include?(b)
        next
      else
        raise "foo"
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Micro-benchmark: two ways of walking the lines of one MAF alignment
# block held in a String -- Array#each over the split result versus
# repeatedly shifting lines off the front of the split result.

require 'benchmark'

iterations = 1_000_000
block_text = <<MAF_BLOCK
a score=28680.000000
s hg19.chr22 16050711 61 + 51304566 atctccaagagggcataaaacac-tgagtaaacagctcttttatatgtgtttcctggatgag
s panTro2.chrUn 7681110 59 + 58616431 atctccaagagggcataaaacac-tgagtaaacagctctt--atatgtgtttcctggatgag
q panTro2.chrUn 99999999999999999999999-9999999999999999--99999999999999999999
i panTro2.chrUn C 0 C 0
s tarSyr1.scaffold_75923 2859 50 - 8928 atctccaagagggctgaaaatgc-caaatga-----------tcacacgtttcctggacaag
q tarSyr1.scaffold_75923 79295966999999999999998-9999799-----------99999999997657759999
i tarSyr1.scaffold_75923 N 0 C 0
s micMur1.scaffold_22105 5493 59 - 10683 acctccgagagggctcaaaacgc-cgagtgatcagctctt--atgcgcgtttcctggacgag
q micMur1.scaffold_22105 99999999999999999999999-9999999999999999--99999999999999999999
i micMur1.scaffold_22105 C 0 C 0
s tupBel1.scaffold_3803.1-85889 33686 61 + 85889 ttcaggaagggggcccaaaacgcttgagtggtcagctctta-ttttgcgtttactggatggg
q tupBel1.scaffold_3803.1-85889 79648579699867994997775679665662767577569-69987455976776322888
i tupBel1.scaffold_3803.1-85889 I 1 C 0
s vicPac1.scaffold_12713 6831 55 - 10681 actgccatgggggctcagcgtac-tgaatggttaattact------gtggtccccgaatgag
q vicPac1.scaffold_12713 99999999999999999999999-9999999999999999------9999999999999999
MAF_BLOCK

Benchmark.bmbm do |bench|
  # Variant 1: split once, then iterate the resulting array.
  bench.report("split/each") do
    iterations.times do
      total = 0
      block_text.split("\n").each { |row| total += row.size }
    end
  end

  # Variant 2: split once, then consume the array destructively from the front.
  bench.report("until/shift") do
    iterations.times do
      total = 0
      pending = block_text.split("\n")
      until pending.empty?
        row = pending.shift
        total += row.size
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Benchmark: time reading a whole file sequentially with IO#read using
# different request sizes (8 KiB, 128 KiB, 1 MiB, 8 MiB).
#
# Usage: read_bench [FILE]
#
# FILE defaults to the path the benchmark was originally written against.

require 'benchmark'

FILE = ARGV[0] || '/Users/csw/maf/chr22.maf'
abort "read_bench: cannot read #{FILE}" unless File.readable?(FILE)

# Report label paired with the number of bytes requested per read call.
chunk_sizes = [
  ["8k",   8192],
  ["128k", 128 * 1024],
  ["1M",   1024 * 1024],
  ["8M",   8 * 1024 * 1024]
]

Benchmark.bm do |x|
  chunk_sizes.each do |label, size|
    x.report(label) do
      File.open(FILE) do |f|
        # IO#read(length) returns nil once end of file is reached.
        loop do
          r = f.read(size)
          break unless r
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Micro-benchmark: Array#sort! with a comparison block versus
# Array#sort_by! with a key block, sorting 1000 small objects by an
# integer attribute.

require 'benchmark'

# Minimal value holder exposing the integer sort key as #part.
class Thing
  attr_reader :part

  def initialize(part)
    @part = part
  end
end

prng = Random.new
v_max = 1 << 31
ary = []
1000.times do
  # Draw keys from the dedicated generator; it was previously created but
  # never used because Kernel#rand was called instead.
  ary << Thing.new(prng.rand(v_max))
end

Benchmark.bmbm do |x|
  x.report("sort!") do
    1000.times do
      # Sort a fresh copy each round so every iteration sees unsorted input.
      ary2 = ary.dup
      ary2.sort! { |a, b| a.part <=> b.part }
    end
  end
  x.report("sort_by!") do
    1000.times do
      ary2 = ary.dup
      ary2.sort_by! { |i| i.part }
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Micro-benchmark: three ways of breaking a MAF 's' line into fields --
# whitespace String#split, String#split with a regex, and a single regex
# with one capture group per field.
#
# The original script registered the "regex fields" report twice with an
# identical body and label; the duplicate is dropped because bmbm already
# runs a rehearsal pass and the two rows could not be told apart.

require 'benchmark'

n = 2000000
line = 's tupBel1.scaffold_3803.1-85889 33686 61 + 85889 ttcaggaagggggcccaaaacgcttgagtggtcagctctta-ttttgcgtttactggatggg'

Benchmark.bmbm do |x|
  x.report("basic String#split") do
    n.times do
      line.split
    end
  end
  x.report("regex split") do
    n.times do
      line.split(/\s+/)
    end
  end
  x.report("regex fields") do
    n.times do
      # Fields: source, start, size, strand, source size, alignment text.
      if (m = /^s\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+(\d+)\s+(\S+)/.match(line))
        m.captures
      end
    end
  end
end
|