bio-maf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio-maf.rb +12 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +181 -0
data/bin/maf_count
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_count: parse a MAF file and print how many alignment blocks were
# parsed, optionally under perftools.rb, ruby-prof and/or the GC profiler.
#
# Usage: maf_count [options] <maf>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.reader = Bio::MAF::ChunkReader

# Printer names accepted by --ruby-prof in its "NAME:PATH" form, mapped to
# the RubyProf constant that is looked up for them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

OptionParser.new do |opts|
  opts.banner = "Usage: maf_count [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  # The argument is either a bare output path (flat printer is used) or
  # "NAME:PATH", where NAME is a key of PRINTERS.
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching: both branches reference RubyProf,
    # and the bare-path branch used to run before any require had happened.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  # Swap in another parser class, looked up by name under Bio::MAF.
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.reader = Bio::MAF::ThreadedChunkReader
  end
end.parse!(ARGV)

src_path = ARGV.shift

# Start at most one CPU profiler; -p takes precedence over --ruby-prof.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

if options.profile_gc
  GC::Profiler.enable
end

parser = options.parser.new(src_path,
                            :chunk_reader => options.reader,
                            :parse_extended => false)

# Walk every block; only the count is of interest.
n = 0
parser.each_block do |block|
  n += 1
end
puts "Parsed #{n} MAF alignment blocks."

# The GC report goes to stderr so it does not mix with the count on stdout.
if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

# Stop whichever CPU profiler was started and, for ruby-prof, write the
# report with the selected printer to the requested path.
if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/maf_dump_blocks
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_dump_blocks: for every alignment block of a MAF file, print the
# block's `offset` and `size` values on one tab-separated line.
#
# Usage: maf_dump_blocks [options] <maf>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

settings = OpenStruct.new
settings.parser = Bio::MAF::Parser

cli = OptionParser.new do |o|
  o.banner = "Usage: maf_dump_blocks [options] <maf>"
  o.separator ""
  o.separator "Options:"
  # Swap in another parser class, looked up by name under Bio::MAF.
  o.on("--parser PARSER", "parser") do |parser_name|
    settings.parser = Bio::MAF.const_get(parser_name)
  end
end
cli.parse!(ARGV)

# The first remaining argument is the MAF file to read.
maf_path = ARGV.shift
maf_parser = settings.parser.new(maf_path)

maf_parser.each_block do |blk|
  $stdout.printf("%12d\t%7d\n", blk.offset, blk.size)
end
|
27
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_extract_ranges_count: read genomic intervals from standard input,
# look them up through an index of a MAF file and print how many blocks
# each pass over the intervals returned.
#
# Usage: maf_extract_ranges_count [options] <maf> <index>

require 'optparse'
require 'ostruct'

require 'bio-maf'
require 'bio-genomic-interval'

# Turn one whitespace-separated input line ("source start end ...") into a
# zero-based Bio::GenomicInterval. Anything after the third field is
# ignored; start and end are converted with String#to_i.
def parse_interval(line)
  source, start_field, end_field, _rest = line.split(nil, 4)
  Bio::GenomicInterval.zero_based(source, start_field.to_i, end_field.to_i)
end

options = OpenStruct.new
options.p = { :threads => 1 }   # option hash handed to the parser
options.passes = 1

cli = OptionParser.new do |o|
  o.banner = "Usage: maf_extract_ranges_count [options] <maf> <index>"
  o.separator ""
  o.separator "Options:"
  o.on("-t", "--threads N", "Parser threads") do |count|
    options.p[:threads] = count.to_i
  end
  o.on("-p", "--passes N", "Number of passes") do |count|
    options.passes = count.to_i
  end
end
cli.parse!(ARGV)

maf_path = ARGV.shift
index_path = ARGV.shift

# Open both inputs before consuming stdin, as the original script does.
maf_parser = Bio::MAF::Parser.new(maf_path, options.p)
maf_index = Bio::MAF::KyotoIndex.open(index_path)

query_intervals = $stdin.each_line.map { |text| parse_interval(text) }

# Repeat the same lookup `passes` times, reporting the result size each time.
options.passes.times do
  found = maf_index.find(query_intervals, maf_parser)
  puts "TOTAL: #{found.count} blocks parsed."
end
|
data/bin/maf_index
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_index: build a KyotoIndex for a MAF file, or dump an existing index
# with -d/--dump.
#
# Usage: maf_index [options] <maf> <index>
# In dump mode only <index> is taken from the command line.

require 'benchmark'
require 'bio-maf'
require 'optparse'
require 'ostruct'

# Printer names accepted by --ruby-prof in its "NAME:PATH" form, mapped to
# the RubyProf constant that is looked up for them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter,
  'graph' => :GraphHtmlPrinter
}

# A global rather than a local so that build_index, being a method, can
# read the selected chunk reader.
$options = OpenStruct.new
$options.mode = :build
$options.reader = Bio::MAF::ChunkReader

# Parse the MAF file at +maf+ with the configured chunk reader, build an
# index for it at +index+ and close the index again.
def build_index(maf, index)
  parser = Bio::MAF::Parser.new(maf,
                                :chunk_reader => $options.reader,
                                :parse_extended => false)
  idx = Bio::MAF::KyotoIndex.build(parser, index)
  idx.close
end

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_index [options] <maf> <index>"
  #opts.separator ""
  #opts.separator "Options:"
  opts.on("--time", "print elapsed time") do
    $options.bench = true
  end
  opts.on("-d", "--dump") do
    $options.mode = :dump
  end
  opts.on("-t", "--threaded") do
    $options.reader = Bio::MAF::ThreadedChunkReader
  end
  # The argument is either a bare output path (flat printer is used) or
  # "NAME:PATH", where NAME is a key of PRINTERS.
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      $options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      $options.ruby_prof_path = $2
    else
      # The module is RubyProf; the former spelling "Ruby_Prof" raised
      # NameError whenever a bare path was given.
      $options.ruby_prof_printer = RubyProf::FlatPrinter
      $options.ruby_prof_path = pspec
    end
  end
end

op.parse!(ARGV)

# Build mode takes <maf> then <index>; dump mode takes only <index>.
maf_p = ARGV.shift if $options.mode == :build
index_p = ARGV.shift

# Print the usage text and fail when a required path is missing.
unless (maf_p || $options.mode == :dump) && index_p
  $stderr.puts op
  exit 1
end

if $options.ruby_prof_path
  RubyProf.start
end

case $options.mode
when :build
  if ! $options.bench
    build_index(maf_p, index_p)
  else
    # --time: print the Benchmark measurement of the build.
    bm_res = Benchmark.measure do
      build_index(maf_p, index_p)
    end
    puts bm_res
  end
when :dump
  idx = Bio::MAF::KyotoIndex.open(index_p)
  idx.dump
else
  raise "Unsupported mode: #{$options.mode}"
end

# Write the ruby-prof report with the selected printer.
if $options.ruby_prof_path
  res = RubyProf.stop
  printer = $options.ruby_prof_printer.new(res)
  File.open($options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/maf_parse_bench
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_parse_bench: micro-benchmark for parsing a single MAF alignment
# block. The same block is parsed `runs` times and the averaged
# Benchmark timings are printed, optionally under a profiler.
#
# Usage: maf_parse_bench [options] <maf>

require 'benchmark'
require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.runs = 100_000
options.warmup = false

# Printer names accepted by --ruby-prof in its "NAME:PATH" form, mapped to
# the RubyProf constant that is looked up for them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

OptionParser.new do |opts|
  opts.banner = "Usage: maf_parse_bench [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  # The argument is either a bare output path (flat printer is used) or
  # "NAME:PATH", where NAME is a key of PRINTERS.
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Needed by both branches, so load it before branching.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      # Store the printer class itself: `.new(res)` is called on this value
      # later, which failed when the Symbol :FlatPrinter was stored here.
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  # Swap in another parser class, looked up by name under Bio::MAF.
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-w", "--warmup", "perform warmup run") do
    options.warmup = true
  end
end.parse!(ARGV)

src_path = ARGV.shift

# Start at most one CPU profiler; -p takes precedence over --ruby-prof.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

if options.profile_gc
  GC::Profiler.enable
end

# Consume the first two blocks, then remember the position reported by
# `parser.s` so every iteration below can rewind to it and parse the same
# block again. NOTE(review): `s` is presumably the parser's underlying
# scanner/reader -- confirm against Bio::MAF::Parser.
parser = options.parser.new(src_path)
parser.parse_block
parser.parse_block
pos = parser.s.pos

# Optional untimed pass with the same workload as the measured one.
if options.warmup
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

bm_res = Benchmark.measure do
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

# The GC report goes to stderr so it does not mix with the timings.
if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

# Stop whichever CPU profiler was started and, for ruby-prof, write the
# report with the selected printer to the requested path.
if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end

# Benchmark::Tms#/ divides each timing by the run count: time per parse.
puts bm_res / options.runs
|
data/bin/maf_to_fasta
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_to_fasta: read every alignment block of a MAF file and write each of
# its raw sequences to a FASTA file, optionally under perftools.rb,
# ruby-prof and/or the GC profiler.
#
# Usage: maf_to_fasta [options] <maf> <fasta>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

settings = OpenStruct.new
settings.parser = Bio::MAF::Parser

cli = OptionParser.new do |o|
  o.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
  o.separator ""
  o.separator "Options:"
  o.on("-p", "--profile PROF", "Profile with PerfTools") do |target|
    settings.prof = target
  end
  o.on("--ruby-prof PATH", "Profile with ruby-prof") do |report_path|
    settings.ruby_prof = report_path
  end
  o.on("--profile-gc", "Profile GC") do
    settings.profile_gc = true
  end
  # Swap in another parser class, looked up by name under Bio::MAF.
  o.on("--parser PARSER", "parser") do |parser_name|
    settings.parser = Bio::MAF.const_get(parser_name)
  end
end
cli.parse!(ARGV)

# Positional arguments: input MAF path, then output FASTA path.
maf_path, fasta_path = ARGV.shift(2)

# Start at most one CPU profiler; -p takes precedence over --ruby-prof.
if settings.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(settings.prof)
elsif settings.ruby_prof
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if settings.profile_gc

maf_parser = settings.parser.new(maf_path)
fasta_out = FastaWriter.new(fasta_path)

maf_parser.each_block do |blk|
  blk.each_raw_seq { |raw_seq| raw_seq.write_fasta(fasta_out) }
end

fasta_out.close

# The GC report goes to stderr.
if settings.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

# Stop whichever CPU profiler was started; ruby-prof results are always
# written with the flat printer.
if settings.prof
  PerfTools::CpuProfiler.stop
elsif settings.ruby_prof
  profile = RubyProf.stop
  flat_printer = RubyProf::FlatPrinter.new(profile)
  File.open(settings.ruby_prof, 'w') { |out| flat_printer.print(out) }
end
|
data/bin/maf_write
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby

# maf_write: parse a MAF file and write its header and blocks back out to
# standard output through Bio::MAF::Writer, optionally under perftools.rb,
# ruby-prof and/or the GC profiler.
#
# Usage: maf_write [options] <maf>

require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
# Option hash passed to the parser constructor.
options.opts = {
  :chunk_reader => Bio::MAF::ChunkReader,
  :parse_extended => false
}

# Printer names accepted by --ruby-prof in its "NAME:PATH" form, mapped to
# the RubyProf constant that is looked up for them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

OptionParser.new do |opts|
  opts.banner = "Usage: maf_write [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  # The argument is either a bare output path (flat printer is used) or
  # "NAME:PATH", where NAME is a key of PRINTERS.
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching: both branches reference RubyProf,
    # and the bare-path branch used to run before any require had happened.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  # Swap in another parser class, looked up by name under Bio::MAF.
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
    options.opts[:threads] = 1
  end
  opts.on("-e", "--extended") do
    options.opts[:parse_extended] = true
    options.opts[:parse_empty] = true
  end
end.parse!(ARGV)

src_path = ARGV.shift

# Start at most one CPU profiler; -p takes precedence over --ruby-prof.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

if options.profile_gc
  GC::Profiler.enable
end

parser = options.parser.new(src_path, options.opts)
writer = Bio::MAF::Writer.new($stdout)
writer.write_header(parser.header)
writer.write_blocks(parser.parse_blocks)

# The GC report goes to stderr so it stays out of the MAF written to stdout.
if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

# Stop whichever CPU profiler was started and, for ruby-prof, write the
# report with the selected printer to the requested path.
if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/random_ranges
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby

# random_ranges: print NUM random ranges of length LEN on sequence SEQ,
# one per line as "SEQ<TAB>start<TAB>end<TAB>x".
#
# The span between START and END is cut into NUM equally sized slots and
# one range is placed at a random offset inside each slot, so the ranges
# come out in ascending order and never overlap.
#
# Usage: random_ranges -r START:END -l LEN -n NUM -s SEQ

require 'optparse'
require 'ostruct'

options = OpenStruct.new

# Keep the parser itself in `op` (not the return value of parse!) so the
# usage text can be printed when validation fails.
op = OptionParser.new do |opts|
  opts.banner = "Usage: random_ranges [options]"
  opts.on("-r", "--range START:END", "range") do |range|
    s, e = range.split(':')
    options.start = s.to_i
    options.end = e.to_i
  end
  opts.on("-l", "--length LEN", "block length") do |len|
    options.length = len.to_i
  end
  opts.on("-n", "--number NUM", "number of blocks") do |num|
    options.num = num.to_i
  end
  opts.on("-s", "--sequence SEQ", "sequence") do |seq|
    options.seq = seq
  end
end
op.parse!(ARGV)

# Report a command-line problem together with the usage text, then exit
# with a failure status.
usage_error = lambda do |message|
  $stderr.puts message
  $stderr.puts op
  exit 1
end

# Every option is required; without this check a missing one surfaced as a
# NoMethodError on nil further down.
missing = []
missing << '--range' unless options.start
missing << '--length' unless options.length
missing << '--number' unless options.num
missing << '--sequence' unless options.seq
unless missing.empty?
  usage_error.call("Missing required option(s): #{missing.join(', ')}")
end

# A zero block count would divide by zero below.
unless options.length > 0 && options.num > 0
  usage_error.call("--length and --number must be positive")
end

span = options.end - options.start
slot_size = span / options.num
# Exclusive upper bound for the random offset of a block inside its slot.
# Random#rand rejects a non-positive bound, so check it here and explain.
offset_limit = slot_size - options.length
unless offset_limit > 0
  usage_error.call("Range too small: each of the #{options.num} slots " \
                   "spans #{slot_size} positions, which must exceed the " \
                   "block length #{options.length}")
end

# Named `rng` so that Kernel#rand is not shadowed.
rng = Random.new
options.num.times do |n|
  block_offset = rng.rand(offset_limit)
  b_start = options.start + (slot_size * n) + block_offset
  b_end = b_start + options.length
  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
@milestone_2
|
2
|
+
Feature: Indexed access to MAF files
|
3
|
+
In order to extract alignment blocks from MAF files
|
4
|
+
By chromosomal ranges matching a source sequence
|
5
|
+
I want to have a way to build indexes on MAF files
|
6
|
+
And use indexes to efficiently find alignment blocks
|
7
|
+
Because linear searches of a 200 GB file are impractical
|
8
|
+
|
9
|
+
Scenario: Index a MAF file
|
10
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
11
|
+
When I open it with a MAF reader
|
12
|
+
And build an index on the reference sequence
|
13
|
+
Then the index has at least 8 entries
|
14
|
+
|
15
|
+
Scenario: Extract alignment blocks by chromosomal range
|
16
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
17
|
+
When I open it with a MAF reader
|
18
|
+
And build an index on the reference sequence
|
19
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
20
|
+
Then 2 blocks are obtained
|
21
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
22
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
23
|
+
|
24
|
+
Scenario: Extract alignment blocks by chromosomal range from index file
|
25
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
26
|
+
And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
|
27
|
+
When I open it with a MAF reader
|
28
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
29
|
+
Then 2 blocks are obtained
|
30
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
31
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
@@ -0,0 +1,29 @@
|
|
1
|
+
Feature: MAF output
|
2
|
+
In order to output modified MAF files or subsets of them
|
3
|
+
I want to be able to write out parsed MAF data
|
4
|
+
|
5
|
+
Scenario: Reproduce simple test data
|
6
|
+
Given a MAF source file "mm8_single.maf"
|
7
|
+
When I open it with a MAF reader
|
8
|
+
And open a new MAF writer
|
9
|
+
And write the header from the original MAF file
|
10
|
+
And write all the parsed blocks
|
11
|
+
Then the output should match, except whitespace, "mm8_single.maf"
|
12
|
+
|
13
|
+
Scenario: Reproduce longer test data
|
14
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
15
|
+
When I open it with a MAF reader
|
16
|
+
And open a new MAF writer
|
17
|
+
And write the header from the original MAF file
|
18
|
+
And write all the parsed blocks
|
19
|
+
Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
|
20
|
+
|
21
|
+
Scenario: Reproduce test data with i, e, q lines
|
22
|
+
Given a MAF source file "chr22_ieq.maf"
|
23
|
+
When I enable the :parse_extended parser option
|
24
|
+
And I enable the :parse_empty parser option
|
25
|
+
And I open it with a MAF reader
|
26
|
+
And open a new MAF writer
|
27
|
+
And write the header from the original MAF file
|
28
|
+
And write all the parsed blocks
|
29
|
+
Then the output should match, except whitespace, "chr22_ieq.maf"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
Feature: Parse MAF files
|
2
|
+
In order to extract information from a MAF file
|
3
|
+
I want to read it and pull out information
|
4
|
+
|
5
|
+
Scenario: Read MAF header
|
6
|
+
Given MAF data:
|
7
|
+
"""
|
8
|
+
##maf version=1 scoring=humor.v4
|
9
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
|
10
|
+
|
11
|
+
a score=0.128
|
12
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
13
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
14
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
15
|
+
"""
|
16
|
+
When I open it with a MAF reader
|
17
|
+
Then the MAF version should be "1"
|
18
|
+
And the scoring scheme should be "humor.v4"
|
19
|
+
# third line a continuation
|
20
|
+
And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf"
|
21
|
+
|
22
|
+
Scenario: Read alignment block
|
23
|
+
Given MAF data:
|
24
|
+
"""
|
25
|
+
##maf version=1 scoring=humor.v4
|
26
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
27
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
28
|
+
|
29
|
+
a score=0.128
|
30
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
31
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
32
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
33
|
+
"""
|
34
|
+
When I open it with a MAF reader
|
35
|
+
Then an alignment block can be obtained
|
36
|
+
And the alignment block has 3 sequences
|
37
|
+
And sequence 0 has source "human_hoxa"
|
38
|
+
And sequence 0 has start 100
|
39
|
+
And sequence 0 has size 8
|
40
|
+
And sequence 0 has strand :+
|
41
|
+
And sequence 0 has source size 100257
|
42
|
+
And sequence 0 has text "ACA-TTACT"
|
43
|
+
And sequence 1 has strand :-
|
44
|
+
|