bio-maf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio-maf.rb +12 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +181 -0
data/bin/maf_count
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'bigbio'
|
5
|
+
require 'optparse'
|
6
|
+
require 'ostruct'
|
7
|
+
|
8
|
+
# maf_count: parse a MAF file and print how many alignment blocks it holds,
# optionally running under perftools, ruby-prof, or GC::Profiler.

# Command-line settings. The parser and chunk reader classes can be swapped
# with --parser and --threaded.
options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.reader = Bio::MAF::ChunkReader

# Printer names accepted in "--ruby-prof NAME:PATH", mapped to the name of
# the RubyProf constant looked up for each.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_count [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Both branches resolve RubyProf constants, so the library has to be
    # loaded before either of them runs.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form: pick the printer by name.
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      # Bare PATH: use the flat printer.
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.reader = Bio::MAF::ThreadedChunkReader
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# Without an input file there is nothing to count: show usage and fail.
unless src_path
  $stderr.puts op
  exit 1
end

# perftools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path,
                            :chunk_reader => options.reader,
                            :parse_extended => false)

n = 0
parser.each_block { n += 1 }
puts "Parsed #{n} MAF alignment blocks."

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/maf_dump_blocks
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'bigbio'
|
5
|
+
require 'optparse'
|
6
|
+
require 'ostruct'
|
7
|
+
|
8
|
+
# maf_dump_blocks: print one line per alignment block of a MAF file, giving
# the block's offset and size as tab-separated, right-aligned integers.

options = OpenStruct.new(:parser => Bio::MAF::Parser)

cli = OptionParser.new
cli.banner = "Usage: maf_dump_blocks [options] <maf>"
cli.separator ""
cli.separator "Options:"
# --parser names a constant under Bio::MAF to use as the parser class.
cli.on("--parser PARSER", "parser") { |name| options.parser = Bio::MAF.const_get(name) }
cli.parse!(ARGV)

maf_path = ARGV.shift

options.parser.new(maf_path).each_block do |blk|
  $stdout.print(format("%12d\t%7d\n", blk.offset, blk.size))
end
|
27
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
require 'bio-maf'
|
7
|
+
require 'bio-genomic-interval'
|
8
|
+
|
9
|
+
# maf_extract_ranges_count setup: read the command line, then open the MAF
# parser and the index that the query loop below works with.

options = OpenStruct.new
options.passes = 1
# Option hash handed to Bio::MAF::Parser.new as-is.
options.p = { :threads => 1 }

cli = OptionParser.new
cli.banner = "Usage: maf_extract_ranges_count [options] <maf> <index>"
cli.separator ""
cli.separator "Options:"
cli.on("-t", "--threads N", "Parser threads") { |count| options.p[:threads] = count.to_i }
cli.on("-p", "--passes N", "Number of passes") { |count| options.passes = count.to_i }
cli.parse!(ARGV)

# Positional arguments: MAF file first, index file second.
maf_p, index_p = ARGV.shift(2)

parser = Bio::MAF::Parser.new(maf_p, options.p)
index = Bio::MAF::KyotoIndex.open(index_p)
|
30
|
+
|
31
|
+
# Parse one whitespace-separated interval line, "<source> <start> <end> [...]",
# into a genomic interval. Anything after the third field is ignored.
#
# @param line [String] one line of interval input
# @return the result of Bio::GenomicInterval.zero_based(source, start, end)
# @raise [ArgumentError] if a field is missing or a coordinate is not a
#   decimal integer
def parse_interval(line)
  src, r_start_s, r_end_s, _ = line.split(nil, 4)
  begin
    # Integer(str, 10) rejects missing or non-numeric fields that String#to_i
    # would silently turn into 0; base 10 keeps leading zeros decimal.
    r_start = Integer(r_start_s, 10)
    r_end = Integer(r_end_s, 10)
  rescue ArgumentError, TypeError
    raise ArgumentError, "malformed interval line: #{line.inspect}"
  end
  Bio::GenomicInterval.zero_based(src, r_start, r_end)
end
|
37
|
+
|
38
|
+
# Read every interval from standard input before querying. Blank lines are
# skipped, since they carry no fields to build an interval from.
intervals = $stdin.each_line.
  reject { |line| line.strip.empty? }.
  map { |line| parse_interval(line) }

# Run the same index query options.passes times, reporting the block count
# after each pass.
options.passes.times do
  blocks = index.find(intervals, parser)
  puts "TOTAL: #{blocks.count} blocks parsed."
end
|
data/bin/maf_index
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'bio-maf'
|
5
|
+
require 'optparse'
|
6
|
+
require 'ostruct'
|
7
|
+
|
8
|
+
# Printer names accepted in "--ruby-prof NAME:PATH", mapped to the name of
# the RubyProf constant looked up for each.
PRINTERS = { 'flat' => :FlatPrinter, 'stack' => :CallStackPrinter, 'graph' => :GraphHtmlPrinter }

# Script-wide settings, global so that build_index can read the chunk reader.
# Defaults: build mode with the plain chunk reader.
$options = OpenStruct.new(:mode => :build, :reader => Bio::MAF::ChunkReader)
|
17
|
+
|
18
|
+
# Build an index for a MAF file with Bio::MAF::KyotoIndex.build, then close it.
#
# The MAF file is parsed with the chunk reader selected in $options.reader
# and with :parse_extended turned off.
#
# @param maf   path of the MAF file, passed to Bio::MAF::Parser.new
# @param index index location, passed to Bio::MAF::KyotoIndex.build
# @return whatever closing the index returns
def build_index(maf, index)
  maf_parser = Bio::MAF::Parser.new(maf,
                                    :chunk_reader => $options.reader,
                                    :parse_extended => false)
  Bio::MAF::KyotoIndex.build(maf_parser, index).close
end
|
25
|
+
|
26
|
+
# maf_index command line. The default mode builds an index from <maf> into
# <index>; --dump instead dumps an existing <index>.
op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_index [options] <maf> <index>"
  #opts.separator ""
  #opts.separator "Options:"
  opts.on("--time", "print elapsed time") do
    $options.bench = true
  end
  opts.on("-d", "--dump") do
    $options.mode = :dump
  end
  opts.on("-t", "--threaded") do
    $options.reader = Bio::MAF::ThreadedChunkReader
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form: pick the printer by name.
      $options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      $options.ruby_prof_path = $2
    else
      # Bare PATH: use the flat printer. The module is RubyProf, not Ruby_Prof.
      $options.ruby_prof_printer = RubyProf::FlatPrinter
      $options.ruby_prof_path = pspec
    end
  end
end

op.parse!(ARGV)

# Dump mode takes only the index path; build mode takes the MAF path first.
maf_p = ARGV.shift if $options.mode == :build
index_p = ARGV.shift

unless (maf_p || $options.mode == :dump) && index_p
  $stderr.puts op
  exit 1
end

RubyProf.start if $options.ruby_prof_path

case $options.mode
when :build
  if $options.bench
    # --time: report the Benchmark timings for the build.
    bm_res = Benchmark.measure do
      build_index(maf_p, index_p)
    end
    puts bm_res
  else
    build_index(maf_p, index_p)
  end
when :dump
  idx = Bio::MAF::KyotoIndex.open(index_p)
  idx.dump
else
  raise "Unsupported mode: #{$options.mode}"
end

if $options.ruby_prof_path
  res = RubyProf.stop
  printer = $options.ruby_prof_printer.new(res)
  File.open($options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/maf_parse_bench
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'bio-maf'
|
5
|
+
require 'optparse'
|
6
|
+
require 'ostruct'
|
7
|
+
|
8
|
+
# maf_parse_bench: time repeated parsing of a single MAF alignment block and
# print the averaged Benchmark timings, optionally under a profiler.

options = OpenStruct.new
options.parser = Bio::MAF::Parser
# Number of parse iterations, for the timed run and for the warmup run.
options.runs = 100_000
options.warmup = false

# Printer names accepted in "--ruby-prof NAME:PATH", mapped to the name of
# the RubyProf constant looked up for each.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_parse_bench [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Both branches resolve RubyProf constants, so the library has to be
    # loaded before either of them runs.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form: pick the printer by name.
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      # Bare PATH: use the flat printer. This must be the class itself, not
      # a Symbol, because it is instantiated with .new after the run.
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-w", "--warmup", "perform warmup run") do
    options.warmup = true
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# Without an input file there is nothing to benchmark: show usage and fail.
unless src_path
  $stderr.puts op
  exit 1
end

# perftools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path)
# Consume the first two blocks, then remember the position so that every
# iteration below parses the same following block.
parser.parse_block
parser.parse_block
pos = parser.s.pos

if options.warmup
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

bm_res = Benchmark.measure do
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end

# Average timings per iteration.
puts bm_res / options.runs
|
data/bin/maf_to_fasta
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'bigbio'
|
5
|
+
require 'optparse'
|
6
|
+
require 'ostruct'
|
7
|
+
|
8
|
+
# maf_to_fasta: write every raw sequence of every alignment block in a MAF
# file to a FASTA file, optionally under perftools, ruby-prof, or
# GC::Profiler.

options = OpenStruct.new(:parser => Bio::MAF::Parser)

cli = OptionParser.new
cli.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
cli.separator ""
cli.separator "Options:"
cli.on("-p", "--profile PROF", "Profile with PerfTools") { |prof| options.prof = prof }
cli.on("--ruby-prof PATH", "Profile with ruby-prof") { |path| options.ruby_prof = path }
cli.on("--profile-gc", "Profile GC") { options.profile_gc = true }
cli.on("--parser PARSER", "parser") { |name| options.parser = Bio::MAF.const_get(name) }
cli.parse!(ARGV)

# Positional arguments: source MAF first, destination FASTA second.
src_path, dst_path = ARGV.shift(2)

# perftools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path)
writer = FastaWriter.new(dst_path)

parser.each_block do |blk|
  blk.each_raw_seq { |raw_seq| raw_seq.write_fasta(writer) }
end

writer.close

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof
  # ruby-prof results from this script always use the flat printer.
  profile = RubyProf.stop
  report = RubyProf::FlatPrinter.new(profile)
  File.open(options.ruby_prof, 'w') { |out| report.print(out) }
end
|
data/bin/maf_write
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
# maf_write: parse a MAF file and write its header and blocks back out to
# standard output, optionally under perftools, ruby-prof, or GC::Profiler.

options = OpenStruct.new
options.parser = Bio::MAF::Parser
# Option hash handed to the parser; adjusted by --threaded and --extended.
options.opts = {
  :chunk_reader => Bio::MAF::ChunkReader,
  :parse_extended => false
}

# Printer names accepted in "--ruby-prof NAME:PATH", mapped to the name of
# the RubyProf constant looked up for each.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_write [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Both branches resolve RubyProf constants, so the library has to be
    # loaded before either of them runs.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form: pick the printer by name.
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      # Bare PATH: use the flat printer.
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
    options.opts[:threads] = 1
  end
  opts.on("-e", "--extended") do
    options.opts[:parse_extended] = true
    options.opts[:parse_empty] = true
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# Without an input file there is nothing to write: show usage and fail.
unless src_path
  $stderr.puts op
  exit 1
end

# perftools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path, options.opts)
writer = Bio::MAF::Writer.new($stdout)
writer.write_header(parser.header)
writer.write_blocks(parser.parse_blocks)

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/random_ranges
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
# random_ranges: split START...END into NUM equal slices and print one
# randomly placed interval of LEN bases from each slice, as tab-separated
# "SEQ<TAB>start<TAB>end<TAB>x" lines.

options = OpenStruct.new

# Keep the parser itself (not the result of parse!) so usage can be printed.
op = OptionParser.new do |opts|
  opts.banner = "Usage: random_ranges [options]"
  opts.on("-r", "--range START:END", "range") do |range|
    s, e = range.split(':')
    options.start = s.to_i
    options.end = e.to_i
  end
  opts.on("-l", "--length LEN", "block length") do |len|
    options.length = len.to_i
  end
  opts.on("-n", "--number NUM", "number of blocks") do |num|
    options.num = num.to_i
  end
  opts.on("-s", "--sequence SEQ", "sequence") do |seq|
    options.seq = seq
  end
end
op.parse!(ARGV)

# -r, -l and -n have no defaults, and the arithmetic below cannot run on nil.
unless options.start && options.end && options.length && options.num
  $stderr.puts op
  exit 1
end

# The range is divided by the block count, so zero or negative is unusable.
abort "random_ranges: number of blocks must be positive" unless options.num > 0

range = options.end - options.start
block_range = range / options.num
# Number of possible start offsets for a block inside its slice.
block_start_range = block_range - options.length

# Random#rand requires a positive bound, so each slice must be longer than
# one block.
unless block_start_range > 0
  abort "random_ranges: range #{options.start}:#{options.end} is too small " \
        "for #{options.num} blocks of length #{options.length}"
end

rng = Random.new
options.num.times do |n|
  block_offset = rng.rand(block_start_range)
  b_start = options.start + (block_range * n) + block_offset
  b_end = b_start + options.length
  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
@milestone_2
|
2
|
+
Feature: Indexed access to MAF files
|
3
|
+
In order to extract alignment blocks from MAF files
|
4
|
+
By chromosomal ranges matching a source sequence
|
5
|
+
I want to have a way to build indexes on MAF files
|
6
|
+
And use indexes to efficiently find alignment blocks
|
7
|
+
Because linear searches of a 200 GB file are impractical
|
8
|
+
|
9
|
+
Scenario: Index a MAF file
|
10
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
11
|
+
When I open it with a MAF reader
|
12
|
+
And build an index on the reference sequence
|
13
|
+
Then the index has at least 8 entries
|
14
|
+
|
15
|
+
Scenario: Extract alignment blocks by chromosomal range
|
16
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
17
|
+
When I open it with a MAF reader
|
18
|
+
And build an index on the reference sequence
|
19
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
20
|
+
Then 2 blocks are obtained
|
21
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
22
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
23
|
+
|
24
|
+
Scenario: Extract alignment blocks by chromosomal range from index file
|
25
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
26
|
+
And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
|
27
|
+
When I open it with a MAF reader
|
28
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
29
|
+
Then 2 blocks are obtained
|
30
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
31
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
@@ -0,0 +1,29 @@
|
|
1
|
+
Feature: MAF output
|
2
|
+
In order to output modified MAF files or subsets of them
|
3
|
+
I want to be able to write out parsed MAF data
|
4
|
+
|
5
|
+
Scenario: Reproduce simple test data
|
6
|
+
Given a MAF source file "mm8_single.maf"
|
7
|
+
When I open it with a MAF reader
|
8
|
+
And open a new MAF writer
|
9
|
+
And write the header from the original MAF file
|
10
|
+
And write all the parsed blocks
|
11
|
+
Then the output should match, except whitespace, "mm8_single.maf"
|
12
|
+
|
13
|
+
Scenario: Reproduce longer test data
|
14
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
15
|
+
When I open it with a MAF reader
|
16
|
+
And open a new MAF writer
|
17
|
+
And write the header from the original MAF file
|
18
|
+
And write all the parsed blocks
|
19
|
+
Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
|
20
|
+
|
21
|
+
Scenario: Reproduce test data with i, e, q lines
|
22
|
+
Given a MAF source file "chr22_ieq.maf"
|
23
|
+
When I enable the :parse_extended parser option
|
24
|
+
And I enable the :parse_empty parser option
|
25
|
+
And I open it with a MAF reader
|
26
|
+
And open a new MAF writer
|
27
|
+
And write the header from the original MAF file
|
28
|
+
And write all the parsed blocks
|
29
|
+
Then the output should match, except whitespace, "chr22_ieq.maf"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
Feature: Parse MAF files
|
2
|
+
In order to extract information from a MAF file
|
3
|
+
I want to read it and pull out information
|
4
|
+
|
5
|
+
Scenario: Read MAF header
|
6
|
+
Given MAF data:
|
7
|
+
"""
|
8
|
+
##maf version=1 scoring=humor.v4
|
9
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
|
10
|
+
|
11
|
+
a score=0.128
|
12
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
13
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
14
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
15
|
+
"""
|
16
|
+
When I open it with a MAF reader
|
17
|
+
Then the MAF version should be "1"
|
18
|
+
And the scoring scheme should be "humor.v4"
|
19
|
+
# third line a continuation
|
20
|
+
And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf"
|
21
|
+
|
22
|
+
Scenario: Read alignment block
|
23
|
+
Given MAF data:
|
24
|
+
"""
|
25
|
+
##maf version=1 scoring=humor.v4
|
26
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
27
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
28
|
+
|
29
|
+
a score=0.128
|
30
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
31
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
32
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
33
|
+
"""
|
34
|
+
When I open it with a MAF reader
|
35
|
+
Then an alignment block can be obtained
|
36
|
+
And the alignment block has 3 sequences
|
37
|
+
And sequence 0 has source "human_hoxa"
|
38
|
+
And sequence 0 has start 100
|
39
|
+
And sequence 0 has size 8
|
40
|
+
And sequence 0 has strand :+
|
41
|
+
And sequence 0 has source size 100257
|
42
|
+
And sequence 0 has text "ACA-TTACT"
|
43
|
+
And sequence 1 has strand :-
|
44
|
+
|