bio-maf 0.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio-maf.rb +12 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +182 -0
data/bin/maf_to_fasta
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'bigbio'
|
5
|
+
require 'optparse'
|
6
|
+
require 'ostruct'
|
7
|
+
|
8
|
+
# maf_to_fasta: reads every alignment block from a MAF file and writes
# each raw sequence of each block to a FASTA file.
#
# Usage: maf_to_fasta [options] <maf> <fasta>
#
# Optional profiling (PerfTools, ruby-prof, GC::Profiler) wraps the
# conversion; the profiler libraries are only loaded when requested.

options = OpenStruct.new
options.parser = Bio::MAF::Parser

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |path|
    options.ruby_prof = path
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # Looks the parser class up by name inside the Bio::MAF namespace.
    options.parser = Bio::MAF.const_get(name)
  end
end
op.parse!(ARGV)

src_path = ARGV.shift
dst_path = ARGV.shift

# Both positional arguments are required; without them the parser and
# writer would be constructed with nil paths.
abort(op.help) unless src_path && dst_path

# PerfTools takes precedence over ruby-prof when both are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path)
# FastaWriter is not defined in this script; presumably it is provided by
# the 'bigbio' require above -- confirm.
writer = FastaWriter.new(dst_path)

begin
  parser.each_block do |block|
    block.each_raw_seq do |seq|
      seq.write_fasta(writer)
    end
  end
ensure
  # Close the output even when parsing or writing raises, so the file
  # handle is not leaked.
  writer.close
end

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof
  res = RubyProf.stop
  printer = RubyProf::FlatPrinter.new(res)
  File.open(options.ruby_prof, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/maf_write
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
# maf_write: parses a MAF file and writes its header and all parsed
# blocks back out to standard output as MAF.
#
# Usage: maf_write [options] <maf>
#
# Optional profiling (PerfTools, ruby-prof, GC::Profiler) wraps the
# parse/write run.

options = OpenStruct.new
options.parser = Bio::MAF::Parser
# Options hash handed to the parser constructor as its second argument.
options.opts = {
  :chunk_reader => Bio::MAF::ChunkReader,
  :parse_extended => false
}

# Maps the printer name accepted by --ruby-prof to the name of the
# constant looked up under RubyProf.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}.freeze

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_write [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before either branch touches the RubyProf constant;
    # a bare PATH used to reference RubyProf::FlatPrinter without the
    # library having been required.
    require 'ruby-prof'
    # Accepts either "PRINTER:PATH" or a bare PATH (flat printer). The
    # pattern is anchored so the printer name must be the start of the
    # argument and no leading part of the path is discarded.
    spec = /\A(\w+):(.+)\z/.match(pspec)
    if spec
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch(spec[1]))
      options.ruby_prof_path = spec[2]
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # Looks the parser class up by name inside the Bio::MAF namespace.
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
    options.opts[:threads] = 1
  end
  opts.on("-e", "--extended") do
    options.opts[:parse_extended] = true
    options.opts[:parse_empty] = true
  end
end
op.parse!(ARGV)

src_path = ARGV.shift
# The input path is required; without it the parser would be given nil.
abort(op.help) unless src_path

# PerfTools takes precedence over ruby-prof when both are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path, options.opts)
writer = Bio::MAF::Writer.new($stdout)
writer.write_header(parser.header)
writer.write_blocks(parser.parse_blocks)

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/random_ranges
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
# random_ranges: prints NUM tab-separated intervals of length LEN on
# sequence SEQ, one per line, in the form "SEQ\tSTART\tEND\tx".
#
# START:END is divided into NUM equal slots (integer division); each
# interval is placed at a random offset inside its own slot.
#
# Usage: random_ranges -r START:END -l LEN -n NUM [-s SEQ]

options = OpenStruct.new

op = OptionParser.new do |opts|
  opts.banner = "Usage: random_ranges [options]"
  opts.on("-r", "--range START:END", "range") do |range|
    bounds = /\A(-?\d+):(-?\d+)\z/.match(range)
    unless bounds
      abort("random_ranges: invalid range #{range.inspect}, expected START:END")
    end
    options.start = bounds[1].to_i
    options.end = bounds[2].to_i
  end
  opts.on("-l", "--length LEN", Integer, "block length") do |len|
    options.length = len
  end
  opts.on("-n", "--number NUM", Integer, "number of blocks") do |num|
    options.num = num
  end
  opts.on("-s", "--sequence SEQ", "sequence") do |seq|
    options.seq = seq
  end
end

begin
  op.parse!(ARGV)
rescue OptionParser::ParseError => e
  abort("random_ranges: #{e.message}\n\n#{op.help}")
end

# --range, --length and --number are all needed for the arithmetic below;
# report them by name instead of failing on nil.
missing = []
missing << '--range' if options.start.nil? || options.end.nil?
missing << '--length' if options.length.nil?
missing << '--number' if options.num.nil?
unless missing.empty?
  abort("random_ranges: missing required option(s): #{missing.join(', ')}\n\n#{op.help}")
end

# A non-positive block count would divide by zero or produce no output.
abort("random_ranges: --number must be positive") unless options.num > 0

range = options.end - options.start
block_range = range / options.num
# Number of distinct start offsets available inside one slot.
block_start_range = block_range - options.length
if block_start_range < 0
  abort("random_ranges: #{options.num} blocks of length #{options.length} " \
        "do not fit in #{options.start}:#{options.end}")
end

rng = Random.new
options.num.times do |n|
  # Random#rand rejects 0, so a block that exactly fills its slot is
  # pinned to offset 0.
  block_offset = block_start_range.zero? ? 0 : rng.rand(block_start_range)
  b_start = options.start + (block_range * n) + block_offset
  b_end = b_start + options.length
  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
@milestone_2
|
2
|
+
Feature: Indexed access to MAF files
|
3
|
+
In order to extract alignment blocks from MAF files
|
4
|
+
By chromosomal ranges matching a source sequence
|
5
|
+
I want to have a way to build indexes on MAF files
|
6
|
+
And use indexes to efficiently find alignment blocks
|
7
|
+
Because linear searches of a 200 GB file are impractical
|
8
|
+
|
9
|
+
Scenario: Index a MAF file
|
10
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
11
|
+
When I open it with a MAF reader
|
12
|
+
And build an index on the reference sequence
|
13
|
+
Then the index has at least 8 entries
|
14
|
+
|
15
|
+
Scenario: Extract alignment blocks by chromosomal range
|
16
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
17
|
+
When I open it with a MAF reader
|
18
|
+
And build an index on the reference sequence
|
19
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
20
|
+
Then 2 blocks are obtained
|
21
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
22
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
23
|
+
|
24
|
+
Scenario: Extract alignment blocks by chromosomal range from index file
|
25
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
26
|
+
And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
|
27
|
+
When I open it with a MAF reader
|
28
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
29
|
+
Then 2 blocks are obtained
|
30
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
31
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
@@ -0,0 +1,29 @@
|
|
1
|
+
Feature: MAF output
|
2
|
+
In order to output modified MAF files or subsets of them
|
3
|
+
I want to be able to write out parsed MAF data
|
4
|
+
|
5
|
+
Scenario: Reproduce simple test data
|
6
|
+
Given a MAF source file "mm8_single.maf"
|
7
|
+
When I open it with a MAF reader
|
8
|
+
And open a new MAF writer
|
9
|
+
And write the header from the original MAF file
|
10
|
+
And write all the parsed blocks
|
11
|
+
Then the output should match, except whitespace, "mm8_single.maf"
|
12
|
+
|
13
|
+
Scenario: Reproduce longer test data
|
14
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
15
|
+
When I open it with a MAF reader
|
16
|
+
And open a new MAF writer
|
17
|
+
And write the header from the original MAF file
|
18
|
+
And write all the parsed blocks
|
19
|
+
Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
|
20
|
+
|
21
|
+
Scenario: Reproduce test data with i, e, q lines
|
22
|
+
Given a MAF source file "chr22_ieq.maf"
|
23
|
+
When I enable the :parse_extended parser option
|
24
|
+
And I enable the :parse_empty parser option
|
25
|
+
And I open it with a MAF reader
|
26
|
+
And open a new MAF writer
|
27
|
+
And write the header from the original MAF file
|
28
|
+
And write all the parsed blocks
|
29
|
+
Then the output should match, except whitespace, "chr22_ieq.maf"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
Feature: Parse MAF files
|
2
|
+
In order to extract information from a MAF file
|
3
|
+
I want to read it and pull out information
|
4
|
+
|
5
|
+
Scenario: Read MAF header
|
6
|
+
Given MAF data:
|
7
|
+
"""
|
8
|
+
##maf version=1 scoring=humor.v4
|
9
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
|
10
|
+
|
11
|
+
a score=0.128
|
12
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
13
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
14
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
15
|
+
"""
|
16
|
+
When I open it with a MAF reader
|
17
|
+
Then the MAF version should be "1"
|
18
|
+
And the scoring scheme should be "humor.v4"
|
19
|
+
# third line a continuation
|
20
|
+
And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf"
|
21
|
+
|
22
|
+
Scenario: Read alignment block
|
23
|
+
Given MAF data:
|
24
|
+
"""
|
25
|
+
##maf version=1 scoring=humor.v4
|
26
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
27
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
28
|
+
|
29
|
+
a score=0.128
|
30
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
31
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
32
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
33
|
+
"""
|
34
|
+
When I open it with a MAF reader
|
35
|
+
Then an alignment block can be obtained
|
36
|
+
And the alignment block has 3 sequences
|
37
|
+
And sequence 0 has source "human_hoxa"
|
38
|
+
And sequence 0 has start 100
|
39
|
+
And sequence 0 has size 8
|
40
|
+
And sequence 0 has strand :+
|
41
|
+
And sequence 0 has source size 100257
|
42
|
+
And sequence 0 has text "ACA-TTACT"
|
43
|
+
And sequence 1 has strand :-
|
44
|
+
|
@@ -0,0 +1,75 @@
|
|
1
|
+
@milestone_3
|
2
|
+
Feature: Filter results from MAF files
|
3
|
+
In order to work with only relevant data from a MAF file
|
4
|
+
Such as only species recognized by PhyloCSF
|
5
|
+
I want to filter the results of MAF queries
|
6
|
+
|
7
|
+
Scenario: Return only specified species
|
8
|
+
Given MAF data:
|
9
|
+
"""
|
10
|
+
##maf version=1
|
11
|
+
a score=10542.0
|
12
|
+
s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA-
|
13
|
+
s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA-
|
14
|
+
s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG-
|
15
|
+
s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
16
|
+
s panTro2.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
17
|
+
s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA-
|
18
|
+
s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG-
|
19
|
+
s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA-
|
20
|
+
s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA
|
21
|
+
s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA
|
22
|
+
"""
|
23
|
+
When I open it with a MAF reader
|
24
|
+
And filter for only the species
|
25
|
+
| hg18 |
|
26
|
+
| mm8 |
|
27
|
+
| rheMac2 |
|
28
|
+
Then an alignment block can be obtained
|
29
|
+
And the alignment block has 3 sequences
|
30
|
+
|
31
|
+
Scenario: Return only blocks having all specified species
|
32
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
33
|
+
When I open it with a MAF reader
|
34
|
+
And build an index on the reference sequence
|
35
|
+
And filter for blocks with the species
|
36
|
+
| panTro2 |
|
37
|
+
| loxAfr1 |
|
38
|
+
And search for blocks between positions 80082471 and 80082730 of mm8.chr7
|
39
|
+
Then 1 block is obtained
|
40
|
+
|
41
|
+
Scenario: Return only blocks having a certain number of sequences
|
42
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
43
|
+
When I open it with a MAF reader
|
44
|
+
And build an index on the reference sequence
|
45
|
+
And filter for blocks with at least 6 sequences
|
46
|
+
And search for blocks between positions 80082767 and 80083008 of mm8.chr7
|
47
|
+
Then 1 block is obtained
|
48
|
+
|
49
|
+
# sizes present:
|
50
|
+
# 55 64 128 148 157 163 165 192
|
51
|
+
|
52
|
+
Scenario: Return blocks with at least a given text size
|
53
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
54
|
+
When I open it with a MAF reader
|
55
|
+
And build an index on the reference sequence
|
56
|
+
And filter for blocks with text size at least 150
|
57
|
+
And search for blocks between positions 0 and 80100000 of mm8.chr7
|
58
|
+
Then 4 blocks are obtained
|
59
|
+
|
60
|
+
Scenario: Return blocks with at most a given text size
|
61
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
62
|
+
When I open it with a MAF reader
|
63
|
+
And build an index on the reference sequence
|
64
|
+
And filter for blocks with text size at most 72
|
65
|
+
And search for blocks between positions 0 and 80100000 of mm8.chr7
|
66
|
+
Then 2 blocks are obtained
|
67
|
+
|
68
|
+
Scenario: Return blocks within a text size range
|
69
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
70
|
+
When I open it with a MAF reader
|
71
|
+
And build an index on the reference sequence
|
72
|
+
And filter for blocks with text size between 72 and 160
|
73
|
+
And search for blocks between positions 0 and 80100000 of mm8.chr7
|
74
|
+
Then 3 blocks are obtained
|
75
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
Feature: Convert MAF file to FASTA
|
2
|
+
In order to use multiple alignment data with other tools
|
3
|
+
I want to read a Multiple Alignment Format (MAF) file and write out its data as FASTA
|
4
|
+
|
5
|
+
Scenario: Convert simple MAF file
|
6
|
+
Given a MAF source file "t1.maf"
|
7
|
+
When I select FASTA output
|
8
|
+
And I open it with a MAF reader
|
9
|
+
And process the file
|
10
|
+
Then the output should match "t1.fasta"
|
11
|
+
|
12
|
+
Scenario: Convert simple MAF data
|
13
|
+
Given MAF data:
|
14
|
+
"""
|
15
|
+
##maf version=1 scoring=humor.v4
|
16
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
17
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
18
|
+
|
19
|
+
a score=0.128
|
20
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
21
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
22
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
23
|
+
|
24
|
+
|
25
|
+
a score=0.071
|
26
|
+
s human_unc 9077 8 + 10998 ACAGTATT
|
27
|
+
# Comment
|
28
|
+
s horse_unc 4555 6 - 5099 ACA--ATT
|
29
|
+
s fugu_unc 4000 4 + 4038 AC----TT
|
30
|
+
"""
|
31
|
+
When I select FASTA output
|
32
|
+
And I open it with a MAF reader
|
33
|
+
And process the file
|
34
|
+
Then the output should be:
|
35
|
+
"""
|
36
|
+
>human_hoxa:100-108
|
37
|
+
ACA-TTACT
|
38
|
+
>horse_hoxa:120-129
|
39
|
+
ACAATTGCT
|
40
|
+
>fugu_hoxa:88-95
|
41
|
+
ACA--TGCT
|
42
|
+
>human_unc:9077-9085
|
43
|
+
ACAGTATT
|
44
|
+
>horse_unc:4555-4561
|
45
|
+
ACA--ATT
|
46
|
+
>fugu_unc:4000-4004
|
47
|
+
AC----TT
|
48
|
+
|
49
|
+
"""
|
50
|
+
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'bigbio' # FASTA support
|
2
|
+
|
3
|
+
# Step definitions for the MAF-to-FASTA conversion feature.

# Resolves the named fixture under $test_data and checks that it exists.
Given(/^a MAF source file "(.*?)"$/) do |fixture_name|
  @src_f = $test_data + fixture_name
  @src_f.exist?.should be_true
end

# Writes the inline MAF text to a temporary ".maf" file and closes it.
Given(/^MAF data:$/) do |maf_text|
  @src_f = Tempfile.new(%w[rspec .maf])
  @src_f.write(maf_text)
  @src_f.close
end

# Creates a closed temporary output file and a FastaWriter on its path.
When(/^I select FASTA output$/) do
  # NOTE(review): @out_fmt is not assigned anywhere in this file, so the
  # suffix may end up as a bare "." -- confirm whether another step sets it.
  @dst = Tempfile.new(['cuke', ".#{@out_fmt}"])
  @dst.close
  @writer = FastaWriter.new(@dst.path)
end

# Sends every raw sequence of every block to the writer, then closes it.
When(/^process the file$/) do
  @parser.each_block do |blk|
    blk.each_raw_seq { |raw_seq| raw_seq.write_fasta(@writer) }
  end
  @writer.close
end

# Compares the generated output with a reference fixture, byte for byte.
Then(/^the output should match "(.*?)"$/) do |ref|
  expected_path = $test_data + ref
  expected_path.exist?.should be_true
  File.read(@dst.path).should == File.read(expected_path)
end

# Compares the generated output with the inline expected text.
Then(/^the output should be:$/) do |expected_text|
  File.read(@dst.path).should == expected_text
end

# Removes the temporary output file, if a scenario created one.
After do
  next unless @dst
  @dst.close
  @dst.unlink
end
|