bio-maf 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio-maf.rb +12 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +182 -0
data/bin/maf_to_fasta
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# maf_to_fasta: read a MAF file and write every sequence of every alignment
# block to a FASTA file. Optional flags enable CPU profiling (perftools or
# ruby-prof) and GC profiling around the conversion.
#
# Usage: maf_to_fasta [options] <maf> <fasta>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |path|
    options.ruby_prof = path
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # Looks up an alternative parser class by constant name under Bio::MAF.
    options.parser = Bio::MAF.const_get(name)
  end
end
op.parse!(ARGV)

src_path = ARGV.shift
dst_path = ARGV.shift

# Both positional arguments are mandatory; fail early with the usage text
# rather than handing nil paths to the parser and writer.
abort(op.help) unless src_path && dst_path

# At most one CPU profiler is started; perftools takes precedence.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path)
writer = FastaWriter.new(dst_path)

begin
  parser.each_block do |block|
    block.each_raw_seq do |seq|
      seq.write_fasta(writer)
    end
  end
ensure
  # Close the output even if parsing or writing raises part-way through.
  writer.close
end

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof
  res = RubyProf.stop
  printer = RubyProf::FlatPrinter.new(res)
  File.open(options.ruby_prof, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/maf_write
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# maf_write: parse a MAF file and write its header and blocks back out to
# standard output. Optional flags select the parser class and chunk reader,
# enable extended-line parsing, and enable CPU or GC profiling.
#
# Usage: maf_write [options] <maf>

require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.opts = {
  :chunk_reader => Bio::MAF::ChunkReader,
  :parse_extended => false
}

# Maps the printer name accepted on the command line to the name of the
# corresponding constant under RubyProf.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_write [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # The argument is either PATH or PRINTER:PATH. Load ruby-prof before
    # branching: both branches reference constants under RubyProf, and the
    # plain-PATH branch previously raised NameError because the library was
    # only required in the PRINTER:PATH branch.
    require 'ruby-prof'
    if pspec =~ /\A(\w+):(.+)\z/
      printer_name = $1
      options.ruby_prof_path = $2
      printer_const = PRINTERS.fetch(printer_name) do
        abort("unknown ruby-prof printer #{printer_name.inspect}; " \
              "expected one of: #{PRINTERS.keys.join(', ')}")
      end
      options.ruby_prof_printer = RubyProf.const_get(printer_const)
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # Looks up an alternative parser class by constant name under Bio::MAF.
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
    options.opts[:threads] = 1
  end
  opts.on("-e", "--extended") do
    options.opts[:parse_extended] = true
    options.opts[:parse_empty] = true
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# The input path is mandatory; fail early with the usage text rather than
# handing nil to the parser.
abort(op.help) unless src_path

# At most one CPU profiler is started; perftools takes precedence.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path, options.opts)
writer = Bio::MAF::Writer.new($stdout)
writer.write_header(parser.header)
writer.write_blocks(parser.parse_blocks)

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
|
data/bin/random_ranges
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# random_ranges: print NUM randomly placed intervals of LEN positions inside
# START:END, one per line, as tab-separated "SEQ<TAB>start<TAB>end<TAB>x".
#
# The overall range is divided into NUM equal slots and one interval is
# placed at a random offset inside each slot, so intervals never overlap.
#
# Usage: random_ranges -r START:END -l LEN -n NUM [-s SEQ]

require 'optparse'
require 'ostruct'

options = OpenStruct.new

op = OptionParser.new do |opts|
  opts.banner = "Usage: random_ranges [options]"
  opts.on("-r", "--range START:END", "range") do |range|
    m = /\A(\d+):(\d+)\z/.match(range)
    abort("invalid range #{range.inspect}: expected START:END") unless m
    options.start = m[1].to_i
    options.end = m[2].to_i
  end
  opts.on("-l", "--length LEN", Integer, "block length") do |len|
    options.length = len
  end
  opts.on("-n", "--number NUM", Integer, "number of blocks") do |num|
    options.num = num
  end
  opts.on("-s", "--sequence SEQ", "sequence") do |seq|
    options.seq = seq
  end
end

begin
  op.parse!(ARGV)
rescue OptionParser::ParseError => e
  abort("#{e.message}\n#{op.help}")
end

# -r, -l and -n are all needed for the arithmetic below; report what is
# missing instead of failing on nil.
missing = []
missing << "--range" unless options.start && options.end
missing << "--length" unless options.length
missing << "--number" unless options.num
unless missing.empty?
  abort("missing required option(s): #{missing.join(', ')}\n#{op.help}")
end
abort("--number must be positive") unless options.num > 0

span = options.end - options.start
block_range = span / options.num
# Number of distinct start offsets available inside one slot.
block_start_range = block_range - options.length
unless block_start_range > 0
  # Random#rand raises ArgumentError for a non-positive bound.
  abort("cannot fit #{options.num} block(s) of length #{options.length} " \
        "in range #{options.start}:#{options.end}")
end

# Named rng rather than rand to avoid shadowing Kernel#rand.
rng = Random.new
options.num.times do |n|
  block_offset = rng.rand(block_start_range)
  b_start = options.start + (block_range * n) + block_offset
  b_end = b_start + options.length
  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
@milestone_2
|
2
|
+
Feature: Indexed access to MAF files
|
3
|
+
In order to extract alignment blocks from MAF files
|
4
|
+
By chromosomal ranges matching a source sequence
|
5
|
+
I want to have a way to build indexes on MAF files
|
6
|
+
And use indexes to efficiently find alignment blocks
|
7
|
+
Because linear searches of a 200 GB file are impractical
|
8
|
+
|
9
|
+
Scenario: Index a MAF file
|
10
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
11
|
+
When I open it with a MAF reader
|
12
|
+
And build an index on the reference sequence
|
13
|
+
Then the index has at least 8 entries
|
14
|
+
|
15
|
+
Scenario: Extract alignment blocks by chromosomal range
|
16
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
17
|
+
When I open it with a MAF reader
|
18
|
+
And build an index on the reference sequence
|
19
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
20
|
+
Then 2 blocks are obtained
|
21
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
22
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
23
|
+
|
24
|
+
Scenario: Extract alignment blocks by chromosomal range from index file
|
25
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
26
|
+
And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
|
27
|
+
When I open it with a MAF reader
|
28
|
+
And search for blocks between positions 80082592 and 80082766 of mm8.chr7
|
29
|
+
Then 2 blocks are obtained
|
30
|
+
And sequence mm8.chr7 of block 0 has start 80082592
|
31
|
+
And sequence mm8.chr7 of block 1 has start 80082713
|
@@ -0,0 +1,29 @@
|
|
1
|
+
Feature: MAF output
|
2
|
+
In order to output modified MAF files or subsets of them
|
3
|
+
I want to be able to write out parsed MAF data
|
4
|
+
|
5
|
+
Scenario: Reproduce simple test data
|
6
|
+
Given a MAF source file "mm8_single.maf"
|
7
|
+
When I open it with a MAF reader
|
8
|
+
And open a new MAF writer
|
9
|
+
And write the header from the original MAF file
|
10
|
+
And write all the parsed blocks
|
11
|
+
Then the output should match, except whitespace, "mm8_single.maf"
|
12
|
+
|
13
|
+
Scenario: Reproduce longer test data
|
14
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
15
|
+
When I open it with a MAF reader
|
16
|
+
And open a new MAF writer
|
17
|
+
And write the header from the original MAF file
|
18
|
+
And write all the parsed blocks
|
19
|
+
Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
|
20
|
+
|
21
|
+
Scenario: Reproduce test data with i, e, q lines
|
22
|
+
Given a MAF source file "chr22_ieq.maf"
|
23
|
+
When I enable the :parse_extended parser option
|
24
|
+
And I enable the :parse_empty parser option
|
25
|
+
And I open it with a MAF reader
|
26
|
+
And open a new MAF writer
|
27
|
+
And write the header from the original MAF file
|
28
|
+
And write all the parsed blocks
|
29
|
+
Then the output should match, except whitespace, "chr22_ieq.maf"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
Feature: Parse MAF files
|
2
|
+
In order to extract information from a MAF file
|
3
|
+
I want to read it and pull out information
|
4
|
+
|
5
|
+
Scenario: Read MAF header
|
6
|
+
Given MAF data:
|
7
|
+
"""
|
8
|
+
##maf version=1 scoring=humor.v4
|
9
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
|
10
|
+
|
11
|
+
a score=0.128
|
12
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
13
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
14
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
15
|
+
"""
|
16
|
+
When I open it with a MAF reader
|
17
|
+
Then the MAF version should be "1"
|
18
|
+
And the scoring scheme should be "humor.v4"
|
19
|
+
# third line a continuation
|
20
|
+
And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf"
|
21
|
+
|
22
|
+
Scenario: Read alignment block
|
23
|
+
Given MAF data:
|
24
|
+
"""
|
25
|
+
##maf version=1 scoring=humor.v4
|
26
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
27
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
28
|
+
|
29
|
+
a score=0.128
|
30
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
31
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
32
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
33
|
+
"""
|
34
|
+
When I open it with a MAF reader
|
35
|
+
Then an alignment block can be obtained
|
36
|
+
And the alignment block has 3 sequences
|
37
|
+
And sequence 0 has source "human_hoxa"
|
38
|
+
And sequence 0 has start 100
|
39
|
+
And sequence 0 has size 8
|
40
|
+
And sequence 0 has strand :+
|
41
|
+
And sequence 0 has source size 100257
|
42
|
+
And sequence 0 has text "ACA-TTACT"
|
43
|
+
And sequence 1 has strand :-
|
44
|
+
|
@@ -0,0 +1,75 @@
|
|
1
|
+
@milestone_3
|
2
|
+
Feature: Filter results from MAF files
|
3
|
+
In order to work with only relevant data from a MAF file
|
4
|
+
Such as only species recognized by PhyloCSF
|
5
|
+
I want to filter the results of MAF queries
|
6
|
+
|
7
|
+
Scenario: Return only specified species
|
8
|
+
Given MAF data:
|
9
|
+
"""
|
10
|
+
##maf version=1
|
11
|
+
a score=10542.0
|
12
|
+
s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA-
|
13
|
+
s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA-
|
14
|
+
s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG-
|
15
|
+
s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
16
|
+
s panTro2.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
17
|
+
s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA-
|
18
|
+
s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG-
|
19
|
+
s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA-
|
20
|
+
s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA
|
21
|
+
s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA
|
22
|
+
"""
|
23
|
+
When I open it with a MAF reader
|
24
|
+
And filter for only the species
|
25
|
+
| hg18 |
|
26
|
+
| mm8 |
|
27
|
+
| rheMac2 |
|
28
|
+
Then an alignment block can be obtained
|
29
|
+
And the alignment block has 3 sequences
|
30
|
+
|
31
|
+
Scenario: Return only blocks having all specified species
|
32
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
33
|
+
When I open it with a MAF reader
|
34
|
+
And build an index on the reference sequence
|
35
|
+
And filter for blocks with the species
|
36
|
+
| panTro2 |
|
37
|
+
| loxAfr1 |
|
38
|
+
And search for blocks between positions 80082471 and 80082730 of mm8.chr7
|
39
|
+
Then 1 block is obtained
|
40
|
+
|
41
|
+
Scenario: Return only blocks having a certain number of sequences
|
42
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
43
|
+
When I open it with a MAF reader
|
44
|
+
And build an index on the reference sequence
|
45
|
+
And filter for blocks with at least 6 sequences
|
46
|
+
And search for blocks between positions 80082767 and 80083008 of mm8.chr7
|
47
|
+
Then 1 block is obtained
|
48
|
+
|
49
|
+
# sizes present:
|
50
|
+
# 55 64 128 148 157 163 165 192
|
51
|
+
|
52
|
+
Scenario: Return blocks with at least a given text size
|
53
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
54
|
+
When I open it with a MAF reader
|
55
|
+
And build an index on the reference sequence
|
56
|
+
And filter for blocks with text size at least 150
|
57
|
+
And search for blocks between positions 0 and 80100000 of mm8.chr7
|
58
|
+
Then 4 blocks are obtained
|
59
|
+
|
60
|
+
Scenario: Return blocks with at most a given text size
|
61
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
62
|
+
When I open it with a MAF reader
|
63
|
+
And build an index on the reference sequence
|
64
|
+
And filter for blocks with text size at most 72
|
65
|
+
And search for blocks between positions 0 and 80100000 of mm8.chr7
|
66
|
+
Then 2 blocks are obtained
|
67
|
+
|
68
|
+
Scenario: Return blocks within a text size range
|
69
|
+
Given a MAF source file "mm8_chr7_tiny.maf"
|
70
|
+
When I open it with a MAF reader
|
71
|
+
And build an index on the reference sequence
|
72
|
+
And filter for blocks with text size between 72 and 160
|
73
|
+
And search for blocks between positions 0 and 80100000 of mm8.chr7
|
74
|
+
Then 3 blocks are obtained
|
75
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
Feature: Convert MAF file to FASTA
|
2
|
+
In order to use multiple alignment data with other tools
|
3
|
+
I want to read a Multiple Alignment Format (MAF) file and write out its data as FASTA
|
4
|
+
|
5
|
+
Scenario: Convert simple MAF file
|
6
|
+
Given a MAF source file "t1.maf"
|
7
|
+
When I select FASTA output
|
8
|
+
And I open it with a MAF reader
|
9
|
+
And process the file
|
10
|
+
Then the output should match "t1.fasta"
|
11
|
+
|
12
|
+
Scenario: Convert simple MAF data
|
13
|
+
Given MAF data:
|
14
|
+
"""
|
15
|
+
##maf version=1 scoring=humor.v4
|
16
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
17
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
18
|
+
|
19
|
+
a score=0.128
|
20
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
21
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
22
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
23
|
+
|
24
|
+
|
25
|
+
a score=0.071
|
26
|
+
s human_unc 9077 8 + 10998 ACAGTATT
|
27
|
+
# Comment
|
28
|
+
s horse_unc 4555 6 - 5099 ACA--ATT
|
29
|
+
s fugu_unc 4000 4 + 4038 AC----TT
|
30
|
+
"""
|
31
|
+
When I select FASTA output
|
32
|
+
And I open it with a MAF reader
|
33
|
+
And process the file
|
34
|
+
Then the output should be:
|
35
|
+
"""
|
36
|
+
>human_hoxa:100-108
|
37
|
+
ACA-TTACT
|
38
|
+
>horse_hoxa:120-129
|
39
|
+
ACAATTGCT
|
40
|
+
>fugu_hoxa:88-95
|
41
|
+
ACA--TGCT
|
42
|
+
>human_unc:9077-9085
|
43
|
+
ACAGTATT
|
44
|
+
>horse_unc:4555-4561
|
45
|
+
ACA--ATT
|
46
|
+
>fugu_unc:4000-4004
|
47
|
+
AC----TT
|
48
|
+
|
49
|
+
"""
|
50
|
+
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'bigbio' # FASTA support
require 'tempfile' # Tempfile is used below for inline MAF data and output

# Cucumber step definitions for the MAF-to-FASTA conversion features.

# Points @src_f at a fixture under $test_data and checks that it exists.
# NOTE(review): $test_data is presumably a Pathname set up in the support
# code -- confirm.
Given /^a MAF source file "(.*?)"$/ do |src|
  @src_f = $test_data + src
  @src_f.exist?.should be_true
end

# Writes the inline MAF text from the feature file to a temporary .maf file
# and leaves it closed in @src_f.
Given /^MAF data:$/ do |string|
  @src_f = Tempfile.new(['rspec', '.maf'])
  @src_f.write(string)
  @src_f.close
end

# Creates a temporary destination file and opens a FASTA writer on its path.
When /^I select FASTA output$/ do
  # NOTE(review): @out_fmt is not assigned anywhere in this file; if it is
  # unset the temporary file's suffix is just "." -- confirm whether another
  # step is expected to set it.
  @dst = Tempfile.new(['cuke', ".#{@out_fmt}"])
  @dst.close
  @writer = FastaWriter.new(@dst.path)
end

# Writes every raw sequence of every block from @parser to the FASTA writer,
# then closes the writer.
When /^process the file$/ do
  @parser.each_block do |block|
    block.each_raw_seq do |seq|
      seq.write_fasta(@writer)
    end
  end
  @writer.close
end

# Compares the generated output with a reference file under $test_data.
Then /^the output should match "(.*?)"$/ do |ref|
  ref_p = $test_data + ref
  ref_p.exist?.should be_true
  File.read(@dst.path).should == File.read(ref_p)
end

# Compares the generated output with the inline expected text.
Then /^the output should be:$/ do |string|
  File.read(@dst.path).should == string
end

# Removes the temporary output file, if one was created for the scenario.
After do
  if @dst
    @dst.close
    @dst.unlink
  end
end
|