bio-maf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/.document +5 -0
  2. data/.simplecov +1 -0
  3. data/.travis.yml +16 -0
  4. data/.yardopts +3 -0
  5. data/DEVELOPMENT.md +40 -0
  6. data/Gemfile +23 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +209 -0
  9. data/Rakefile +76 -0
  10. data/VERSION +1 -0
  11. data/benchmarks/dispatch_bench +53 -0
  12. data/benchmarks/iter_bench +44 -0
  13. data/benchmarks/read_bench +40 -0
  14. data/benchmarks/sort_bench +33 -0
  15. data/benchmarks/split_bench +33 -0
  16. data/bin/maf_count +82 -0
  17. data/bin/maf_dump_blocks +27 -0
  18. data/bin/maf_extract_ranges_count +44 -0
  19. data/bin/maf_index +88 -0
  20. data/bin/maf_parse_bench +94 -0
  21. data/bin/maf_to_fasta +68 -0
  22. data/bin/maf_write +84 -0
  23. data/bin/random_ranges +35 -0
  24. data/features/maf-indexing.feature +31 -0
  25. data/features/maf-output.feature +29 -0
  26. data/features/maf-parsing.feature +44 -0
  27. data/features/maf-querying.feature +75 -0
  28. data/features/maf-to-fasta.feature +50 -0
  29. data/features/step_definitions/convert_steps.rb +45 -0
  30. data/features/step_definitions/index_steps.rb +20 -0
  31. data/features/step_definitions/output_steps.rb +27 -0
  32. data/features/step_definitions/parse_steps.rb +63 -0
  33. data/features/step_definitions/query_steps.rb +31 -0
  34. data/features/step_definitions/ucsc_bin_steps.rb +14 -0
  35. data/features/support/env.rb +16 -0
  36. data/features/ucsc-bins.feature +24 -0
  37. data/lib/bio/maf/index.rb +620 -0
  38. data/lib/bio/maf/parser.rb +888 -0
  39. data/lib/bio/maf/struct.rb +63 -0
  40. data/lib/bio/maf/writer.rb +63 -0
  41. data/lib/bio/maf.rb +4 -0
  42. data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
  43. data/lib/bio/ucsc/ucsc_bin.rb +117 -0
  44. data/lib/bio/ucsc.rb +2 -0
  45. data/lib/bio-maf/maf.rb +3 -0
  46. data/lib/bio-maf.rb +12 -0
  47. data/man/.gitignore +1 -0
  48. data/man/maf_index.1 +105 -0
  49. data/man/maf_index.1.markdown +97 -0
  50. data/man/maf_index.1.ronn +83 -0
  51. data/man/maf_to_fasta.1 +53 -0
  52. data/man/maf_to_fasta.1.ronn +51 -0
  53. data/spec/bio/maf/index_spec.rb +363 -0
  54. data/spec/bio/maf/parser_spec.rb +354 -0
  55. data/spec/bio/maf/struct_spec.rb +75 -0
  56. data/spec/spec_helper.rb +14 -0
  57. data/test/data/big-block.maf +15999 -0
  58. data/test/data/chr22_ieq.maf +11 -0
  59. data/test/data/chrY-1block.maf +6 -0
  60. data/test/data/empty +0 -0
  61. data/test/data/empty.db +0 -0
  62. data/test/data/mm8_chr7_tiny.kct +0 -0
  63. data/test/data/mm8_chr7_tiny.maf +76 -0
  64. data/test/data/mm8_mod_a.maf +7 -0
  65. data/test/data/mm8_single.maf +13 -0
  66. data/test/data/mm8_subset_a.maf +23 -0
  67. data/test/data/t1-bad1.maf +15 -0
  68. data/test/data/t1.fasta +12 -0
  69. data/test/data/t1.maf +15 -0
  70. data/test/data/t1a.maf +17 -0
  71. data/test/helper.rb +18 -0
  72. data/test/test_bio-maf.rb +7 -0
  73. data/travis-ci/install_kc +13 -0
  74. data/travis-ci/install_kc_java +13 -0
  75. data/travis-ci/report_errors +4 -0
  76. metadata +181 -0
data/bin/maf_count ADDED
@@ -0,0 +1,82 @@
#!/usr/bin/env ruby

# maf_count: parse a MAF file and print the number of alignment blocks.
# The run can optionally be wrapped in PerfTools, ruby-prof or
# GC::Profiler (see the options below).
#
# Usage: maf_count [options] <maf>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.reader = Bio::MAF::ChunkReader

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the constant looked up under RubyProf.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_count [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching: the plain-PATH form references
    # RubyProf::FlatPrinter too and used to fail with NameError.
    require 'ruby-prof'
    # Anchored so that only a leading "NAME:" is treated as a printer.
    spec = /\A(\w+):(.+)\z/.match(pspec)
    if spec
      printer_name = PRINTERS[spec[1]]
      unless printer_name
        abort "maf_count: unknown ruby-prof printer '#{spec[1]}' " \
              "(expected one of: #{PRINTERS.keys.join(', ')})"
      end
      options.ruby_prof_printer = RubyProf.const_get(printer_name)
      options.ruby_prof_path = spec[2]
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.reader = Bio::MAF::ThreadedChunkReader
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# Without a MAF path there is nothing to parse; show usage instead of
# handing nil to the parser.
unless src_path
  $stderr.puts op
  exit 1
end

# PerfTools takes precedence over ruby-prof when both are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path,
                            :chunk_reader => options.reader,
                            :parse_extended => false)

n = 0
parser.each_block { n += 1 }
puts "Parsed #{n} MAF alignment blocks."

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
@@ -0,0 +1,27 @@
#!/usr/bin/env ruby

# maf_dump_blocks: print one line per alignment block of a MAF file,
# giving the block's offset and size as tab-separated, right-aligned
# integers on standard output.
#
# Usage: maf_dump_blocks [options] <maf>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

settings = OpenStruct.new
settings.parser = Bio::MAF::Parser

cli = OptionParser.new do |o|
  o.banner = "Usage: maf_dump_blocks [options] <maf>"
  o.separator ""
  o.separator "Options:"
  # --parser selects an alternative parser class by name under Bio::MAF.
  o.on("--parser PARSER", "parser") { |name| settings.parser = Bio::MAF.const_get(name) }
end
cli.parse!(ARGV)

maf_path = ARGV.shift
maf = settings.parser.new(maf_path)

maf.each_block do |blk|
  $stdout.printf("%12d\t%7d\n", blk.offset, blk.size)
end
@@ -0,0 +1,44 @@
#!/usr/bin/env ruby

# maf_extract_ranges_count: read genomic intervals from standard input,
# look them up in a MAF file through a Kyoto Cabinet index, and print how
# many blocks each pass returned.
#
# Usage: maf_extract_ranges_count [options] <maf> <index>

require 'optparse'
require 'ostruct'

require 'bio-maf'
require 'bio-genomic-interval'

options = OpenStruct.new
options.p = { :threads => 1 }
options.passes = 1

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_extract_ranges_count [options] <maf> <index>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-t", "--threads N", "Parser threads") do |n|
    options.p[:threads] = n.to_i
  end
  opts.on("-p", "--passes N", "Number of passes") do |n|
    options.passes = n.to_i
  end
end
op.parse!(ARGV)

maf_p = ARGV.shift
index_p = ARGV.shift

# Both positional arguments are required; print usage rather than
# passing nil paths to the parser and index.
unless maf_p && index_p
  $stderr.puts op
  exit 1
end

parser = Bio::MAF::Parser.new(maf_p, options.p)
index = Bio::MAF::KyotoIndex.open(index_p)

# Build a zero-based genomic interval from one whitespace-separated input
# line of the form "<sequence> <start> <end> [anything else]".
#
# @param line [String] one line of interval input
# @return [Bio::GenomicInterval]
def parse_interval(line)
  src, r_start_s, r_end_s, _ = line.split(nil, 4)
  Bio::GenomicInterval.zero_based(src, r_start_s.to_i, r_end_s.to_i)
end

intervals = []
$stdin.each_line do |line|
  # A blank line would otherwise yield an interval with a nil sequence
  # name and 0..0 coordinates.
  next if line.strip.empty?
  intervals << parse_interval(line)
end

options.passes.times do
  blocks = index.find(intervals, parser)
  puts "TOTAL: #{blocks.count} blocks parsed."
end
data/bin/maf_index ADDED
@@ -0,0 +1,88 @@
#!/usr/bin/env ruby

# maf_index: build a Kyoto Cabinet index for a MAF file, or dump an
# existing index with --dump.
#
# Usage: maf_index [options] <maf> <index>

require 'benchmark'
require 'bio-maf'
require 'optparse'
require 'ostruct'

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the constant looked up under RubyProf.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter,
  'graph' => :GraphHtmlPrinter
}

# Global because build_index reads the selected chunk reader from it.
$options = OpenStruct.new
$options.mode = :build
$options.reader = Bio::MAF::ChunkReader

# Parse +maf+ with the configured chunk reader and build an index at
# +index+, closing the index once it has been built.
#
# @param maf [String] path to the MAF file
# @param index [String] path of the index to build
def build_index(maf, index)
  parser = Bio::MAF::Parser.new(maf,
                                :chunk_reader => $options.reader,
                                :parse_extended => false)
  idx = Bio::MAF::KyotoIndex.build(parser, index)
  idx.close
end

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_index [options] <maf> <index>"
  #opts.separator ""
  #opts.separator "Options:"
  opts.on("--time", "print elapsed time") do
    $options.bench = true
  end
  opts.on("-d", "--dump") do
    $options.mode = :dump
  end
  opts.on("-t", "--threaded") do
    $options.reader = Bio::MAF::ThreadedChunkReader
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    require 'ruby-prof'
    # Anchored so that only a leading "NAME:" is treated as a printer.
    spec = /\A(\w+):(.+)\z/.match(pspec)
    if spec
      printer_name = PRINTERS[spec[1]]
      unless printer_name
        abort "maf_index: unknown ruby-prof printer '#{spec[1]}' " \
              "(expected one of: #{PRINTERS.keys.join(', ')})"
      end
      $options.ruby_prof_printer = RubyProf.const_get(printer_name)
      $options.ruby_prof_path = spec[2]
    else
      # The module is RubyProf; the previous "Ruby_Prof" spelling raised
      # NameError whenever a plain PATH was given.
      $options.ruby_prof_printer = RubyProf::FlatPrinter
      $options.ruby_prof_path = pspec
    end
  end
end

op.parse!(ARGV)

# In dump mode the only positional argument is the index path.
maf_p = ARGV.shift if $options.mode == :build
index_p = ARGV.shift

unless (maf_p || $options.mode == :dump) && index_p
  $stderr.puts op
  exit 1
end

RubyProf.start if $options.ruby_prof_path

case $options.mode
when :build
  if $options.bench
    bm_res = Benchmark.measure do
      build_index(maf_p, index_p)
    end
    puts bm_res
  else
    build_index(maf_p, index_p)
  end
when :dump
  idx = Bio::MAF::KyotoIndex.open(index_p)
  idx.dump
else
  raise "Unsupported mode: #{$options.mode}"
end

if $options.ruby_prof_path
  res = RubyProf.stop
  printer = $options.ruby_prof_printer.new(res)
  File.open($options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
@@ -0,0 +1,94 @@
#!/usr/bin/env ruby

# maf_parse_bench: time repeated calls to the parser's parse_block from a
# fixed position in a MAF file and print the per-run Benchmark figures.
# The run can optionally be wrapped in PerfTools, ruby-prof or
# GC::Profiler.
#
# Usage: maf_parse_bench [options] <maf>

require 'benchmark'
require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.runs = 100_000
options.warmup = false

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the constant looked up under RubyProf.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_parse_bench [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    require 'ruby-prof'
    # Anchored so that only a leading "NAME:" is treated as a printer.
    spec = /\A(\w+):(.+)\z/.match(pspec)
    if spec
      printer_name = PRINTERS[spec[1]]
      unless printer_name
        abort "maf_parse_bench: unknown ruby-prof printer '#{spec[1]}' " \
              "(expected one of: #{PRINTERS.keys.join(', ')})"
      end
      options.ruby_prof_printer = RubyProf.const_get(printer_name)
      options.ruby_prof_path = spec[2]
    else
      # Must be the printer class, not a Symbol: it is instantiated with
      # .new once profiling stops.
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-w", "--warmup", "perform warmup run") do
    options.warmup = true
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# Without a MAF path there is nothing to benchmark; show usage instead of
# handing nil to the parser.
unless src_path
  $stderr.puts op
  exit 1
end

# PerfTools takes precedence over ruby-prof when both are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path)
# Advance with two parse_block calls, then remember the position so that
# every iteration below starts parsing from the same place.
parser.parse_block
parser.parse_block
pos = parser.s.pos

if options.warmup
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

bm_res = Benchmark.measure do
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end

# Dividing the Benchmark result by the run count gives per-run timings.
puts bm_res / options.runs
data/bin/maf_to_fasta ADDED
@@ -0,0 +1,68 @@
#!/usr/bin/env ruby

# maf_to_fasta: write every raw sequence of every alignment block in a
# MAF file to a FASTA file. The run can optionally be wrapped in
# PerfTools, ruby-prof or GC::Profiler.
#
# Usage: maf_to_fasta [options] <maf> <fasta>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

settings = OpenStruct.new
settings.parser = Bio::MAF::Parser

cli = OptionParser.new do |o|
  o.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
  o.separator ""
  o.separator "Options:"
  o.on("-p", "--profile PROF", "Profile with PerfTools") { |prof| settings.prof = prof }
  o.on("--ruby-prof PATH", "Profile with ruby-prof") { |path| settings.ruby_prof = path }
  o.on("--profile-gc", "Profile GC") { |_flag| settings.profile_gc = true }
  o.on("--parser PARSER", "parser") { |name| settings.parser = Bio::MAF.const_get(name) }
end
cli.parse!(ARGV)

maf_path = ARGV.shift
fasta_path = ARGV.shift

# Decide once which CPU profiler (if any) wraps the run; PerfTools wins
# when both were requested.
profiler =
  if settings.prof
    :perftools
  elsif settings.ruby_prof
    :ruby_prof
  end

case profiler
when :perftools
  require 'perftools'
  PerfTools::CpuProfiler.start(settings.prof)
when :ruby_prof
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if settings.profile_gc

maf = settings.parser.new(maf_path)
fasta = FastaWriter.new(fasta_path)

maf.each_block do |blk|
  blk.each_raw_seq { |raw| raw.write_fasta(fasta) }
end

fasta.close

if settings.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

case profiler
when :perftools
  PerfTools::CpuProfiler.stop
when :ruby_prof
  profile = RubyProf.stop
  report = RubyProf::FlatPrinter.new(profile)
  File.open(settings.ruby_prof, 'w') { |out| report.print(out) }
end
data/bin/maf_write ADDED
@@ -0,0 +1,84 @@
#!/usr/bin/env ruby

# maf_write: parse a MAF file and write its header and blocks back out
# to standard output. The run can optionally be wrapped in PerfTools,
# ruby-prof or GC::Profiler.
#
# Usage: maf_write [options] <maf>

require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
# Options hash handed to the parser constructor.
options.opts = {
  :chunk_reader => Bio::MAF::ChunkReader,
  :parse_extended => false
}

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the constant looked up under RubyProf.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_write [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching: the plain-PATH form references
    # RubyProf::FlatPrinter too and used to fail with NameError.
    require 'ruby-prof'
    # Anchored so that only a leading "NAME:" is treated as a printer.
    spec = /\A(\w+):(.+)\z/.match(pspec)
    if spec
      printer_name = PRINTERS[spec[1]]
      unless printer_name
        abort "maf_write: unknown ruby-prof printer '#{spec[1]}' " \
              "(expected one of: #{PRINTERS.keys.join(', ')})"
      end
      options.ruby_prof_printer = RubyProf.const_get(printer_name)
      options.ruby_prof_path = spec[2]
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
    options.opts[:threads] = 1
  end
  opts.on("-e", "--extended") do
    options.opts[:parse_extended] = true
    options.opts[:parse_empty] = true
  end
end
op.parse!(ARGV)

src_path = ARGV.shift

# Without a MAF path there is nothing to write; show usage instead of
# handing nil to the parser.
unless src_path
  $stderr.puts op
  exit 1
end

# PerfTools takes precedence over ruby-prof when both are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if options.profile_gc

parser = options.parser.new(src_path, options.opts)
writer = Bio::MAF::Writer.new($stdout)
writer.write_header(parser.header)
writer.write_blocks(parser.parse_blocks)

if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
data/bin/random_ranges ADDED
@@ -0,0 +1,35 @@
#!/usr/bin/env ruby

# random_ranges: print NUM random, non-overlapping ranges of length LEN
# within START:END on sequence SEQ, one per line, as
# "<seq>\t<start>\t<end>\tx".
#
# The overall range is split into NUM equal slots and one block is placed
# at a random offset inside each slot, so blocks never overlap.
#
# Usage: random_ranges [options]

require 'optparse'
require 'ostruct'

options = OpenStruct.new

op = OptionParser.new do |opts|
  opts.banner = "Usage: random_ranges [options]"
  opts.on("-r", "--range START:END", "range") do |range|
    s, e = range.split(':')
    options.start = s.to_i
    options.end = e.to_i
  end
  opts.on("-l", "--length LEN", "block length") do |len|
    options.length = len.to_i
  end
  opts.on("-n", "--number NUM", "number of blocks") do |num|
    options.num = num.to_i
  end
  opts.on("-s", "--sequence SEQ", "sequence") do |seq|
    options.seq = seq
  end
end
op.parse!(ARGV)

# Every option is required; without them the arithmetic below would fail
# on nil with an unhelpful NoMethodError.
unless options.start && options.end && options.length && options.num && options.seq
  $stderr.puts op
  exit 1
end

# Guards the division below against ZeroDivisionError.
abort "random_ranges: --number must be greater than 0" unless options.num > 0

span = options.end - options.start
block_range = span / options.num
block_start_range = block_range - options.length

# Random#rand rejects a non-positive bound, so report the real cause.
unless block_start_range > 0
  abort "random_ranges: range #{options.start}:#{options.end} is too small " \
        "for #{options.num} blocks of length #{options.length}"
end

rng = Random.new
options.num.times do |n|
  block_offset = rng.rand(block_start_range)
  b_start = options.start + (block_range * n) + block_offset
  b_end = b_start + options.length
  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
end
@@ -0,0 +1,31 @@
1
+ @milestone_2
2
+ Feature: Indexed access to MAF files
3
+ In order to extract alignment blocks from MAF files
4
+ By chromosomal ranges matching a source sequence
5
+ I want to have a way to build indexes on MAF files
6
+ And use indexes to efficiently find alignment blocks
7
+ Because linear searches of a 200 GB file are impractical
8
+
9
+ Scenario: Index a MAF file
10
+ Given a MAF source file "mm8_chr7_tiny.maf"
11
+ When I open it with a MAF reader
12
+ And build an index on the reference sequence
13
+ Then the index has at least 8 entries
14
+
15
+ Scenario: Extract alignment blocks by chromosomal range
16
+ Given a MAF source file "mm8_chr7_tiny.maf"
17
+ When I open it with a MAF reader
18
+ And build an index on the reference sequence
19
+ And search for blocks between positions 80082592 and 80082766 of mm8.chr7
20
+ Then 2 blocks are obtained
21
+ And sequence mm8.chr7 of block 0 has start 80082592
22
+ And sequence mm8.chr7 of block 1 has start 80082713
23
+
24
+ Scenario: Extract alignment blocks by chromosomal range from index file
25
+ Given a MAF source file "mm8_chr7_tiny.maf"
26
+ And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
27
+ When I open it with a MAF reader
28
+ And search for blocks between positions 80082592 and 80082766 of mm8.chr7
29
+ Then 2 blocks are obtained
30
+ And sequence mm8.chr7 of block 0 has start 80082592
31
+ And sequence mm8.chr7 of block 1 has start 80082713
@@ -0,0 +1,29 @@
1
+ Feature: MAF output
2
+ In order to output modified MAF files or subsets of them
3
+ I want to be able to write out parsed MAF data
4
+
5
+ Scenario: Reproduce simple test data
6
+ Given a MAF source file "mm8_single.maf"
7
+ When I open it with a MAF reader
8
+ And open a new MAF writer
9
+ And write the header from the original MAF file
10
+ And write all the parsed blocks
11
+ Then the output should match, except whitespace, "mm8_single.maf"
12
+
13
+ Scenario: Reproduce longer test data
14
+ Given a MAF source file "mm8_chr7_tiny.maf"
15
+ When I open it with a MAF reader
16
+ And open a new MAF writer
17
+ And write the header from the original MAF file
18
+ And write all the parsed blocks
19
+ Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
20
+
21
+ Scenario: Reproduce test data with i, e, q lines
22
+ Given a MAF source file "chr22_ieq.maf"
23
+ When I enable the :parse_extended parser option
24
+ And I enable the :parse_empty parser option
25
+ And I open it with a MAF reader
26
+ And open a new MAF writer
27
+ And write the header from the original MAF file
28
+ And write all the parsed blocks
29
+ Then the output should match, except whitespace, "chr22_ieq.maf"
@@ -0,0 +1,44 @@
1
+ Feature: Parse MAF files
2
+ In order to extract information from a MAF file
3
+ I want to read it and pull out information
4
+
5
+ Scenario: Read MAF header
6
+ Given MAF data:
7
+ """
8
+ ##maf version=1 scoring=humor.v4
9
+ # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
10
+
11
+ a score=0.128
12
+ s human_hoxa 100 8 + 100257 ACA-TTACT
13
+ s horse_hoxa 120 9 - 98892 ACAATTGCT
14
+ s fugu_hoxa 88 7 + 90788 ACA--TGCT
15
+ """
16
+ When I open it with a MAF reader
17
+ Then the MAF version should be "1"
18
+ And the scoring scheme should be "humor.v4"
19
+ # third line a continuation
20
+ And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf"
21
+
22
+ Scenario: Read alignment block
23
+ Given MAF data:
24
+ """
25
+ ##maf version=1 scoring=humor.v4
26
+ # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
27
+ # /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
28
+
29
+ a score=0.128
30
+ s human_hoxa 100 8 + 100257 ACA-TTACT
31
+ s horse_hoxa 120 9 - 98892 ACAATTGCT
32
+ s fugu_hoxa 88 7 + 90788 ACA--TGCT
33
+ """
34
+ When I open it with a MAF reader
35
+ Then an alignment block can be obtained
36
+ And the alignment block has 3 sequences
37
+ And sequence 0 has source "human_hoxa"
38
+ And sequence 0 has start 100
39
+ And sequence 0 has size 8
40
+ And sequence 0 has strand :+
41
+ And sequence 0 has source size 100257
42
+ And sequence 0 has text "ACA-TTACT"
43
+ And sequence 1 has strand :-
44
+