bio-maf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/.document +5 -0
  2. data/.simplecov +1 -0
  3. data/.travis.yml +16 -0
  4. data/.yardopts +3 -0
  5. data/DEVELOPMENT.md +40 -0
  6. data/Gemfile +23 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +209 -0
  9. data/Rakefile +76 -0
  10. data/VERSION +1 -0
  11. data/benchmarks/dispatch_bench +53 -0
  12. data/benchmarks/iter_bench +44 -0
  13. data/benchmarks/read_bench +40 -0
  14. data/benchmarks/sort_bench +33 -0
  15. data/benchmarks/split_bench +33 -0
  16. data/bin/maf_count +82 -0
  17. data/bin/maf_dump_blocks +27 -0
  18. data/bin/maf_extract_ranges_count +44 -0
  19. data/bin/maf_index +88 -0
  20. data/bin/maf_parse_bench +94 -0
  21. data/bin/maf_to_fasta +68 -0
  22. data/bin/maf_write +84 -0
  23. data/bin/random_ranges +35 -0
  24. data/features/maf-indexing.feature +31 -0
  25. data/features/maf-output.feature +29 -0
  26. data/features/maf-parsing.feature +44 -0
  27. data/features/maf-querying.feature +75 -0
  28. data/features/maf-to-fasta.feature +50 -0
  29. data/features/step_definitions/convert_steps.rb +45 -0
  30. data/features/step_definitions/index_steps.rb +20 -0
  31. data/features/step_definitions/output_steps.rb +27 -0
  32. data/features/step_definitions/parse_steps.rb +63 -0
  33. data/features/step_definitions/query_steps.rb +31 -0
  34. data/features/step_definitions/ucsc_bin_steps.rb +14 -0
  35. data/features/support/env.rb +16 -0
  36. data/features/ucsc-bins.feature +24 -0
  37. data/lib/bio/maf/index.rb +620 -0
  38. data/lib/bio/maf/parser.rb +888 -0
  39. data/lib/bio/maf/struct.rb +63 -0
  40. data/lib/bio/maf/writer.rb +63 -0
  41. data/lib/bio/maf.rb +4 -0
  42. data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
  43. data/lib/bio/ucsc/ucsc_bin.rb +117 -0
  44. data/lib/bio/ucsc.rb +2 -0
  45. data/lib/bio-maf/maf.rb +3 -0
  46. data/lib/bio-maf.rb +12 -0
  47. data/man/.gitignore +1 -0
  48. data/man/maf_index.1 +105 -0
  49. data/man/maf_index.1.markdown +97 -0
  50. data/man/maf_index.1.ronn +83 -0
  51. data/man/maf_to_fasta.1 +53 -0
  52. data/man/maf_to_fasta.1.ronn +51 -0
  53. data/spec/bio/maf/index_spec.rb +363 -0
  54. data/spec/bio/maf/parser_spec.rb +354 -0
  55. data/spec/bio/maf/struct_spec.rb +75 -0
  56. data/spec/spec_helper.rb +14 -0
  57. data/test/data/big-block.maf +15999 -0
  58. data/test/data/chr22_ieq.maf +11 -0
  59. data/test/data/chrY-1block.maf +6 -0
  60. data/test/data/empty +0 -0
  61. data/test/data/empty.db +0 -0
  62. data/test/data/mm8_chr7_tiny.kct +0 -0
  63. data/test/data/mm8_chr7_tiny.maf +76 -0
  64. data/test/data/mm8_mod_a.maf +7 -0
  65. data/test/data/mm8_single.maf +13 -0
  66. data/test/data/mm8_subset_a.maf +23 -0
  67. data/test/data/t1-bad1.maf +15 -0
  68. data/test/data/t1.fasta +12 -0
  69. data/test/data/t1.maf +15 -0
  70. data/test/data/t1a.maf +17 -0
  71. data/test/helper.rb +18 -0
  72. data/test/test_bio-maf.rb +7 -0
  73. data/travis-ci/install_kc +13 -0
  74. data/travis-ci/install_kc_java +13 -0
  75. data/travis-ci/report_errors +4 -0
  76. metadata +181 -0
data/bin/maf_count ADDED
@@ -0,0 +1,82 @@
#!/usr/bin/env ruby

# maf_count: parse a MAF file and print how many alignment blocks were
# read, optionally under PerfTools, ruby-prof, or the GC profiler.
#
# Usage: maf_count [options] <maf>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.reader = Bio::MAF::ChunkReader

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the RubyProf constant looked up for each of them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

OptionParser.new do |opts|
  opts.banner = "Usage: maf_count [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    # PROF is handed unchanged to PerfTools::CpuProfiler.start below.
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching: both branches resolve a RubyProf
    # constant, and the plain-PATH branch previously ran before any
    # require, failing with NameError.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form; a NAME missing from PRINTERS raises KeyError.
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # PARSER names a constant inside Bio::MAF.
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.reader = Bio::MAF::ThreadedChunkReader
  end
end.parse!(ARGV)

# First remaining argument is the MAF path given to the parser.
src_path = ARGV.shift

# PerfTools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

if options.profile_gc
  GC::Profiler.enable
end

parser = options.parser.new(src_path,
                            :chunk_reader => options.reader,
                            :parse_extended => false)

# Only the count is needed, so each yielded block is discarded.
n = 0
parser.each_block do |_block|
  n += 1
end
puts "Parsed #{n} MAF alignment blocks."

# The GC report goes to stderr so stdout carries only the block count.
if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  # Write the ruby-prof report with the selected printer.
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
@@ -0,0 +1,27 @@
#!/usr/bin/env ruby

# maf_dump_blocks: print one line per alignment block of a MAF file,
# holding the block's offset and size as reported by the parser.
#
# Usage: maf_dump_blocks [options] <maf>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

settings = OpenStruct.new
settings.parser = Bio::MAF::Parser

cli = OptionParser.new
cli.banner = "Usage: maf_dump_blocks [options] <maf>"
cli.separator ""
cli.separator "Options:"
# PARSER names a constant inside Bio::MAF to use instead of the default.
cli.on("--parser PARSER", "parser") { |name| settings.parser = Bio::MAF.const_get(name) }
cli.parse!(ARGV)

# First remaining argument is the MAF path given to the parser.
maf_path = ARGV.shift
maf = settings.parser.new(maf_path)

# Tab-separated, right-aligned columns: offset (width 12), size (width 7).
maf.each_block { |blk| $stdout.printf("%12d\t%7d\n", blk.offset, blk.size) }

@@ -0,0 +1,44 @@
#!/usr/bin/env ruby

# maf_extract_ranges_count: look up intervals read from standard input in
# an indexed MAF file and report how many blocks each pass returned.
#
# Usage: maf_extract_ranges_count [options] <maf> <index>

require 'optparse'
require 'ostruct'

require 'bio-maf'
require 'bio-genomic-interval'

# options.p is the option hash passed to the parser; options.passes is
# the number of times the lookup is repeated.
options = OpenStruct.new
options.p = { :threads => 1 }
options.passes = 1

cli = OptionParser.new
cli.banner = "Usage: maf_extract_ranges_count [options] <maf> <index>"
cli.separator ""
cli.separator "Options:"
cli.on("-t", "--threads N", "Parser threads") { |n| options.p[:threads] = n.to_i }
cli.on("-p", "--passes N", "Number of passes") { |n| options.passes = n.to_i }
cli.parse!(ARGV)

# Positional arguments: the MAF file, then its index.
maf_p, index_p = ARGV.shift(2)

parser = Bio::MAF::Parser.new(maf_p, options.p)
index = Bio::MAF::KyotoIndex.open(index_p)

# Parse one whitespace-delimited interval line into a genomic interval.
#
# The first three fields are the source sequence name, the start and the
# end; anything after the third field is ignored. Coordinates are passed
# to Bio::GenomicInterval.zero_based unchanged.
#
# @param line [String] e.g. "mm8.chr7\t80082592\t80082766\tx"
# @return the value of Bio::GenomicInterval.zero_based(src, start, end)
# @raise [ArgumentError] if the line has fewer than three fields or a
#   coordinate is not a base-10 integer
def parse_interval(line)
  src, r_start_s, r_end_s, _ = line.split(nil, 4)
  # A blank or truncated line leaves the trailing fields nil. String#to_i
  # would turn those (and any non-numeric text) into 0 and silently query
  # the wrong range, so reject such lines explicitly.
  if r_end_s.nil?
    raise ArgumentError, "malformed interval line: #{line.inspect}"
  end
  begin
    # An explicit base keeps a leading zero from being read as octal.
    r_start = Integer(r_start_s, 10)
    r_end = Integer(r_end_s, 10)
  rescue ArgumentError
    raise ArgumentError, "non-numeric coordinate in interval line: #{line.inspect}"
  end
  Bio::GenomicInterval.zero_based(src, r_start, r_end)
end

# Turn every line of standard input into one interval.
intervals = $stdin.each_line.map { |line| parse_interval(line) }

# Repeat the indexed lookup once per requested pass, reporting the number
# of blocks returned each time.
options.passes.times do
  found = index.find(intervals, parser)
  puts "TOTAL: #{found.count} blocks parsed."
end
data/bin/maf_index ADDED
@@ -0,0 +1,88 @@
#!/usr/bin/env ruby

# maf_index: build an index for a MAF file, or dump an existing index.
#
# Usage: maf_index [options] <maf> <index>

require 'benchmark'
require 'bio-maf'
require 'optparse'
require 'ostruct'

# Global settings shared with build_index: the run mode (:build unless
# --dump is given) and the chunk reader class handed to the parser.
$options = OpenStruct.new.tap do |o|
  o.mode = :build
  o.reader = Bio::MAF::ChunkReader
end

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the RubyProf constant looked up for each of them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter,
  'graph' => :GraphHtmlPrinter
}

# Build an index for the MAF file at +maf+, store it at +index+, and close
# it once built.
#
# The parser uses the chunk reader selected in $options.reader and is
# created with :parse_extended disabled.
#
# @param maf [String] path of the MAF file to index
# @param index [String] path handed to Bio::MAF::KyotoIndex.build
# @return the result of closing the freshly built index
def build_index(maf, index)
  maf_parser = Bio::MAF::Parser.new(maf,
                                    :chunk_reader => $options.reader,
                                    :parse_extended => false)
  Bio::MAF::KyotoIndex.build(maf_parser, index).close
end

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_index [options] <maf> <index>"
  #opts.separator ""
  #opts.separator "Options:"
  opts.on("--time", "print elapsed time") do
    $options.bench = true
  end
  opts.on("-d", "--dump") do
    $options.mode = :dump
  end
  opts.on("-t", "--threaded") do
    $options.reader = Bio::MAF::ThreadedChunkReader
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form; a NAME missing from PRINTERS raises KeyError.
      $options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      $options.ruby_prof_path = $2
    else
      # The module is RubyProf; the former "Ruby_Prof" spelling raised
      # NameError whenever a bare PATH was given.
      $options.ruby_prof_printer = RubyProf::FlatPrinter
      $options.ruby_prof_path = pspec
    end
  end
end

op.parse!(ARGV)

# Build mode takes <maf> <index>; dump mode takes only <index>.
maf_p = ARGV.shift if $options.mode == :build
index_p = ARGV.shift

# Print usage and fail when a required positional argument is missing.
unless (maf_p || $options.mode == :dump) && index_p
  $stderr.puts op
  exit 1
end

if $options.ruby_prof_path
  RubyProf.start
end

case $options.mode
when :build
  if ! $options.bench
    build_index(maf_p, index_p)
  else
    # --time: report the Benchmark measurement of the build on stdout.
    bm_res = Benchmark.measure do
      build_index(maf_p, index_p)
    end
    puts bm_res
  end
when :dump
  idx = Bio::MAF::KyotoIndex.open(index_p)
  idx.dump
else
  raise "Unsupported mode: #{$options.mode}"
end

if $options.ruby_prof_path
  # Write the ruby-prof report with the selected printer.
  res = RubyProf.stop
  printer = $options.ruby_prof_printer.new(res)
  File.open($options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
@@ -0,0 +1,94 @@
#!/usr/bin/env ruby

# maf_parse_bench: repeatedly parse the same MAF block and print the
# benchmark measurement divided by the number of runs, optionally under
# PerfTools, ruby-prof, or the GC profiler.
#
# Usage: maf_parse_bench [options] <maf>

require 'benchmark'
require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
options.runs = 100_000
options.warmup = false

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the RubyProf constant looked up for each of them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

OptionParser.new do |opts|
  opts.banner = "Usage: maf_parse_bench [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    # PROF is handed unchanged to PerfTools::CpuProfiler.start below.
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching so both branches can resolve a
    # RubyProf printer class.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form; a NAME missing from PRINTERS raises KeyError.
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      # Store the printer class itself: the former Symbol :FlatPrinter
      # made the later `.new(res)` call fail with NoMethodError.
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # PARSER names a constant inside Bio::MAF.
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-w", "--warmup", "perform warmup run") do
    options.warmup = true
  end
end.parse!(ARGV)

# First remaining argument is the MAF path given to the parser.
src_path = ARGV.shift

# PerfTools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

if options.profile_gc
  GC::Profiler.enable
end

# Parse two blocks, then remember the position reached; every timed
# iteration resets to it so each run starts parsing from the same place.
parser = options.parser.new(src_path)
parser.parse_block
parser.parse_block
pos = parser.s.pos

# Optional untimed pass with the same loop as the measured one.
if options.warmup
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

bm_res = Benchmark.measure do
  options.runs.times do
    parser.parse_block
    parser.s.pos = pos
  end
end

# The GC report goes to stderr so stdout carries only the timing line.
if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  # Write the ruby-prof report with the selected printer.
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end

# Per-run figures: the measurement divided by the number of runs.
puts bm_res / options.runs
data/bin/maf_to_fasta ADDED
@@ -0,0 +1,68 @@
#!/usr/bin/env ruby

# maf_to_fasta: write every raw sequence of every alignment block in a
# MAF file to a FASTA file, optionally under PerfTools, ruby-prof, or the
# GC profiler.
#
# Usage: maf_to_fasta [options] <maf> <fasta>

require 'bio-maf'
require 'bigbio'
require 'optparse'
require 'ostruct'

settings = OpenStruct.new
settings.parser = Bio::MAF::Parser

cli = OptionParser.new
cli.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
cli.separator ""
cli.separator "Options:"
cli.on("-p", "--profile PROF", "Profile with PerfTools") { |prof| settings.prof = prof }
cli.on("--ruby-prof PATH", "Profile with ruby-prof") { |path| settings.ruby_prof = path }
cli.on("--profile-gc", "Profile GC") { settings.profile_gc = true }
# PARSER names a constant inside Bio::MAF to use instead of the default.
cli.on("--parser PARSER", "parser") { |name| settings.parser = Bio::MAF.const_get(name) }
cli.parse!(ARGV)

# Positional arguments: the MAF input, then the FASTA output.
src_path, dst_path = ARGV.shift(2)

# PerfTools takes precedence when both CPU profilers are requested.
if settings.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(settings.prof)
elsif settings.ruby_prof
  require 'ruby-prof'
  RubyProf.start
end

GC::Profiler.enable if settings.profile_gc

maf = settings.parser.new(src_path)
fasta = FastaWriter.new(dst_path)

# Each raw sequence writes itself to the FASTA writer.
maf.each_block do |block|
  block.each_raw_seq { |seq| seq.write_fasta(fasta) }
end

fasta.close

# The GC report goes to stderr.
if settings.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if settings.prof
  PerfTools::CpuProfiler.stop
elsif settings.ruby_prof
  # ruby-prof results are always written with the flat printer here.
  profile = RubyProf.stop
  report = RubyProf::FlatPrinter.new(profile)
  File.open(settings.ruby_prof, 'w') { |f| report.print(f) }
end
data/bin/maf_write ADDED
@@ -0,0 +1,84 @@
#!/usr/bin/env ruby

# maf_write: parse a MAF file and write its header and blocks back out to
# standard output, optionally under PerfTools, ruby-prof, or the GC
# profiler.
#
# Usage: maf_write [options] <maf>

require 'bio-maf'
require 'optparse'
require 'ostruct'

options = OpenStruct.new
options.parser = Bio::MAF::Parser
# Option hash passed to the parser constructor.
options.opts = {
  :chunk_reader => Bio::MAF::ChunkReader,
  :parse_extended => false
}

# Printer names accepted as the "NAME:" prefix of --ruby-prof, mapped to
# the RubyProf constant looked up for each of them.
PRINTERS = {
  'flat' => :FlatPrinter,
  'stack' => :CallStackPrinter
}

OptionParser.new do |opts|
  opts.banner = "Usage: maf_write [options] <maf>"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
    # PROF is handed unchanged to PerfTools::CpuProfiler.start below.
    options.prof = prof
  end
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
    # Load ruby-prof before branching: both branches resolve a RubyProf
    # constant, and the plain-PATH branch previously ran before any
    # require, failing with NameError.
    require 'ruby-prof'
    if pspec =~ /(\w+):(.+)/
      # "NAME:PATH" form; a NAME missing from PRINTERS raises KeyError.
      options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
      options.ruby_prof_path = $2
    else
      options.ruby_prof_printer = RubyProf::FlatPrinter
      options.ruby_prof_path = pspec
    end
  end
  opts.on("--profile-gc", "Profile GC") do
    options.profile_gc = true
  end
  opts.on("--parser PARSER", "parser") do |name|
    # PARSER names a constant inside Bio::MAF.
    options.parser = Bio::MAF.const_get(name)
  end
  opts.on("-t", "--threaded") do
    options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
    options.opts[:threads] = 1
  end
  opts.on("-e", "--extended") do
    # Turns on both the :parse_extended and :parse_empty parser options.
    options.opts[:parse_extended] = true
    options.opts[:parse_empty] = true
  end
end.parse!(ARGV)

# First remaining argument is the MAF path given to the parser.
src_path = ARGV.shift

# PerfTools takes precedence when both CPU profilers are requested.
if options.prof
  require 'perftools'
  PerfTools::CpuProfiler.start(options.prof)
elsif options.ruby_prof_path
  require 'ruby-prof'
  RubyProf.start
end

if options.profile_gc
  GC::Profiler.enable
end

# Copy the parsed header and blocks to standard output.
parser = options.parser.new(src_path, options.opts)
writer = Bio::MAF::Writer.new($stdout)
writer.write_header(parser.header)
writer.write_blocks(parser.parse_blocks)

# The GC report goes to stderr so stdout carries only MAF output.
if options.profile_gc
  $stderr.puts GC::Profiler.result
  GC::Profiler.disable
end

if options.prof
  PerfTools::CpuProfiler.stop
elsif options.ruby_prof_path
  # Write the ruby-prof report with the selected printer.
  res = RubyProf.stop
  printer = options.ruby_prof_printer.new(res)
  File.open(options.ruby_prof_path, 'w') do |f|
    printer.print(f)
  end
end
data/bin/random_ranges ADDED
@@ -0,0 +1,35 @@
#!/usr/bin/env ruby

# random_ranges: print NUM random intervals of LEN positions each, one per
# line, as "SEQ<TAB>start<TAB>end<TAB>x". The span START..END is divided
# into NUM equal slots and one interval is placed at a random offset
# inside each slot.
#
# Usage: random_ranges [options]

require 'optparse'
require 'ostruct'

options = OpenStruct.new

# Keep the parser itself (not the return value of parse!) so the usage
# text can be printed when the arguments are incomplete.
op = OptionParser.new do |opts|
  opts.banner = "Usage: random_ranges [options]"
  opts.on("-r", "--range START:END", "range") do |range|
    s, e = range.split(':')
    options.start = s.to_i
    options.end = e.to_i
  end
  opts.on("-l", "--length LEN", "block length") do |len|
    options.length = len.to_i
  end
  opts.on("-n", "--number NUM", "number of blocks") do |num|
    options.num = num.to_i
  end
  opts.on("-s", "--sequence SEQ", "sequence") do |seq|
    options.seq = seq
  end
end
op.parse!(ARGV)

# Every option is needed by the arithmetic below; a missing one used to
# surface as a NoMethodError on nil.
unless options.start && options.end && options.length && options.num && options.seq
  $stderr.puts op
  exit 1
end

# A non-positive count would divide by zero or produce no slots.
unless options.num > 0
  abort "random_ranges: --number must be a positive integer"
end

# Named rng rather than rand so Kernel#rand is not shadowed.
rng = Random.new
range = options.end - options.start
block_range = range / options.num
# Number of start offsets inside a slot that keep the interval within it.
block_start_range = block_range - options.length

# Random#rand rejects a non-positive bound with an opaque ArgumentError;
# explain the actual problem instead.
unless block_start_range > 0
  abort "random_ranges: #{options.num} blocks of length #{options.length} " \
        "do not fit in range #{options.start}:#{options.end}"
end

options.num.times do |n|
  block_offset = rng.rand(block_start_range)
  b_start = options.start + (block_range * n) + block_offset
  b_end = b_start + options.length
  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
end
@@ -0,0 +1,31 @@
1
+ @milestone_2
2
+ Feature: Indexed access to MAF files
3
+ In order to extract alignment blocks from MAF files
4
+ By chromosomal ranges matching a source sequence
5
+ I want to have a way to build indexes on MAF files
6
+ And use indexes to efficiently find alignment blocks
7
+ Because linear searches of a 200 GB file are impractical
8
+
9
+ Scenario: Index a MAF file
10
+ Given a MAF source file "mm8_chr7_tiny.maf"
11
+ When I open it with a MAF reader
12
+ And build an index on the reference sequence
13
+ Then the index has at least 8 entries
14
+
15
+ Scenario: Extract alignment blocks by chromosomal range
16
+ Given a MAF source file "mm8_chr7_tiny.maf"
17
+ When I open it with a MAF reader
18
+ And build an index on the reference sequence
19
+ And search for blocks between positions 80082592 and 80082766 of mm8.chr7
20
+ Then 2 blocks are obtained
21
+ And sequence mm8.chr7 of block 0 has start 80082592
22
+ And sequence mm8.chr7 of block 1 has start 80082713
23
+
24
+ Scenario: Extract alignment blocks by chromosomal range from index file
25
+ Given a MAF source file "mm8_chr7_tiny.maf"
26
+ And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
27
+ When I open it with a MAF reader
28
+ And search for blocks between positions 80082592 and 80082766 of mm8.chr7
29
+ Then 2 blocks are obtained
30
+ And sequence mm8.chr7 of block 0 has start 80082592
31
+ And sequence mm8.chr7 of block 1 has start 80082713
@@ -0,0 +1,29 @@
1
+ Feature: MAF output
2
+ In order to output modified MAF files or subsets of them
3
+ I want to be able to write out parsed MAF data
4
+
5
+ Scenario: Reproduce simple test data
6
+ Given a MAF source file "mm8_single.maf"
7
+ When I open it with a MAF reader
8
+ And open a new MAF writer
9
+ And write the header from the original MAF file
10
+ And write all the parsed blocks
11
+ Then the output should match, except whitespace, "mm8_single.maf"
12
+
13
+ Scenario: Reproduce longer test data
14
+ Given a MAF source file "mm8_chr7_tiny.maf"
15
+ When I open it with a MAF reader
16
+ And open a new MAF writer
17
+ And write the header from the original MAF file
18
+ And write all the parsed blocks
19
+ Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
20
+
21
+ Scenario: Reproduce test data with i, e, q lines
22
+ Given a MAF source file "chr22_ieq.maf"
23
+ When I enable the :parse_extended parser option
24
+ And I enable the :parse_empty parser option
25
+ And I open it with a MAF reader
26
+ And open a new MAF writer
27
+ And write the header from the original MAF file
28
+ And write all the parsed blocks
29
+ Then the output should match, except whitespace, "chr22_ieq.maf"
@@ -0,0 +1,44 @@
1
+ Feature: Parse MAF files
2
+ In order to extract information from a MAF file
3
+ I want to read it and pull out information
4
+
5
+ Scenario: Read MAF header
6
+ Given MAF data:
7
+ """
8
+ ##maf version=1 scoring=humor.v4
9
+ # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
10
+
11
+ a score=0.128
12
+ s human_hoxa 100 8 + 100257 ACA-TTACT
13
+ s horse_hoxa 120 9 - 98892 ACAATTGCT
14
+ s fugu_hoxa 88 7 + 90788 ACA--TGCT
15
+ """
16
+ When I open it with a MAF reader
17
+ Then the MAF version should be "1"
18
+ And the scoring scheme should be "humor.v4"
19
+ # third line a continuation
20
+ And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf"
21
+
22
+ Scenario: Read alignment block
23
+ Given MAF data:
24
+ """
25
+ ##maf version=1 scoring=humor.v4
26
+ # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
27
+ # /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
28
+
29
+ a score=0.128
30
+ s human_hoxa 100 8 + 100257 ACA-TTACT
31
+ s horse_hoxa 120 9 - 98892 ACAATTGCT
32
+ s fugu_hoxa 88 7 + 90788 ACA--TGCT
33
+ """
34
+ When I open it with a MAF reader
35
+ Then an alignment block can be obtained
36
+ And the alignment block has 3 sequences
37
+ And sequence 0 has source "human_hoxa"
38
+ And sequence 0 has start 100
39
+ And sequence 0 has size 8
40
+ And sequence 0 has strand :+
41
+ And sequence 0 has source size 100257
42
+ And sequence 0 has text "ACA-TTACT"
43
+ And sequence 1 has strand :-
44
+