bio-maf 1.0.0-java → 1.0.1-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
3
4
  require 'ostruct'
4
5
 
5
6
  require 'bio-maf'
@@ -8,6 +9,9 @@ require 'bio-bgzf'
8
9
  $options = OpenStruct.new
9
10
  $options.dir = '.'
10
11
  $options.ref_only = true
12
+ $options.n_jobs = 1
13
+ $options.force = false
14
+ $options.level = 2
11
15
 
12
16
  op = OptionParser.new do |opts|
13
17
  opts.banner = "Usage: maf_bgzip [options] [<maf> ...]"
@@ -26,31 +30,155 @@ op = OptionParser.new do |opts|
26
30
  "(has no effect without --index)") do
27
31
  $options.ref_only = false
28
32
  end
33
+ opts.on("-l", "--level LEVEL", Integer,
34
+ "gzip compression level for BGZF (1-9)") do |level|
35
+ unless 1 <= level && level <= 9
36
+ $stderr.puts "Invalid compression level: #{level}"
37
+ $stderr.puts opts
38
+ exit 2
39
+ end
40
+ $options.level = level
41
+ end
42
+ opts.on("-f", "--force",
43
+ "Replace output files if they already exist") do
44
+ $options.force = true
45
+ end
46
+ opts.on("-j", "--jobs N", Integer,
47
+ "Run N concurrent jobs (default 1)") do |n|
48
+ $options.n_jobs = n
49
+ end
50
+ Bio::MAF::handle_logging_options(opts)
29
51
  end
30
52
 
31
53
  op.parse!(ARGV)
54
+ Bio::Log::CLI.configure('bio-maf')
32
55
 
33
- until ARGV.empty?
34
- maf_path = ARGV.shift
35
- maf_base = File.basename(maf_path)
56
+ INTERVAL = 10
57
+ LOG = Bio::MAF::LOG
58
+
59
# Build a deferred job that compresses (and optionally indexes) one MAF file.
#
# Output paths are derived from the input file name: everything from the
# first ".maf" onwards is stripped from the basename, giving
# <dir>/<base>.maf.bgz and, when $options.index is set, <dir>/<base>.kct.
# The existence checks happen here, when the task is created, so a
# conflicting output file aborts the program before the task is ever run.
#
# @param maf [String] path of the MAF file to process
# @return [Proc] lambda that calls process_maf for this file when invoked
def make_processing_task(maf)
  base = File.basename(maf).gsub(/\.maf.*/, '')
  bgz_path = "#{$options.dir}/#{base}.maf.bgz"
  refuse_to_overwrite(bgz_path)
  idx_path = nil
  if $options.index
    idx_path = "#{$options.dir}/#{base}.kct"
    refuse_to_overwrite(idx_path)
  end
  lambda { process_maf(maf, bgz_path, idx_path) }
end

# Log an error and exit with status 1 if +path+ already exists and
# $options.force is not set; otherwise do nothing.
#
# @param path [String] output file that is about to be written
def refuse_to_overwrite(path)
  return unless File.exist?(path) && ! $options.force
  LOG.error "#{path} already exists, refusing to overwrite " \
            "without --force!"
  exit 1
end
79
+
80
# Compress one MAF file to BGZF and, when +idx_path+ is given, index it in
# the same pass.
#
# The MAF header is written through a MAF writer, after which the retained
# original text of every block is copied straight into the BGZF stream.
# When indexing, each block's offset is set to the BGZF writer's last write
# position and the block is handed to an executor for indexing. Progress is
# logged at debug level, and summary statistics at info level once the file
# has been processed.
#
# @param maf_path [String] input MAF file
# @param bgz_path [String] BGZF output file (created or truncated)
# @param idx_path [String, nil] index file to create; an existing file at
#   this path is deleted first. Pass nil to skip indexing.
def process_maf(maf_path, bgz_path, idx_path)
  maf_base = File.basename(maf_path)
  LOG.debug { "Processing #{maf_base}." }
  parser = Bio::MAF::Parser.new(maf_path,
                                :retain_text => true)
  if idx_path
    # Start from a fresh index file. File.exist? is used because the
    # deprecated File.exists? alias is no longer available on Ruby 3.2+.
    File.unlink(idx_path) if File.exist?(idx_path)
    idx = Bio::MAF::KyotoIndex.new(idx_path)
    idx.prep(bgz_path, :bgzf, $options.ref_only)
    executor = Bio::MAF::Executor.create
  end
  start_t = Time.now
  last_t = start_t
  last_pos = 0
  n_blocks = 0
  maf_size = File.size(maf_path)
  File.open(bgz_path, 'wb') do |out_f|
    Bio::BGZF::Writer.new(out_f, $options.level) do |bgz_w|
      maf_w = Bio::MAF::Writer.new(bgz_w)
      maf_w.write_header(parser.header)
      parser.each_block do |block|
        bgz_w.write(block.orig_text)
        if idx
          block.offset = bgz_w.last_write_pos
          executor.submit do
            idx.index_blocks([block])
          end
        end
        n_blocks += 1
        # Only look at the clock every 100 blocks, and only report when
        # more than INTERVAL seconds have passed since the last report.
        if n_blocks % 100 == 0
          cur_t = Time.now
          delta_t = cur_t - last_t
          if delta_t > INTERVAL
            cur_pos = parser.phys_f.tell
            LOG.debug {
              pos_mb = cur_pos.to_f / 1048576
              delta_bytes = cur_pos - last_pos
              rate = delta_bytes.to_f / delta_t
              mb_rate = rate / 1048576
              pct = cur_pos.to_f / maf_size * 100
              elapsed = cur_t - start_t
              sprintf("%s: processed %.1f MB (%.1f%%) in %ds, %.2f MB/s.",
                      maf_base,
                      pos_mb,
                      pct,
                      elapsed,
                      mb_rate)
            }
            last_t = cur_t
            last_pos = cur_pos
          end
        end
      end
    end
  end
  # NOTE(review): parser.f and parser.phys_f presumably differ when the
  # input is read through a decompression layer; in that case the logical
  # position is treated as the uncompressed size -- confirm against Parser.
  unc = parser.f.tell if parser.f != parser.phys_f
  parser.close
  if idx
    executor.shutdown
    idx.db.synchronize(true)
    # NOTE(review): the index is synchronized but never closed here,
    # unlike build_index in maf_index -- confirm this is intentional.
  end
  elapsed = Time.now - start_t
  mb = maf_size.to_f / 1048576
  mb_rate = mb / elapsed
  LOG.info { sprintf("Processed %s (%.1f MB) in %ds, %.2f MB/s",
                     maf_base,
                     mb,
                     elapsed,
                     mb_rate) }
  if unc
    LOG.info {
      # Float division: integer division here would truncate the reported
      # size (and the rate derived from it) to whole megabytes.
      unc_mb = unc.to_f / 1048576
      unc_rate = unc_mb / elapsed
      sprintf(" Uncompressed: %.1f MB, %.2f MB/s",
              unc_mb, unc_rate)
    }
  end
  LOG.info {
    raw_size = unc || maf_size
    avg_block_kb = raw_size.to_f / n_blocks / 1024
    sprintf(" %d alignment blocks, average size %.2f KB",
            n_blocks, avg_block_kb)
  }
  LOG.info {
    orig_size = unc ? unc : maf_size
    bgzf_size = File.size(bgz_path).to_f
    ratio = bgzf_size / orig_size
    sprintf(" Compressed with BGZF (level=%d) to %.1f MB (%.1fx)",
            $options.level,
            bgzf_size / 1048576,
            ratio)
  }
end
175
+
176
# Queue one processing task per MAF path left on the command line, then
# hand them all to the job runner created for $options.n_jobs.
runner = Bio::MAF::JobRunner.create($options.n_jobs)
LOG.debug "Created #{runner.class} set for #{$options.n_jobs} concurrent jobs."
ARGV.each { |maf_path| runner.add(&make_processing_task(maf_path)) }
LOG.debug "Running jobs."
runner.run
LOG.debug "Finished processing."
@@ -6,12 +6,13 @@ require 'ostruct'
6
6
 
7
7
  include Bio::MAF
8
8
 
9
- options = OpenStruct.new
10
- options.mode = :intersect
11
- options.format = :maf
12
- options.seq_filter = {}
13
- options.block_filter = {}
14
- options.parse_options = {}
9
+ $options = OpenStruct.new
10
+ $options.mode = :intersect
11
+ $options.format = :maf
12
+ $options.one_based = false
13
+ $options.seq_filter = {}
14
+ $options.block_filter = {}
15
+ $options.parse_options = {}
15
16
 
16
17
  def handle_list_spec(spec)
17
18
  if spec =~ /^@(.+)/
@@ -23,7 +24,11 @@ end
23
24
 
24
25
  def handle_interval_spec(int)
25
26
  if int =~ /(.+):(\d+)-(\d+)/
26
- Bio::GenomicInterval.zero_based($1, $2.to_i, $3.to_i)
27
+ if $options.one_based
28
+ Bio::GenomicInterval.new($1, $2.to_i, $3.to_i)
29
+ else
30
+ Bio::GenomicInterval.zero_based($1, $2.to_i, $3.to_i)
31
+ end
27
32
  else
28
33
  raise "Invalid interval specification: #{int}"
29
34
  end
@@ -34,13 +39,13 @@ $op = OptionParser.new do |opts|
34
39
  opts.separator ""
35
40
  opts.separator "MAF source options (either --maf or --maf-dir must be given):"
36
41
  opts.on("-m", "--maf MAF", "MAF file") do |maf|
37
- options.maf = maf
42
+ $options.maf = maf
38
43
  end
39
44
  opts.on("-i", "--index INDEX", "MAF index") do |idx|
40
- options.idx = idx
45
+ $options.idx = idx
41
46
  end
42
47
  opts.on("-d", "--maf-dir DIR", "MAF directory") do |dir|
43
- options.maf_dir = dir
48
+ $options.maf_dir = dir
44
49
  end
45
50
  opts.separator ""
46
51
  opts.separator "Extraction options:"
@@ -49,21 +54,26 @@ $op = OptionParser.new do |opts|
49
54
  "blocks intersecting the given region,",
50
55
  "or 'slice' to extract subsets covering ",
51
56
  "given regions") do |mode|
52
- options.mode = mode
57
+ $options.mode = mode
53
58
  end
54
59
  opts.on("--bed BED", "Use intervals from the given BED file") do |bed|
55
- options.bed = bed
60
+ $options.bed = bed
56
61
  end
57
62
  opts.on("--interval SEQ:START:END", "Zero-based genomic interval to match") do |int|
58
- options.interval = handle_interval_spec(int)
63
+ $options.interval = handle_interval_spec(int)
64
+ end
65
+ opts.on("--one-based",
66
+ "Treat all intervals as one-based",
67
+ "(even from BED files, contrary to the standard)") do
68
+ $options.one_based = true
59
69
  end
60
70
  opts.separator ""
61
71
  opts.separator "Output options:"
62
72
  opts.on("-f", "--format FMT", [:maf, :fasta], "Output format") do |fmt|
63
- options.format = fmt
73
+ $options.format = fmt
64
74
  end
65
75
  opts.on("-o", "--output OUT", "Write output to file OUT") do |out|
66
- options.out_path = out
76
+ $options.out_path = out
67
77
  end
68
78
  opts.separator ""
69
79
  opts.separator "Filtering options:"
@@ -71,41 +81,41 @@ $op = OptionParser.new do |opts|
71
81
  "Filter out all but the species in the",
72
82
  "given comma-separated list",
73
83
  "(or @FILE to read from a file)") do |spec|
74
- options.seq_filter[:only_species] = handle_list_spec(spec)
84
+ $options.seq_filter[:only_species] = handle_list_spec(spec)
75
85
  end
76
86
  opts.on("--with-all-species SPECIES",
77
87
  "Only match blocks with all the given",
78
88
  "species, comma-separated",
79
89
  "(or @FILE to read from a file)") do |spec|
80
- options.block_filter[:with_all_species] = handle_list_spec(spec)
90
+ $options.block_filter[:with_all_species] = handle_list_spec(spec)
81
91
  end
82
92
  opts.on("--min-sequences N", Integer,
83
93
  "Match only blocks with at least N sequences") do |n|
84
- options.block_filter[:at_least_n_sequences] = n
94
+ $options.block_filter[:at_least_n_sequences] = n
85
95
  end
86
96
  opts.on("--min-text-size N", Integer,
87
97
  "Match only blocks with minimum text size N") do |n|
88
- options.block_filter[:min_size] = n
98
+ $options.block_filter[:min_size] = n
89
99
  end
90
100
  opts.on("--max-text-size N", Integer,
91
101
  "Match only blocks with maximum text size N") do |n|
92
- options.block_filter[:max_size] = n
102
+ $options.block_filter[:max_size] = n
93
103
  end
94
104
  opts.separator ""
95
105
  opts.separator "Block processing options:"
96
106
  opts.on("--join-blocks",
97
107
  "Join blocks if appropriate after filtering",
98
108
  "out sequences") do
99
- options.parse_options[:join_blocks] = true
109
+ $options.parse_options[:join_blocks] = true
100
110
  end
101
111
  opts.on("--remove-gaps", "Remove gaps after filtering out sequences") do
102
- options.parse_options[:remove_gaps] = true
112
+ $options.parse_options[:remove_gaps] = true
103
113
  end
104
114
  opts.on("--parse-extended", "Parse 'extended' MAF data (i, q lines)") do
105
- options.parse_options[:parse_extended] = true
115
+ $options.parse_options[:parse_extended] = true
106
116
  end
107
117
  opts.on("--parse-empty", "Parse empty (e) lines of MAF data") do
108
- options.parse_options[:parse_empty] = true
118
+ $options.parse_options[:parse_empty] = true
109
119
  end
110
120
  opts.separator ""
111
121
  opts.separator "Logging options:"
@@ -120,24 +130,24 @@ def usage(msg)
120
130
  exit 2
121
131
  end
122
132
 
123
- if options.maf
124
- access = Access.file(options.maf, options.idx, options.parse_options)
125
- elsif options.maf_dir
126
- access = Access.maf_dir(options.maf_dir, options.parse_options)
133
+ if $options.maf
134
+ access = Access.file($options.maf, $options.idx, $options.parse_options)
135
+ elsif $options.maf_dir
136
+ access = Access.maf_dir($options.maf_dir, $options.parse_options)
127
137
  else
128
138
  usage "Must supply --maf or --maf-dir!"
129
139
  end
130
140
 
131
141
  begin
132
- access.sequence_filter = options.seq_filter unless options.seq_filter.empty?
133
- access.block_filter = options.block_filter unless options.block_filter.empty?
134
- if options.out_path
135
- outf = File.open(options.out_path, 'w')
142
+ access.sequence_filter = $options.seq_filter unless $options.seq_filter.empty?
143
+ access.block_filter = $options.block_filter unless $options.block_filter.empty?
144
+ if $options.out_path
145
+ outf = File.open($options.out_path, 'w')
136
146
  else
137
147
  outf = $stdout
138
148
  end
139
149
 
140
- case options.format
150
+ case $options.format
141
151
  when :maf
142
152
  writer = Writer.new(outf)
143
153
  when :fasta
@@ -146,20 +156,20 @@ begin
146
156
  raise "unsupported output format #{format}!"
147
157
  end
148
158
 
149
- if options.bed
150
- intervals = read_bed_intervals(options.bed)
151
- elsif options.interval
152
- intervals = [options.interval]
159
+ if $options.bed
160
+ intervals = read_bed_intervals($options.bed)
161
+ elsif $options.interval
162
+ intervals = [$options.interval]
153
163
  else
154
164
  usage "Must supply --interval or --bed!"
155
165
  end
156
166
 
157
167
  # TODO: provide access to original MAF header?
158
- if options.format == :maf
168
+ if $options.format == :maf
159
169
  writer.write_header(Header.default)
160
170
  end
161
171
 
162
- case options.mode
172
+ case $options.mode
163
173
  when :intersect
164
174
  access.find(intervals) do |block|
165
175
  writer.write_block(block)
@@ -172,7 +182,7 @@ begin
172
182
  end
173
183
  end
174
184
  else
175
- raise "Unsupported mode #{options.mode}!"
185
+ raise "Unsupported mode #{$options.mode}!"
176
186
  end
177
187
 
178
188
  ensure
@@ -14,10 +14,10 @@ PRINTERS = {
14
14
  $options = OpenStruct.new
15
15
  $options.mode = :build
16
16
  $options.ref_only = true
17
+ $options.parser_opts = { :parse_extended => false }
17
18
 
18
19
  def build_index(maf, index)
19
- parser = Bio::MAF::Parser.new(maf,
20
- :parse_extended => false)
20
+ parser = Bio::MAF::Parser.new(maf, $options.parser_opts)
21
21
  idx = Bio::MAF::KyotoIndex.build(parser, index, $options.ref_only)
22
22
  idx.close
23
23
  end
@@ -36,6 +36,15 @@ op = OptionParser.new do |opts|
36
36
  opts.on("-d", "--dump", "Dump contents of given INDEX") do
37
37
  $options.mode = :dump
38
38
  end
39
+ opts.on("-O", "--parser-option OPT") do |opt|
40
+ if opt =~ /(-?)(.+)/
41
+ val = ! ($1 == "-")
42
+ option = $2.to_sym
43
+ $options.parser_opts[option] = val
44
+ else
45
+ raise "malformed parser option #{opt}!"
46
+ end
47
+ end
39
48
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
40
49
  require 'ruby-prof'
41
50
  if pspec =~ /(\w+):(.+)/
@@ -10,7 +10,16 @@ def parse_interval(line)
10
10
  src, r_start_s, r_end_s, _ = line.split(nil, 4)
11
11
  r_start = r_start_s.to_i
12
12
  r_end = r_end_s.to_i
13
- return Bio::GenomicInterval.zero_based(src, r_start, r_end)
13
+ i_src = if $options.bed_species
14
+ "#{$options.bed_species}.#{src}"
15
+ else
16
+ src
17
+ end
18
+ if $options.one_based
19
+ Bio::GenomicInterval.new(i_src, r_start, r_end)
20
+ else
21
+ Bio::GenomicInterval.zero_based(i_src, r_start, r_end)
22
+ end
14
23
  end
15
24
 
16
25
  def target_for(base, interval, &blk)
@@ -18,52 +27,96 @@ def target_for(base, interval, &blk)
18
27
  File.open(path, 'w', &blk)
19
28
  end
20
29
 
21
# Copy the tiling-related command line settings onto +tiler+.
# The species list and species map are always assigned; the reference
# sequence and the fill character only when they were given.
def apply_options(tiler)
  reference = $options.ref
  tiler.reference = reference if reference
  tiler.species = $options.species
  tiler.species_map = $options.species_map
  fill = $options.fill_char
  tiler.fill_char = fill if fill
end
26
36
 
27
- options = OpenStruct.new
28
- options.p = { :threads => 1 }
29
- options.species = []
30
- options.species_map = {}
31
- options.usage = false
37
# Walk +intervals+ in order and yield one tiler per interval, after the
# command line settings have been applied to it via apply_options.
def each_tiler(access, intervals)
  intervals.each do |interval|
    access.tile(interval) do |configured|
      apply_options(configured)
      yield(configured)
    end
  end
end
45
+
46
+ $options = OpenStruct.new
47
+ $options.p = { :threads => 1 }
48
+ $options.species = []
49
+ $options.species_map = {}
50
+ $options.usage = false
32
51
 
33
52
  o_parser = OptionParser.new do |opts|
34
53
  opts.banner = "Usage: maf_tile [options] <maf> [index]"
35
54
  opts.separator ""
36
55
  opts.separator "Options:"
37
56
  opts.on("-r", "--reference SEQ", "FASTA reference sequence") do |ref|
38
- options.ref = ref
39
- end
40
- opts.on("-i", "--interval [CHR:]BEGIN:END", "Genomic interval, zero-based") do |int|
41
- if int =~ /(.+):(\d+):(\d+)/
42
- gi = Bio::GenomicInterval.zero_based($1, ($2.to_i), ($3.to_i))
43
- options.genomic_interval = gi
44
- elsif int =~ /(\d+):(\d+)/
45
- options.interval = ($1.to_i)...($2.to_i)
57
+ $options.ref = ref
58
+ end
59
+ opts.on("-i", "--interval [CHR:]BEGIN-END", "Genomic interval, zero-based") do |int|
60
+ if int =~ /(.+):(\d+)-(\d+)/
61
+ $options.genomic_interval_spec = [$1, $2.to_i, $3.to_i]
62
+ elsif int =~ /(\d+)-(\d+)/
63
+ $options.interval = ($1.to_i)...($2.to_i)
46
64
  else
47
65
  $stderr.puts "Invalid interval specification #{int}!"
48
- options.usage = true
66
+ $options.usage = true
49
67
  end
50
68
  end
51
- opts.on("-s", "--species SPECIES[:NAME]", "Species to use (with mapped name)") do |sp|
69
+ opts.on("--one-based",
70
+ "Treat all intervals as one-based",
71
+ "(even from BED files, contrary to the standard)") do
72
+ $options.one_based = true
73
+ end
74
opts.on("-s", "--species SPECIES[:NAME]",
        "Species to use (mapped name optional)",
        "(can be a comma-separated list)") do |sp|
  # Split on commas first so that every entry of a list may carry its own
  # ":NAME" mapping (e.g. "a:A,b:B"); splitting the whole value on ":"
  # would treat "A,b" as the mapped name of "a".
  sp.split(/,/).each do |entry|
    species, mapped = entry.split(/:/)
    next unless species # skip empty list entries such as in "a,,b"
    $options.species << species
    $options.species_map[species] = mapped if mapped
  end
end
87
+ opts.on("--species-file FILE", "File specifying species and optionally mapped names") do |file|
88
+ File.open(file) do |f|
89
+ f.each_line do |line|
90
+ next if line =~ /^#/
91
+ parts = line.split
92
+ next unless parts.size > 0
93
+ $options.species << parts[0]
94
+ $options.species_map[parts[0]] = parts[1] if parts[1]
95
+ end
58
96
  end
59
97
  end
60
98
  opts.on("-o", "--output-base BASE", "Base name for output files",
61
99
  "Use stdout for a single interval if not given") do |base|
62
- options.output_base = base
100
+ $options.output_base = base
63
101
  end
64
102
  opts.on("--bed BED", "BED file specifying intervals",
65
103
  "(requires --output-base)") do |bed|
66
- options.bed = bed
104
+ $options.bed = bed
105
+ end
106
+ opts.on("--bed-species SPECIES",
107
+ "Species to prepend to BED chromosome specs") do |species|
108
+ $options.bed_species = species
109
+ end
110
+ opts.on("--fill-char C",
111
+ "Fill gaps with character C",
112
+ "(default is *)") do |char|
113
+ $options.fill_char = char
114
+ end
115
+ opts.on("--upcase", "Fold all sequence data to upper case") do
116
+ $options.p[:upcase] = true
117
+ end
118
+ opts.on("--concat", "Concatenate result blocks") do
119
+ $options.concat = true
67
120
  end
68
121
  Bio::MAF::handle_logging_options(opts)
69
122
  end
@@ -74,52 +127,96 @@ Bio::Log::CLI.configure('bio-maf')
74
127
  maf_p = ARGV.shift
75
128
  index_p = ARGV.shift
76
129
 
77
- unless (! options.usage) \
78
- && maf_p && (! options.species.empty?) \
79
- && (options.output_base \
80
- ? options.bed \
81
- : options.interval || options.genomic_interval)
130
+ unless maf_p
131
+ $stderr.puts "Must specify MAF file to process!"
132
+ $options.usage = true
133
+ end
134
+
135
+ if $options.species.empty?
136
+ $stderr.puts "Must specify species to tile with --species!"
137
+ $options.usage = true
138
+ end
139
+
140
+ unless $options.bed || $options.interval || $options.genomic_interval_spec
141
+ $stderr.puts "Must specify --bed or --interval!"
142
+ $options.usage = true
143
+ end
144
+
145
+ if $options.bed && ! ($options.output_base || $options.concat)
146
+ $stderr.puts "Must specify --output-base or --concat when specifying --bed!"
147
+ $options.usage = true
148
+ end
149
+
150
+ if (! $options.output_base) && ! ($options.interval || $options.genomic_interval_spec || ($options.bed && $options.concat))
151
+ $stderr.puts "Must specify --interval or --bed with --concat if --output-base is not given!"
152
+ $options.usage = true
153
+ end
154
+
155
+ if $options.usage
82
156
  $stderr.puts o_parser
83
157
  exit 2
84
158
  end
85
159
 
86
160
  access = if File.directory? maf_p
87
- Bio::MAF::Access.maf_dir(maf_p, options.p)
161
+ Bio::MAF::Access.maf_dir(maf_p, $options.p)
88
162
  else
89
- Bio::MAF::Access.file(maf_p, index_p, options.p)
163
+ Bio::MAF::Access.file(maf_p, index_p, $options.p)
90
164
  end
91
165
 
92
- if options.bed
166
+ if $options.bed
93
167
  intervals = []
94
- File.open(options.bed) do |bed_f|
168
+ File.open($options.bed) do |bed_f|
95
169
  bed_f.each_line { |line| intervals << parse_interval(line) }
96
170
  end
97
- intervals.sort_by! { |int| int.zero_start }
98
- intervals.each do |int|
99
- access.tile(int) do |tiler|
100
- apply_options(options, tiler)
101
- target_for(options.output_base, int) do |target|
171
+ #intervals.sort_by! { |int| int.zero_start }
172
+ if $options.concat
173
+ # concatenate, as with exons
174
+ concat = Array.new($options.species.size)
175
+ concat.fill { '' }
176
+ non_fill = nil
177
+ each_tiler(access, intervals) do |tiler|
178
+ non_fill = tiler.non_fill_re if ! non_fill
179
+ concat.zip(tiler.build_bio_alignment) do |buf, seq|
180
+ buf << seq.to_s
181
+ end
182
+ end
183
+ fh = $options.output_base ? File.open($options.output_base, 'wb') : $stdout
184
+ $options.species.zip(concat) do |species, seq|
185
+ if non_fill.match(seq)
186
+ sp_out_name = $options.species_map[species] || species
187
+ fh.puts ">#{sp_out_name}", seq.scan(/.{1,70}/)
188
+ end
189
+ end
190
+ else
191
+ # output each interval separately
192
+ each_tiler(access, intervals) do |tiler|
193
+ target_for($options.output_base, tiler.interval) do |target|
102
194
  tiler.write_fasta(target)
103
195
  end
104
196
  end
105
197
  end
106
198
  else
107
199
  # single interval
108
- if options.genomic_interval
109
- interval = options.genomic_interval
200
+ if $options.genomic_interval_spec
201
+ spec = $options.genomic_interval_spec
202
+ if $options.one_based
203
+ interval = Bio::GenomicInterval.new(*spec)
204
+ else
205
+ interval = Bio::GenomicInterval.zero_based(*spec)
206
+ end
110
207
  else
111
208
  if access.indices.size != 1
112
209
  raise "Must explicitly specify sequence in --interval argument with multiple candidate MAF files!"
113
210
  end
114
211
  ref_seq = access.indices.keys.first
115
212
  interval = Bio::GenomicInterval.zero_based(ref_seq,
116
- options.interval.begin,
117
- options.interval.end)
213
+ $options.interval.begin,
214
+ $options.interval.end)
118
215
  end
119
216
  access.tile(interval) do |tiler|
120
- apply_options(options, tiler)
121
- if options.output_base
122
- target = target_for(options.output_base, tiler.interval)
217
+ apply_options(tiler)
218
+ if $options.output_base
219
+ target = target_for($options.output_base, tiler.interval)
123
220
  else
124
221
  target = $stdout
125
222
  end