bio-maf 1.0.0-java → 1.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
3
4
  require 'ostruct'
4
5
 
5
6
  require 'bio-maf'
@@ -8,6 +9,9 @@ require 'bio-bgzf'
8
9
  $options = OpenStruct.new
9
10
  $options.dir = '.'
10
11
  $options.ref_only = true
12
+ $options.n_jobs = 1
13
+ $options.force = false
14
+ $options.level = 2
11
15
 
12
16
  op = OptionParser.new do |opts|
13
17
  opts.banner = "Usage: maf_bgzip [options] [<maf> ...]"
@@ -26,31 +30,155 @@ op = OptionParser.new do |opts|
26
30
  "(has no effect without --index)") do
27
31
  $options.ref_only = false
28
32
  end
33
+ opts.on("-l", "--level LEVEL", Integer,
34
+ "gzip compression level for BGZF (1-9)") do |level|
35
+ unless 1 <= level && level <= 9
36
+ $stderr.puts "Invalid compression level: #{level}"
37
+ $stderr.puts opts
38
+ exit 2
39
+ end
40
+ $options.level = level
41
+ end
42
+ opts.on("-f", "--force",
43
+ "Replace output files if they already exist") do
44
+ $options.force = true
45
+ end
46
+ opts.on("-j", "--jobs N", Integer,
47
+ "Run N concurrent jobs (default 1)") do |n|
48
+ $options.n_jobs = n
49
+ end
50
+ Bio::MAF::handle_logging_options(opts)
29
51
  end
30
52
 
31
53
  op.parse!(ARGV)
54
+ Bio::Log::CLI.configure('bio-maf')
32
55
 
33
- until ARGV.empty?
34
- maf_path = ARGV.shift
35
- maf_base = File.basename(maf_path)
56
+ INTERVAL = 10
57
+ LOG = Bio::MAF::LOG
58
+
59
# Abort the program unless +path+ may be written.
#
# Logs an error and exits with status 1 when +path+ already exists and
# --force ($options.force) was not given; otherwise returns nil.
def refuse_to_overwrite(path)
  return unless File.exist?(path) && !$options.force
  LOG.error "#{path} already exists, refusing to overwrite " \
            "without --force!"
  exit 1
end

# Build a deferred job that compresses (and optionally indexes) one MAF file.
#
# Output paths are derived from the input's base name with everything from
# the first ".maf" onwards removed, placed under $options.dir. The overwrite
# checks run immediately, when the task is created, not when it is executed.
#
# @param maf [String] path to the input MAF file
# @return [Proc] lambda that calls process_maf(maf, bgz_path, idx_path);
#   idx_path is nil unless $options.index is set
def make_processing_task(maf)
  base = File.basename(maf).gsub(/\.maf.*/, '')
  bgz_path = "#{$options.dir}/#{base}.maf.bgz"
  refuse_to_overwrite(bgz_path)
  idx_path = nil
  if $options.index
    idx_path = "#{$options.dir}/#{base}.kct"
    refuse_to_overwrite(idx_path)
  end
  lambda { process_maf(maf, bgz_path, idx_path) }
end
79
+
80
# Compress one MAF file to BGZF, optionally indexing its blocks on the fly.
#
# The parser is created with :retain_text so that each block's original text
# can be written to the BGZF stream unchanged. When idx_path is given, any
# existing file at that path is removed, a KyotoIndex is prepared for the
# BGZF output, and every block is submitted to an executor for indexing with
# its offset set from the BGZF writer's last write position.
#
# Progress is logged at debug level, checked every 100 blocks and emitted at
# most once per INTERVAL seconds; summary statistics are logged at info level
# once the file is done.
#
# @param maf_path [String] path of the MAF file to read
# @param bgz_path [String] path of the BGZF file to write (opened 'wb')
# @param idx_path [String, nil] index path, or nil to skip indexing
def process_maf(maf_path, bgz_path, idx_path)
  maf_base = File.basename(maf_path)
  LOG.debug { "Processing #{maf_base}." }
  p = Bio::MAF::Parser.new(maf_path,
                           :retain_text => true)
  idx = nil
  executor = nil
  if idx_path
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    File.unlink(idx_path) if File.exist?(idx_path)
    idx = Bio::MAF::KyotoIndex.new(idx_path)
    idx.prep(bgz_path, :bgzf, $options.ref_only)
    executor = Bio::MAF::Executor.create
  end
  start_t = Time.now
  last_t = start_t
  last_pos = 0
  n_blocks = 0
  maf_size = File.size(maf_path)
  File.open(bgz_path, 'wb') do |out_f|
    Bio::BGZF::Writer.new(out_f, $options.level) do |bgz_w|
      maf_w = Bio::MAF::Writer.new(bgz_w)
      maf_w.write_header(p.header)
      p.each_block do |block|
        bgz_w.write(block.orig_text)
        if idx
          block.offset = bgz_w.last_write_pos
          executor.submit do
            idx.index_blocks([block])
          end
        end
        n_blocks += 1
        # Only look at the clock every 100 blocks to keep the hot loop cheap.
        if n_blocks % 100 == 0
          cur_t = Time.now
          delta_t = cur_t - last_t
          if delta_t > INTERVAL
            cur_pos = p.phys_f.tell
            LOG.debug {
              pos_mb = cur_pos.to_f / 1048576
              delta_bytes = cur_pos - last_pos
              rate = delta_bytes.to_f / delta_t
              mb_rate = rate / 1048576
              pct = cur_pos.to_f / maf_size * 100
              elapsed = cur_t - start_t
              sprintf("%s: processed %.1f MB (%.1f%%) in %ds, %.2f MB/s.",
                      maf_base,
                      pos_mb,
                      pct,
                      elapsed,
                      mb_rate)
            }
            last_t = cur_t
            last_pos = cur_pos
          end
        end
      end
    end
  end
  # When the parser reads through a different stream than the physical file,
  # its position is reported separately as the uncompressed size.
  unc = p.f.tell if p.f != p.phys_f
  p.close
  if idx
    executor.shutdown
    idx.db.synchronize(true)
  end
  elapsed = Time.now - start_t
  mb = maf_size.to_f / 1048576
  mb_rate = mb / elapsed
  LOG.info { sprintf("Processed %s (%.1f MB) in %ds, %.2f MB/s",
                     maf_base,
                     mb,
                     elapsed,
                     mb_rate) }
  if unc
    LOG.info {
      # Float division: integer division would truncate to whole megabytes.
      unc_mb = unc.to_f / 1048576
      unc_rate = unc_mb / elapsed
      sprintf(" Uncompressed: %.1f MB, %.2f MB/s",
              unc_mb, unc_rate)
    }
  end
  LOG.info {
    raw_size = unc || maf_size
    avg_block_kb = raw_size.to_f / n_blocks / 1024
    sprintf(" %d alignment blocks, average size %.2f KB",
            n_blocks, avg_block_kb)
  }
  LOG.info {
    orig_size = unc ? unc : maf_size
    bgzf_size = File.size(bgz_path).to_f
    ratio = bgzf_size / orig_size
    sprintf(" Compressed with BGZF (level=%d) to %.1f MB (%.1fx)",
            $options.level,
            bgzf_size / 1048576,
            ratio)
  }
end
175
+
176
+ runner = Bio::MAF::JobRunner.create($options.n_jobs)
177
+ LOG.debug "Created #{runner.class} set for #{$options.n_jobs} concurrent jobs."
178
+ ARGV.each do |maf|
179
+ task = make_processing_task(maf)
180
+ runner.add(&task)
56
181
  end
182
+ LOG.debug "Running jobs."
183
+ runner.run
184
+ LOG.debug "Finished processing."
@@ -6,12 +6,13 @@ require 'ostruct'
6
6
 
7
7
  include Bio::MAF
8
8
 
9
- options = OpenStruct.new
10
- options.mode = :intersect
11
- options.format = :maf
12
- options.seq_filter = {}
13
- options.block_filter = {}
14
- options.parse_options = {}
9
+ $options = OpenStruct.new
10
+ $options.mode = :intersect
11
+ $options.format = :maf
12
+ $options.one_based = false
13
+ $options.seq_filter = {}
14
+ $options.block_filter = {}
15
+ $options.parse_options = {}
15
16
 
16
17
  def handle_list_spec(spec)
17
18
  if spec =~ /^@(.+)/
@@ -23,7 +24,11 @@ end
23
24
 
24
25
  def handle_interval_spec(int)
25
26
  if int =~ /(.+):(\d+)-(\d+)/
26
- Bio::GenomicInterval.zero_based($1, $2.to_i, $3.to_i)
27
+ if $options.one_based
28
+ Bio::GenomicInterval.new($1, $2.to_i, $3.to_i)
29
+ else
30
+ Bio::GenomicInterval.zero_based($1, $2.to_i, $3.to_i)
31
+ end
27
32
  else
28
33
  raise "Invalid interval specification: #{int}"
29
34
  end
@@ -34,13 +39,13 @@ $op = OptionParser.new do |opts|
34
39
  opts.separator ""
35
40
  opts.separator "MAF source options (either --maf or --maf-dir must be given):"
36
41
  opts.on("-m", "--maf MAF", "MAF file") do |maf|
37
- options.maf = maf
42
+ $options.maf = maf
38
43
  end
39
44
  opts.on("-i", "--index INDEX", "MAF index") do |idx|
40
- options.idx = idx
45
+ $options.idx = idx
41
46
  end
42
47
  opts.on("-d", "--maf-dir DIR", "MAF directory") do |dir|
43
- options.maf_dir = dir
48
+ $options.maf_dir = dir
44
49
  end
45
50
  opts.separator ""
46
51
  opts.separator "Extraction options:"
@@ -49,21 +54,26 @@ $op = OptionParser.new do |opts|
49
54
  "blocks intersecting the given region,",
50
55
  "or 'slice' to extract subsets covering ",
51
56
  "given regions") do |mode|
52
- options.mode = mode
57
+ $options.mode = mode
53
58
  end
54
59
  opts.on("--bed BED", "Use intervals from the given BED file") do |bed|
55
- options.bed = bed
60
+ $options.bed = bed
56
61
  end
57
62
  opts.on("--interval SEQ:START:END", "Zero-based genomic interval to match") do |int|
58
- options.interval = handle_interval_spec(int)
63
+ $options.interval = handle_interval_spec(int)
64
+ end
65
+ opts.on("--one-based",
66
+ "Treat all intervals as one-based",
67
+ "(even from BED files, contrary to the standard)") do
68
+ $options.one_based = true
59
69
  end
60
70
  opts.separator ""
61
71
  opts.separator "Output options:"
62
72
  opts.on("-f", "--format FMT", [:maf, :fasta], "Output format") do |fmt|
63
- options.format = fmt
73
+ $options.format = fmt
64
74
  end
65
75
  opts.on("-o", "--output OUT", "Write output to file OUT") do |out|
66
- options.out_path = out
76
+ $options.out_path = out
67
77
  end
68
78
  opts.separator ""
69
79
  opts.separator "Filtering options:"
@@ -71,41 +81,41 @@ $op = OptionParser.new do |opts|
71
81
  "Filter out all but the species in the",
72
82
  "given comma-separated list",
73
83
  "(or @FILE to read from a file)") do |spec|
74
- options.seq_filter[:only_species] = handle_list_spec(spec)
84
+ $options.seq_filter[:only_species] = handle_list_spec(spec)
75
85
  end
76
86
  opts.on("--with-all-species SPECIES",
77
87
  "Only match blocks with all the given",
78
88
  "species, comma-separated",
79
89
  "(or @FILE to read from a file)") do |spec|
80
- options.block_filter[:with_all_species] = handle_list_spec(spec)
90
+ $options.block_filter[:with_all_species] = handle_list_spec(spec)
81
91
  end
82
92
  opts.on("--min-sequences N", Integer,
83
93
  "Match only blocks with at least N sequences") do |n|
84
- options.block_filter[:at_least_n_sequences] = n
94
+ $options.block_filter[:at_least_n_sequences] = n
85
95
  end
86
96
  opts.on("--min-text-size N", Integer,
87
97
  "Match only blocks with minimum text size N") do |n|
88
- options.block_filter[:min_size] = n
98
+ $options.block_filter[:min_size] = n
89
99
  end
90
100
  opts.on("--max-text-size N", Integer,
91
101
  "Match only blocks with maximum text size N") do |n|
92
- options.block_filter[:max_size] = n
102
+ $options.block_filter[:max_size] = n
93
103
  end
94
104
  opts.separator ""
95
105
  opts.separator "Block processing options:"
96
106
  opts.on("--join-blocks",
97
107
  "Join blocks if appropriate after filtering",
98
108
  "out sequences") do
99
- options.parse_options[:join_blocks] = true
109
+ $options.parse_options[:join_blocks] = true
100
110
  end
101
111
  opts.on("--remove-gaps", "Remove gaps after filtering out sequences") do
102
- options.parse_options[:remove_gaps] = true
112
+ $options.parse_options[:remove_gaps] = true
103
113
  end
104
114
  opts.on("--parse-extended", "Parse 'extended' MAF data (i, q lines)") do
105
- options.parse_options[:parse_extended] = true
115
+ $options.parse_options[:parse_extended] = true
106
116
  end
107
117
  opts.on("--parse-empty", "Parse empty (e) lines of MAF data") do
108
- options.parse_options[:parse_empty] = true
118
+ $options.parse_options[:parse_empty] = true
109
119
  end
110
120
  opts.separator ""
111
121
  opts.separator "Logging options:"
@@ -120,24 +130,24 @@ def usage(msg)
120
130
  exit 2
121
131
  end
122
132
 
123
- if options.maf
124
- access = Access.file(options.maf, options.idx, options.parse_options)
125
- elsif options.maf_dir
126
- access = Access.maf_dir(options.maf_dir, options.parse_options)
133
+ if $options.maf
134
+ access = Access.file($options.maf, $options.idx, $options.parse_options)
135
+ elsif $options.maf_dir
136
+ access = Access.maf_dir($options.maf_dir, $options.parse_options)
127
137
  else
128
138
  usage "Must supply --maf or --maf-dir!"
129
139
  end
130
140
 
131
141
  begin
132
- access.sequence_filter = options.seq_filter unless options.seq_filter.empty?
133
- access.block_filter = options.block_filter unless options.block_filter.empty?
134
- if options.out_path
135
- outf = File.open(options.out_path, 'w')
142
+ access.sequence_filter = $options.seq_filter unless $options.seq_filter.empty?
143
+ access.block_filter = $options.block_filter unless $options.block_filter.empty?
144
+ if $options.out_path
145
+ outf = File.open($options.out_path, 'w')
136
146
  else
137
147
  outf = $stdout
138
148
  end
139
149
 
140
- case options.format
150
+ case $options.format
141
151
  when :maf
142
152
  writer = Writer.new(outf)
143
153
  when :fasta
@@ -146,20 +156,20 @@ begin
146
156
  raise "unsupported output format #{format}!"
147
157
  end
148
158
 
149
- if options.bed
150
- intervals = read_bed_intervals(options.bed)
151
- elsif options.interval
152
- intervals = [options.interval]
159
+ if $options.bed
160
+ intervals = read_bed_intervals($options.bed)
161
+ elsif $options.interval
162
+ intervals = [$options.interval]
153
163
  else
154
164
  usage "Must supply --interval or --bed!"
155
165
  end
156
166
 
157
167
  # TODO: provide access to original MAF header?
158
- if options.format == :maf
168
+ if $options.format == :maf
159
169
  writer.write_header(Header.default)
160
170
  end
161
171
 
162
- case options.mode
172
+ case $options.mode
163
173
  when :intersect
164
174
  access.find(intervals) do |block|
165
175
  writer.write_block(block)
@@ -172,7 +182,7 @@ begin
172
182
  end
173
183
  end
174
184
  else
175
- raise "Unsupported mode #{options.mode}!"
185
+ raise "Unsupported mode #{$options.mode}!"
176
186
  end
177
187
 
178
188
  ensure
@@ -14,10 +14,10 @@ PRINTERS = {
14
14
  $options = OpenStruct.new
15
15
  $options.mode = :build
16
16
  $options.ref_only = true
17
+ $options.parser_opts = { :parse_extended => false }
17
18
 
18
19
# Build a KyotoIndex for a MAF file and close it.
#
# The parser is configured from $options.parser_opts, and $options.ref_only
# is passed through to the index builder.
#
# @param maf [String] path of the MAF file to index
# @param index [String] path of the index to build
# @return whatever the built index's #close returns
def build_index(maf, index)
  maf_parser = Bio::MAF::Parser.new(maf, $options.parser_opts)
  Bio::MAF::KyotoIndex.build(maf_parser, index, $options.ref_only).close
end
@@ -36,6 +36,15 @@ op = OptionParser.new do |opts|
36
36
  opts.on("-d", "--dump", "Dump contents of given INDEX") do
37
37
  $options.mode = :dump
38
38
  end
39
+ opts.on("-O", "--parser-option OPT") do |opt|
40
+ if opt =~ /(-?)(.+)/
41
+ val = ! ($1 == "-")
42
+ option = $2.to_sym
43
+ $options.parser_opts[option] = val
44
+ else
45
+ raise "malformed parser option #{opt}!"
46
+ end
47
+ end
39
48
  opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
40
49
  require 'ruby-prof'
41
50
  if pspec =~ /(\w+):(.+)/
@@ -10,7 +10,16 @@ def parse_interval(line)
10
10
  src, r_start_s, r_end_s, _ = line.split(nil, 4)
11
11
  r_start = r_start_s.to_i
12
12
  r_end = r_end_s.to_i
13
- return Bio::GenomicInterval.zero_based(src, r_start, r_end)
13
+ i_src = if $options.bed_species
14
+ "#{$options.bed_species}.#{src}"
15
+ else
16
+ src
17
+ end
18
+ if $options.one_based
19
+ Bio::GenomicInterval.new(i_src, r_start, r_end)
20
+ else
21
+ Bio::GenomicInterval.zero_based(i_src, r_start, r_end)
22
+ end
14
23
  end
15
24
 
16
25
  def target_for(base, interval, &blk)
@@ -18,52 +27,96 @@ def target_for(base, interval, &blk)
18
27
  File.open(path, 'w', &blk)
19
28
  end
20
29
 
21
# Copy the tiling-related command-line settings from $options onto a tiler.
#
# The reference and fill character are assigned only when set; the species
# list and species map are always assigned.
#
# @param tiler object responding to reference=, species=, species_map=
#   and fill_char=
def apply_options(tiler)
  settings = $options
  if settings.ref
    tiler.reference = settings.ref
  end
  tiler.species = settings.species
  tiler.species_map = settings.species_map
  tiler.fill_char = settings.fill_char if settings.fill_char
end
26
36
 
27
- options = OpenStruct.new
28
- options.p = { :threads => 1 }
29
- options.species = []
30
- options.species_map = {}
31
- options.usage = false
37
# Yield a configured tiler for each interval, in order.
#
# For every interval, access.tile is called; the tiler it supplies has the
# command-line options applied (apply_options) before being passed to the
# caller's block.
#
# @param access object responding to #tile(interval) with a block
# @param intervals [Enumerable] intervals to tile
# @yield [tiler] the configured tiler for each interval
# @return the result of intervals.each
def each_tiler(access, intervals)
  intervals.each do |interval|
    access.tile(interval) do |tiler|
      yield tiler.tap { |configured| apply_options(configured) }
    end
  end
end
45
+
46
+ $options = OpenStruct.new
47
+ $options.p = { :threads => 1 }
48
+ $options.species = []
49
+ $options.species_map = {}
50
+ $options.usage = false
32
51
 
33
52
  o_parser = OptionParser.new do |opts|
34
53
  opts.banner = "Usage: maf_tile [options] <maf> [index]"
35
54
  opts.separator ""
36
55
  opts.separator "Options:"
37
56
  opts.on("-r", "--reference SEQ", "FASTA reference sequence") do |ref|
38
- options.ref = ref
39
- end
40
- opts.on("-i", "--interval [CHR:]BEGIN:END", "Genomic interval, zero-based") do |int|
41
- if int =~ /(.+):(\d+):(\d+)/
42
- gi = Bio::GenomicInterval.zero_based($1, ($2.to_i), ($3.to_i))
43
- options.genomic_interval = gi
44
- elsif int =~ /(\d+):(\d+)/
45
- options.interval = ($1.to_i)...($2.to_i)
57
+ $options.ref = ref
58
+ end
59
+ opts.on("-i", "--interval [CHR:]BEGIN-END", "Genomic interval, zero-based") do |int|
60
+ if int =~ /(.+):(\d+)-(\d+)/
61
+ $options.genomic_interval_spec = [$1, $2.to_i, $3.to_i]
62
+ elsif int =~ /(\d+)-(\d+)/
63
+ $options.interval = ($1.to_i)...($2.to_i)
46
64
  else
47
65
  $stderr.puts "Invalid interval specification #{int}!"
48
- options.usage = true
66
+ $options.usage = true
49
67
  end
50
68
  end
51
- opts.on("-s", "--species SPECIES[:NAME]", "Species to use (with mapped name)") do |sp|
69
+ opts.on("--one-based",
70
+ "Treat all intervals as one-based",
71
+ "(even from BED files, contrary to the standard)") do
72
+ $options.one_based = true
73
+ end
74
+ opts.on("-s", "--species SPECIES[:NAME]",
75
+ "Species to use (mapped name optional)",
76
+ "(can be a comma-separated list)") do |sp|
52
77
  if sp =~ /:/
53
78
  species, mapped = sp.split(/:/)
54
- options.species << species
55
- options.species_map[species] = mapped
79
+ $options.species << species
80
+ $options.species_map[species] = mapped
81
+ elsif sp =~ /,/
82
+ $options.species.concat(sp.split(/,/))
56
83
  else
57
- options.species << sp
84
+ $options.species << sp
85
+ end
86
+ end
87
+ opts.on("--species-file FILE", "File specifying species and optionally mapped names") do |file|
88
+ File.open(file) do |f|
89
+ f.each_line do |line|
90
+ next if line =~ /^#/
91
+ parts = line.split
92
+ next unless parts.size > 0
93
+ $options.species << parts[0]
94
+ $options.species_map[parts[0]] = parts[1] if parts[1]
95
+ end
58
96
  end
59
97
  end
60
98
  opts.on("-o", "--output-base BASE", "Base name for output files",
61
99
  "Use stdout for a single interval if not given") do |base|
62
- options.output_base = base
100
+ $options.output_base = base
63
101
  end
64
102
  opts.on("--bed BED", "BED file specifying intervals",
65
103
  "(requires --output-base)") do |bed|
66
- options.bed = bed
104
+ $options.bed = bed
105
+ end
106
+ opts.on("--bed-species SPECIES",
107
+ "Species to prepend to BED chromosome specs") do |species|
108
+ $options.bed_species = species
109
+ end
110
+ opts.on("--fill-char C",
111
+ "Fill gaps with character C",
112
+ "(default is *)") do |char|
113
+ $options.fill_char = char
114
+ end
115
+ opts.on("--upcase", "Fold all sequence data to upper case") do
116
+ $options.p[:upcase] = true
117
+ end
118
+ opts.on("--concat", "Concatenate result blocks") do
119
+ $options.concat = true
67
120
  end
68
121
  Bio::MAF::handle_logging_options(opts)
69
122
  end
@@ -74,52 +127,96 @@ Bio::Log::CLI.configure('bio-maf')
74
127
  maf_p = ARGV.shift
75
128
  index_p = ARGV.shift
76
129
 
77
- unless (! options.usage) \
78
- && maf_p && (! options.species.empty?) \
79
- && (options.output_base \
80
- ? options.bed \
81
- : options.interval || options.genomic_interval)
130
+ unless maf_p
131
+ $stderr.puts "Must specify MAF file to process!"
132
+ $options.usage = true
133
+ end
134
+
135
+ if $options.species.empty?
136
+ $stderr.puts "Must specify species to tile with --species!"
137
+ $options.usage = true
138
+ end
139
+
140
+ unless $options.bed || $options.interval || $options.genomic_interval_spec
141
+ $stderr.puts "Must specify --bed or --interval!"
142
+ $options.usage = true
143
+ end
144
+
145
+ if $options.bed && ! ($options.output_base || $options.concat)
146
+ $stderr.puts "Must specify --output-base or --concat when specifying --bed!"
147
+ $options.usage = true
148
+ end
149
+
150
+ if (! $options.output_base) && ! ($options.interval || $options.genomic_interval_spec || ($options.bed && $options.concat))
151
+ $stderr.puts "Must specify --interval or --bed with --concat if --output-base is not given!"
152
+ $options.usage = true
153
+ end
154
+
155
+ if $options.usage
82
156
  $stderr.puts o_parser
83
157
  exit 2
84
158
  end
85
159
 
86
160
  access = if File.directory? maf_p
87
- Bio::MAF::Access.maf_dir(maf_p, options.p)
161
+ Bio::MAF::Access.maf_dir(maf_p, $options.p)
88
162
  else
89
- Bio::MAF::Access.file(maf_p, index_p, options.p)
163
+ Bio::MAF::Access.file(maf_p, index_p, $options.p)
90
164
  end
91
165
 
92
- if options.bed
166
+ if $options.bed
93
167
  intervals = []
94
- File.open(options.bed) do |bed_f|
168
+ File.open($options.bed) do |bed_f|
95
169
  bed_f.each_line { |line| intervals << parse_interval(line) }
96
170
  end
97
- intervals.sort_by! { |int| int.zero_start }
98
- intervals.each do |int|
99
- access.tile(int) do |tiler|
100
- apply_options(options, tiler)
101
- target_for(options.output_base, int) do |target|
171
+ #intervals.sort_by! { |int| int.zero_start }
172
+ if $options.concat
173
+ # concatenate, as with exons
174
+ concat = Array.new($options.species.size)
175
+ concat.fill { '' }
176
+ non_fill = nil
177
+ each_tiler(access, intervals) do |tiler|
178
+ non_fill = tiler.non_fill_re if ! non_fill
179
+ concat.zip(tiler.build_bio_alignment) do |buf, seq|
180
+ buf << seq.to_s
181
+ end
182
+ end
183
+ fh = $options.output_base ? File.open($options.output_base, 'wb') : $stdout
184
+ $options.species.zip(concat) do |species, seq|
185
+ if non_fill.match(seq)
186
+ sp_out_name = $options.species_map[species] || species
187
+ fh.puts ">#{sp_out_name}", seq.scan(/.{1,70}/)
188
+ end
189
+ end
190
+ else
191
+ # output each interval separately
192
+ each_tiler(access, intervals) do |tiler|
193
+ target_for($options.output_base, tiler.interval) do |target|
102
194
  tiler.write_fasta(target)
103
195
  end
104
196
  end
105
197
  end
106
198
  else
107
199
  # single interval
108
- if options.genomic_interval
109
- interval = options.genomic_interval
200
+ if $options.genomic_interval_spec
201
+ spec = $options.genomic_interval_spec
202
+ if $options.one_based
203
+ interval = Bio::GenomicInterval.new(*spec)
204
+ else
205
+ interval = Bio::GenomicInterval.zero_based(*spec)
206
+ end
110
207
  else
111
208
  if access.indices.size != 1
112
209
  raise "Must explicitly specify sequence in --interval argument with multiple candidate MAF files!"
113
210
  end
114
211
  ref_seq = access.indices.keys.first
115
212
  interval = Bio::GenomicInterval.zero_based(ref_seq,
116
- options.interval.begin,
117
- options.interval.end)
213
+ $options.interval.begin,
214
+ $options.interval.end)
118
215
  end
119
216
  access.tile(interval) do |tiler|
120
- apply_options(options, tiler)
121
- if options.output_base
122
- target = target_for(options.output_base, tiler.interval)
217
+ apply_options(tiler)
218
+ if $options.output_base
219
+ target = target_for($options.output_base, tiler.interval)
123
220
  else
124
221
  target = $stdout
125
222
  end