bio-maf 1.0.0-java → 1.0.1-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "bio-maf"
5
- s.version = "1.0.0"
5
+ s.version = "1.0.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Clayton Wheeler"]
9
- s.date = "2012-08-02"
9
+ s.date = "2012-08-08"
10
10
  s.description = "Multiple Alignment Format parser for BioRuby."
11
11
  s.email = "cswh@umich.edu"
12
12
  s.extra_rdoc_files = [
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  end
33
33
 
34
34
  s.add_runtime_dependency('bio-alignment', ["~> 0.0.7"])
35
- s.add_runtime_dependency('bio-bgzf', ["~> 0.2.0"])
35
+ s.add_runtime_dependency('bio-bgzf', ["~> 0.2.1"])
36
36
  s.add_runtime_dependency('bio-genomic-interval', ["~> 0.1.2"])
37
37
  s.add_runtime_dependency('bio-logger', ["~> 1.0.1"])
38
38
  if RUBY_PLATFORM == 'java'
@@ -60,3 +60,48 @@ Feature: BGZF compression
60
60
  And a file named "mm8_chr7_tiny.maf.bgz" should exist
61
61
  And a file named "mm8.chrM.maf.bgz" should exist
62
62
 
63
+ @no_jruby
64
+ Scenario: Don't overwrite MAF files
65
+ Given test files:
66
+ | mm8.chrM.maf |
67
+ | mm8.chrM.maf.bgz |
68
+ When I run `maf_bgzip mm8.chrM.maf`
69
+ Then it should fail with:
70
+ """
71
+ exists
72
+ """
73
+
74
+ @no_jruby
75
+ Scenario: Don't overwrite indexes
76
+ Given test files:
77
+ | mm8_chr7_tiny.maf |
78
+ When I run `maf_bgzip --index mm8_chr7_tiny.maf`
79
+ And I run `rm mm8_chr7_tiny.maf.bgz`
80
+ And I run `maf_bgzip --index mm8_chr7_tiny.maf`
81
+ Then it should fail with:
82
+ """
83
+ exists
84
+ """
85
+
86
+ @no_jruby
87
+ Scenario: Overwrite MAF files with --force
88
+ Given test files:
89
+ | mm8.chrM.maf |
90
+ | mm8.chrM.maf.bgz |
91
+ When I run `maf_bgzip --force mm8.chrM.maf`
92
+ Then it should pass with:
93
+ """
94
+ """
95
+
96
+ @no_jruby
97
+ Scenario: Overwrite indexes with --force
98
+ Given test files:
99
+ | mm8_chr7_tiny.maf |
100
+ When I run `maf_bgzip --index mm8_chr7_tiny.maf`
101
+ And I run `rm mm8_chr7_tiny.maf.bgz`
102
+ And I run `maf_bgzip --force --index mm8_chr7_tiny.maf`
103
+ Then it should pass with:
104
+ """
105
+ """
106
+
107
+
@@ -39,6 +39,12 @@ Feature: Indexed access to MAF files
39
39
  And sequence mm8.chr7 of block 0 has start 80082368
40
40
  And sequence mm8.chr7 of block 1 has start 80082471
41
41
 
42
+ Scenario: Index MAF file with extended bin positions
43
+ Given a MAF source file "ext-bin.maf"
44
+ When I open it with a MAF reader
45
+ And build an index on all sequences
46
+ Then the index has at least 18 entries
47
+
42
48
  @no_jruby
43
49
  Scenario: Build MAF index with CLI tool
44
50
  Given test files:
@@ -42,3 +42,20 @@ Feature: Parse MAF files
42
42
  And sequence 0 has text "ACA-TTACT"
43
43
  And sequence 1 has strand :-
44
44
 
45
+ Scenario: Read alignment block, folded to upper case
46
+ Given MAF data:
47
+ """
48
+ ##maf version=1 scoring=humor.v4
49
+ # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
50
+ # /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
51
+
52
+ a score=0.128
53
+ s human_hoxa 100 8 + 100257 aca-ttact
54
+ s horse_hoxa 120 9 - 98892 acaattgct
55
+ s fugu_hoxa 88 7 + 90788 aca--tgct
56
+ """
57
+ When I enable the :upcase parser option
58
+ And I open it with a MAF reader
59
+ Then an alignment block can be obtained
60
+ And the alignment block has 3 sequences
61
+ And sequence 0 has text "ACA-TTACT"
@@ -82,3 +82,14 @@ Feature: Filter results from MAF files
82
82
  And I run `maf_extract -m mm8.chrM.maf.bgz --interval mm8.chrM:6938-13030 -o m2.maf`
83
83
  And I run `diff m1.maf m2.maf`
84
84
  Then the exit status should be 0
85
+
86
+ @no_jruby
87
+ Scenario: One-based indexing with maf_extract
88
+ Given test files:
89
+ | mm8_chr7_tiny.maf |
90
+ | mm8_chr7_tiny.kct |
91
+ When I run `sh -c 'maf_extract -d . --one-based --interval mm8.chr7:80082592-80082713 | grep "^a" | wc -l'`
92
+ Then it should pass with:
93
+ """
94
+ 2
95
+ """
@@ -18,6 +18,17 @@ Feature: MAF slicing
18
18
  And write all the matched blocks
19
19
  Then the output should match, except whitespace, "mm8_chr7_tiny_slice1.maf"
20
20
 
21
+ Scenario: Interval covering two blocks, using directory access, counting
22
+ Given indexed MAF files in "test/data"
23
+ When I enable the :remove_gaps parser option
24
+ And filter for only the species
25
+ | mm8 |
26
+ | rn4 |
27
+ And I extract a slice over the genomic interval
28
+ | chrom | start | end |
29
+ | mm8.chr7 | 80082350 | 80082380 |
30
+ Then 2 blocks are obtained
31
+
21
32
  Scenario: Interval covering two blocks, using directory access
22
33
  Given indexed MAF files in "test/data"
23
34
  When I enable the :remove_gaps parser option
@@ -30,6 +30,7 @@ end
30
30
  Then /^an alignment block can be obtained$/ do
31
31
  @block = @parser.parse_block
32
32
  @block.should_not be_nil
33
+ @block.is_a?(Bio::MAF::Block).should be_true
33
34
  end
34
35
 
35
36
  Then /^the alignment block has (\d+) sequences$/ do |n_seq|
@@ -160,7 +160,7 @@ Feature: Join alignment blocks with reference data
160
160
  | gap-sp1.fa.gz |
161
161
  | gap-1.maf |
162
162
  | gap-1.kct |
163
- When I run `maf_tile --reference gap-sp1.fa.gz --interval 0:50 -s sp1:mouse -s sp2:nautilus -s sp3:jaguar gap-1.maf gap-1.kct`
163
+ When I run `maf_tile --reference gap-sp1.fa.gz --interval 0-50 -s sp1:mouse -s sp2:nautilus -s sp3:jaguar gap-1.maf gap-1.kct`
164
164
  Then it should pass with:
165
165
  """
166
166
  >mouse
@@ -176,7 +176,7 @@ Feature: Join alignment blocks with reference data
176
176
  Given test files:
177
177
  | gap-1.maf |
178
178
  | gap-1.kct |
179
- When I run `maf_tile --interval 0:50 -s sp1:mouse -s sp2:nautilus -s sp3:jaguar gap-1.maf gap-1.kct`
179
+ When I run `maf_tile --interval 0-50 -s sp1:mouse -s sp2:nautilus -s sp3:jaguar gap-1.maf gap-1.kct`
180
180
  Then it should pass with:
181
181
  """
182
182
  >mouse
@@ -198,7 +198,10 @@ Feature: Join alignment blocks with reference data
198
198
  sp1.chr1 12 36
199
199
  """
200
200
  When I run `maf_tile -s sp1:mouse -s sp2:nautilus -s sp3:jaguar --output-base selected --bed example.bed --reference gap-sp1.fa.gz gap-1.maf gap-1.kct`
201
- Then the file "selected_12-36.fa" should contain exactly:
201
+ Then it should pass with:
202
+ """
203
+ """
204
+ And the file "selected_12-36.fa" should contain exactly:
202
205
  """
203
206
  >mouse
204
207
  GCTGAGGGC--AGTTGTGTCAGGGCG
@@ -214,7 +217,7 @@ Feature: Join alignment blocks with reference data
214
217
  Given test files:
215
218
  | mm8_chr7_tiny.maf |
216
219
  | mm8_chr7_tiny.kct |
217
- When I run `maf_tile -s mm8 -s rn4 -s hg18 --interval 80082334:80082344 mm8_chr7_tiny.maf`
220
+ When I run `maf_tile -s mm8 -s rn4 -s hg18 --interval 80082334-80082344 mm8_chr7_tiny.maf`
218
221
  Then it should pass with:
219
222
  """
220
223
  >mm8
@@ -230,7 +233,7 @@ Feature: Join alignment blocks with reference data
230
233
  Given test files:
231
234
  | mm8_chr7_tiny.maf |
232
235
  | mm8_chr7_tiny.kct |
233
- When I run `maf_tile -s mm8 -s rn4 -s hg18 --interval mm8.chr7:80082334:80082344 .`
236
+ When I run `maf_tile -s mm8 -s rn4 -s hg18 --interval mm8.chr7:80082334-80082344 .`
234
237
  Then it should pass with:
235
238
  """
236
239
  >mm8
@@ -241,3 +244,18 @@ Feature: Join alignment blocks with reference data
241
244
  --------GG
242
245
  """
243
246
 
247
+ @no_jruby
248
+ Scenario: Tile with CLI tool and directory, 1-based
249
+ Given test files:
250
+ | mm8_chr7_tiny.maf |
251
+ | mm8_chr7_tiny.kct |
252
+ When I run `maf_tile -s mm8 -s rn4 -s hg18 --one-based --interval mm8.chr7:80082335-80082344 .`
253
+ Then it should pass with:
254
+ """
255
+ >mm8
256
+ GGGCTGAGGG
257
+ >rn4
258
+ GGGCTGAGGG
259
+ >hg18
260
+ --------GG
261
+ """
@@ -11,7 +11,11 @@
11
11
  require 'bio-logger'
12
12
  log = Bio::Log::LoggerPlus.new('bio-maf')
13
13
  log.outputters = Bio::Log::Outputter.stderr
14
- log.level = Bio::Log::WARN
14
+ log.level = if ENV['BIO_MAF_DEBUG']
15
+ Bio::Log::DEBUG
16
+ else
17
+ Bio::Log::WARN
18
+ end
15
19
 
16
20
  require 'bio/ucsc'
17
21
  require 'bio/maf'
@@ -4,3 +4,4 @@ require 'bio/maf/index'
4
4
  require 'bio/maf/parser'
5
5
  require 'bio/maf/writer'
6
6
  require 'bio/maf/tiler'
7
+ require 'bio/maf/jobs'
@@ -125,7 +125,7 @@ module Bio
125
125
  # @param [Enumerable<Bio::GenomicInterval>] intervals genomic
126
126
  # intervals to parse.
127
127
  # @yield [block] each {Block} matched, in turn
128
- # @return [Enumerable<Block>] each matching {Block}, if no block given
128
+ # @return [Array<Block>] each matching {Block}, if no block given
129
129
  # @api public
130
130
  # @see KyotoIndex#find
131
131
  def find(intervals, &blk)
@@ -137,13 +137,16 @@ module Bio
137
137
  end
138
138
  end
139
139
  by_chrom.each do |chrom, c_intervals|
140
- index = @indices[chrom]
141
- with_parser(chrom) do |parser|
142
- index.find(c_intervals, parser, block_filter, &blk)
140
+ with_index(chrom) do |index|
141
+ with_parser(chrom) do |parser|
142
+ index.find(c_intervals, parser, block_filter, &blk)
143
+ end
143
144
  end
144
145
  end
145
146
  else
146
- enum_for(:find, intervals)
147
+ acc = []
148
+ self.find(intervals) { |block| acc << block }
149
+ acc
147
150
  end
148
151
  end
149
152
 
@@ -156,13 +159,14 @@ module Bio
156
159
  # @yield [tiler] a {Tiler} ready to operate on the given interval
157
160
  # @api public
158
161
  def tile(interval)
159
- index = chrom_index(interval.chrom)
160
- with_parser(interval.chrom) do |parser|
161
- tiler = Tiler.new
162
- tiler.index = index
163
- tiler.parser = parser
164
- tiler.interval = interval
165
- yield tiler
162
+ with_index(interval.chrom) do |index|
163
+ with_parser(interval.chrom) do |parser|
164
+ tiler = Tiler.new
165
+ tiler.index = index
166
+ tiler.parser = parser
167
+ tiler.interval = interval
168
+ yield tiler
169
+ end
166
170
  end
167
171
  end
168
172
 
@@ -172,13 +176,15 @@ module Bio
172
176
  #
173
177
  # @param [Bio::GenomicInterval] interval interval to search
174
178
  # @yield [block] each {Block} matched, in turn
175
- # @return [Enumerable<Block>] each matching {Block}, if no block given
179
+ # @return [Array<Block>] each matching {Block}, if no block given
176
180
  # @api public
177
181
  # @see KyotoIndex#slice
178
182
  def slice(interval, &blk)
179
- index = chrom_index(interval.chrom)
180
- with_parser(interval.chrom) do |parser|
181
- index.slice(interval, parser, &blk)
183
+ with_index(interval.chrom) do |index|
184
+ with_parser(interval.chrom) do |parser|
185
+ s = index.slice(interval, parser, block_filter, &blk)
186
+ block_given? ? s : s.to_a
187
+ end
182
188
  end
183
189
  end
184
190
 
@@ -193,12 +199,17 @@ module Bio
193
199
  scan_dir(options[:dir])
194
200
  elsif options[:maf]
195
201
  if options[:index]
196
- register_index(KyotoIndex.open(options[:index]),
202
+ LOG.debug { "Opening index file #{options[:index]}" }
203
+ index = KyotoIndex.open(options[:index])
204
+ register_index(index,
197
205
  options[:maf])
206
+ index.close
198
207
  else
199
- idx = find_index_file(options[:maf])
200
- if idx
201
- register_index(KyotoIndex.open(idx), options[:maf])
208
+ idx_f = find_index_file(options[:maf])
209
+ if idx_f
210
+ index = KyotoIndex.open(idx_f)
211
+ register_index(index, options[:maf])
212
+ index.close
202
213
  end
203
214
  end
204
215
  else
@@ -229,7 +240,11 @@ module Bio
229
240
  unless index.maf_file == File.basename(maf)
230
241
  raise "Index #{index.path} was created for #{index.maf_file}, not #{File.basename(maf)}!"
231
242
  end
232
- @indices[index.ref_seq] = index
243
+ if index.path.to_s.start_with? '%'
244
+ @indices[index.ref_seq] = index
245
+ else
246
+ @indices[index.ref_seq] = index.path.to_s
247
+ end
233
248
  @maf_by_chrom[index.ref_seq] = maf
234
249
  end
235
250
 
@@ -241,6 +256,7 @@ module Bio
241
256
  if File.exist? maf
242
257
  register_index(index, maf)
243
258
  end
259
+ index.close
244
260
  end
245
261
  end
246
262
 
@@ -249,7 +265,23 @@ module Bio
249
265
  unless @indices.has_key? chrom
250
266
  raise "No index available for chromosome #{chrom}!"
251
267
  end
252
- @indices[chrom]
268
+ index = @indices[chrom]
269
+ if index.is_a? KyotoIndex
270
+ # temporary
271
+ index
272
+ else
273
+ KyotoIndex.open(index)
274
+ end
275
+ end
276
+
277
+ def with_index(chrom)
278
+ index = chrom_index(chrom)
279
+ LOG.debug { "Selected index #{index} for sequence #{chrom}." }
280
+ begin
281
+ yield index
282
+ ensure
283
+ index.close unless index.path.to_s.start_with? '%'
284
+ end
253
285
  end
254
286
 
255
287
  # @api private
@@ -403,7 +435,7 @@ module Bio
403
435
  def find(intervals, parser, filter={}, &blk)
404
436
  start = Time.now
405
437
  fl = fetch_list(intervals, filter)
406
- LOG.debug { sprintf("Built fetch list of %d items in %.3fs.\n",
438
+ LOG.debug { sprintf("Built fetch list of %d items in %.3fs.",
407
439
  fl.size,
408
440
  Time.now - start) }
409
441
  if ! fl.empty?
@@ -426,6 +458,7 @@ module Bio
426
458
  yield block.slice(interval)
427
459
  end
428
460
  else
461
+ LOG.debug { "accumulating results of #slice" }
429
462
  enum_for(:slice, interval, parser, filter)
430
463
  end
431
464
  end
@@ -436,6 +469,7 @@ module Bio
436
469
  def initialize(path, db_arg=nil)
437
470
  @species = {}
438
471
  @species_max_id = -1
472
+ @index_sequences = {}
439
473
  @max_sid = -1
440
474
  if db_arg || ((path.size > 1) and File.exist?(path))
441
475
  mode = KyotoCabinet::DB::OREADER
@@ -444,15 +478,25 @@ module Bio
444
478
  end
445
479
  @db = db_arg || KyotoCabinet::DB.new
446
480
  @path = path
447
- unless db_arg || db.open(path.to_s, mode)
481
+ path_str = "#{path.to_s}#opts=ls#dfunit=100000"
482
+ unless db_arg || db.open(path_str, mode)
448
483
  raise "Could not open DB file!"
449
484
  end
450
485
  if mode == KyotoCabinet::DB::OREADER
486
+ version = db[FORMAT_VERSION_KEY].to_i
487
+ if version != FORMAT_VERSION
488
+ raise "Index #{path} is version #{version}, expecting version #{FORMAT_VERSION}!"
489
+ end
451
490
  @maf_file = db[FILE_KEY]
452
491
  self.ref_seq = db[REF_SEQ_KEY]
453
492
  load_index_sequences
454
493
  load_species
455
494
  end
495
+ @mutex = Mutex.new
496
+ end
497
+
498
+ def to_s
499
+ "#<KyotoIndex path=#{path}>"
456
500
  end
457
501
 
458
502
  # Reopen the same DB handle read-only. Only useful for unit tests.
@@ -576,6 +620,11 @@ module Bio
576
620
  end
577
621
 
578
622
  def scan_bins_parallel(chrom_id, bin_intervals, filters)
623
+ LOG.debug {
624
+ sprintf("Beginning scan of %d bin intervals %s filters.",
625
+ bin_intervals.size,
626
+ filters.empty? ? "without" : "with")
627
+ }
579
628
  start = Time.now
580
629
  n_threads = ENV['profile'] ? 1 : java.lang.Runtime.runtime.availableProcessors
581
630
  jobs = java.util.concurrent.ConcurrentLinkedQueue.new(bin_intervals.to_a)
@@ -603,7 +652,7 @@ module Bio
603
652
  n_completed += 1
604
653
  end
605
654
  threads.each { |t| t.join }
606
- LOG.debug { sprintf("Matched %d index records with %d threads in %.3f seconds.\n",
655
+ LOG.debug { sprintf("Matched %d index records with %d threads in %.3f seconds.",
607
656
  to_fetch.size, n_threads, Time.now - start) }
608
657
  to_fetch
609
658
  end
@@ -676,30 +725,55 @@ module Bio
676
725
  || gi.include?(i_start)
677
726
  end
678
727
 
679
- def build(parser, ref_only=true)
680
- db[FILE_KEY] = File.basename(parser.file_spec)
728
+ CHUNK_THRESHOLD_BYTES = 50 * 1024 * 1024
729
+ CHUNK_THRESHOLD_BLOCKS = 1000
730
+
731
+ def prep(file_spec, compression, ref_only)
732
+ db[FORMAT_VERSION_KEY] = FORMAT_VERSION
733
+ db[FILE_KEY] = File.basename(file_spec)
681
734
  @maf_file = db[FILE_KEY]
682
- if parser.compression
683
- db[COMPRESSION_KEY] = parser.compression.to_s
735
+ if compression
736
+ db[COMPRESSION_KEY] = compression.to_s
684
737
  end
685
- first_block = parser.parse_block
686
- self.ref_seq = first_block.sequences.first.source
687
738
  @ref_only = ref_only
688
- db[REF_SEQ_KEY] = ref_seq
689
- db[FORMAT_VERSION_KEY] = FORMAT_VERSION
690
- @index_sequences = {}
691
- index_blocks([first_block])
739
+ @seen_first = false
740
+ end
741
+
742
+ def build(parser, ref_only=true)
743
+ prep(parser.file_spec,
744
+ parser.compression,
745
+ ref_only)
746
+
692
747
  n = 0
693
- parser.each_block.each_slice(1000).each do |blocks|
694
- index_blocks(blocks)
695
- n += blocks.size
748
+ acc = []
749
+ acc_bytes = 0
750
+ parser.each_block do |block|
751
+ acc << block
752
+ acc_bytes += block.size
753
+ if acc_bytes > CHUNK_THRESHOLD_BYTES \
754
+ || acc.size > CHUNK_THRESHOLD_BLOCKS
755
+ index_blocks(acc)
756
+ acc = []
757
+ acc_bytes = 0
758
+ end
759
+ n += 1
696
760
  end
761
+ index_blocks(acc)
697
762
  LOG.debug { "Created index for #{n} blocks and #{@index_sequences.size} sequences." }
698
763
  db.synchronize(true)
699
764
  end
700
765
 
701
766
  def index_blocks(blocks)
702
- h = blocks.map { |b| entries_for(b) }.reduce(:merge!)
767
+ h = @mutex.synchronize do
768
+ if ! @seen_first
769
+ # set the reference sequence from the first block
770
+ first_block = blocks.first
771
+ self.ref_seq = first_block.sequences.first.source
772
+ db[REF_SEQ_KEY] = ref_seq
773
+ @seen_first = true
774
+ end
775
+ blocks.map { |b| entries_for(b) }.reduce(:merge!)
776
+ end
703
777
  db.set_bulk(h, false)
704
778
  end
705
779
 
@@ -719,8 +793,11 @@ module Bio
719
793
  if ! sid
720
794
  @max_sid += 1
721
795
  sid = @max_sid
722
- db.set("sequence:#{name}", sid.to_s)
723
- index_sequences[name] = sid
796
+ # "" << foo is hideous but apparently what it takes to get a
797
+ # non-shared copy of a string on JRuby...
798
+ name_copy = "" << name
799
+ db.set("sequence:#{name_copy}", sid.to_s)
800
+ index_sequences[name_copy] = sid
724
801
  end
725
802
  return sid
726
803
  end
@@ -739,22 +816,24 @@ module Bio
739
816
  # example: otoGar1.scaffold_104707.1-93001
740
817
  parts = seq.split('.', 2)
741
818
  if parts.size == 2
742
- species_name = parts[0]
743
- if species.has_key? species_name
744
- return species[species_name]
745
- else
746
- species_id = @species_max_id + 1
747
- if species_id >= MAX_SPECIES
748
- raise "cannot index MAF file with more than #{MAX_SPECIES} species"
749
- end
750
- species[species_name] = species_id
751
- db["species:#{species_name}"] = species_id
752
- @species_max_id = species_id
753
- return species_id
754
- end
819
+ # "" << foo is hideous but apparently what it takes to get a
820
+ # non-shared copy of a string on JRuby...
821
+ species_name = "" << parts[0]
755
822
  else
756
823
  # not in species.sequence format, apparently
757
- return nil
824
+ species_name = "" << seq
825
+ end
826
+ if species.has_key? species_name
827
+ return species[species_name]
828
+ else
829
+ species_id = @species_max_id + 1
830
+ if species_id >= MAX_SPECIES
831
+ raise "cannot index MAF file with more than #{MAX_SPECIES} species"
832
+ end
833
+ species[species_name] = species_id
834
+ db["species:#{species_name}"] = species_id
835
+ @species_max_id = species_id
836
+ return species_id
758
837
  end
759
838
  end
760
839
 
@@ -769,20 +848,27 @@ module Bio
769
848
  end
770
849
 
771
850
  def entries_for(block)
772
- unless block.ref_seq.source == @ref_seq
773
- raise "Inconsistent reference sequence: expected #{@ref_seq}, got #{block.ref_seq.source}"
774
- end
775
- h = {}
776
- val = build_block_value(block)
777
- to_index = ref_only ? [block.sequences.first] : block.sequences
778
- to_index.each do |seq|
779
- seq_id = seq_id_for(seq.source)
780
- seq_end = seq.start + seq.size
781
- bin = Bio::Ucsc::UcscBin.bin_from_range(seq.start, seq_end)
782
- key = [255, seq_id, bin, seq.start, seq_end].pack(KEY_FMT)
783
- h[key] = val
851
+ begin
852
+ unless block.ref_seq.source == @ref_seq
853
+ raise "Inconsistent reference sequence: expected #{@ref_seq}, got #{block.ref_seq.source}"
854
+ end
855
+ h = {}
856
+ val = build_block_value(block)
857
+ to_index = ref_only ? [block.sequences.first] : block.sequences
858
+ to_index.each do |seq|
859
+ seq_id = seq_id_for(seq.source)
860
+ # size 0 occurs in e.g. upstream1000.maf.gz
861
+ next if seq.size == 0
862
+ seq_end = seq.start + seq.size
863
+ bin = Bio::Ucsc::UcscBin.bin_from_range(seq.start, seq_end)
864
+ key = [255, seq_id, bin, seq.start, seq_end].pack(KEY_FMT)
865
+ h[key] = val
866
+ end
867
+ return h
868
+ rescue Exception => e
869
+ LOG.error "Failed to index block at offset #{block.offset}:\n#{block}"
870
+ raise e
784
871
  end
785
- return h
786
872
  end
787
873
  end # class KyotoIndex
788
874
 
@@ -861,6 +947,10 @@ module Bio
861
947
  @l = l
862
948
  end
863
949
 
950
+ def empty?
951
+ @l.empty?
952
+ end
953
+
864
954
  def match(entry)
865
955
  return ! @l.find { |f| ! f.call(entry) }
866
956
  end