RubyGems - bio-polyploid-tools - Versions diffs - 0.1.0 → 0.2.3 - Mend

bio-polyploid-tools 0.1.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/Gemfile +4 -3
data/Gemfile.lock +8 -8
data/README.md +45 -0
data/VERSION +1 -1
data/bin/bfr.rb +2 -7
data/bin/count_variations.rb +1 -1
data/bin/find_best_exonerate.rb +17 -0
data/bin/hexaploid_primers.rb +2 -2
data/bin/homokaryot_primers.rb +1 -1
data/bin/polymarker.rb +2 -2
data/bin/snps_between_bams.rb +37 -7
data/bio-polyploid-tools.gemspec +17 -13
data/lib/bio/BFRTools.rb +27 -261
data/lib/bio/BIOExtensions.rb +0 -124
data/lib/bio/PolyploidTools/ChromosomeArm.rb +1 -1
data/lib/bio/PolyploidTools/ExonContainer.rb +6 -5
data/lib/bio/PolyploidTools/Marker.rb +2 -2
data/lib/bio/PolyploidTools/SNP.rb +5 -4
data/lib/bio/db/exonerate.rb +1 -1
data/test/test_bfr.rb +101 -9
metadata +28 -12
data/lib/bio/SAMToolsExtensions.rb +0 -284
data/lib/bio/db/fastadb.rb +0 -164

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
-  data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
+  metadata.gz: 019bf8dc15f35de0be9a55567b8041f9b98ac326
+  data.tar.gz: 3e0a76bbefead5c5284c64a01b36645748a70098
 SHA512:
-  metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
-  data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
+  metadata.gz: 98e2d6c023ee8d89014efe65da619f0a98808c1540c3773aaef901de9f5c2338a3cc4645bdee1a3cdc430d525587c27d576d1f19e2ac8e59d7724a6efaac5901
+  data.tar.gz: 3d09c9a1972b7538eb160ee89786f9bdd3f8c52fec554da110222241767e7a17f6efd7f0f42a219fb06cd3689037fbee2ec88eeae0cb94e40333a5c491259421

data/Gemfile CHANGED

@@ -2,12 +2,13 @@ source "http://rubygems.org"
 # Add dependencies required to use your gem here.
 # Example:
 #   gem "activesupport", ">= 2.3.5"
-gem "bio", "= 1.4.2"
-gem "bio-samtools", "= 0.6.2"
+gem "bio", ">= 1.4.3"
+gem "bio-samtools", ">= 2.0.3"
 gem "rake"
 gem "jeweler"
-#gem "systemu", ">=2.5.2"
+gem "systemu", ">=2.5.2"
 group :development do
 #  gem "shoulda", ">= 0"

data/Gemfile.lock CHANGED

@@ -3,17 +3,16 @@ GEM
   specs:
     addressable (2.3.6)
     atomic (1.1.16)
-    bio (1.4.2)
-    bio-samtools (0.6.2)
+    bio (1.4.3.0001)
+    bio-samtools (2.0.3)
       bio (>= 1.4.2)
-      ffi
-      systemu (>= 2.5.2)
+      bio-svgenes (>= 0.4.1)
+    bio-svgenes (0.4.1)
     builder (3.2.2)
     descendants_tracker (0.0.4)
       thread_safe (~> 0.3, >= 0.3.1)
     faraday (0.9.0)
       multipart-post (>= 1.2, < 3)
-    ffi (1.9.3)
     git (1.2.6)
     github_api (0.11.3)
       addressable (~> 2.3)
@@ -53,7 +52,7 @@ GEM
     rake (10.2.2)
     rdoc (4.1.1)
       json (~> 1.4)
-    systemu (2.6.0)
+    systemu (2.6.4)
     thread_safe (0.3.1)
       atomic (>= 1.1.7, < 2)
@@ -61,7 +60,8 @@ PLATFORMS
   ruby
 DEPENDENCIES
-  bio (= 1.4.2)
-  bio-samtools (= 0.6.2)
+  bio (>= 1.4.3)
+  bio-samtools (>= 2.0.3)
   jeweler
   rake
+  systemu (>= 2.5.2)

data/README.md ADDED

@@ -0,0 +1,45 @@
+bio-polyploid-tools
+===================
+Introduction
+-------------
+These tools are designed to deal with polyploid wheat. The first tool is to design KASP primers,
+making them as specific as possible.
+Installation
+------------
+'gem install bio-polyploid-tools'
+You need to have in your $PATH the following programs:
+* [MAFFT](http://mafft.cbrc.jp/alignment/software/)
+* [primer3](http://primer3.sourceforge.net/releases.php)
+* [exonerate](http://www.ebi.ac.uk/~guy/exonerate/)
+The code has been developed on ruby 2.1.0, but it should work on 1.9.3 and above.
+Polymarker
+----------
+To run polymarker with the CSS wheat contigs, you need to unzip the
+reference file [Triticum_aestivum.IWGSP1.22.dna_rm.genome.fa.gz](ftp://ftp.ensemblgenomes.org/pub/release-22/plants/fasta/triticum_aestivum/dna/).
+polymarker.rb --contigs Triticum_aestivum.IWGSP1.22.dna_rm.genome.fa --marker_list snp_list.csv --output output_folder
+The snp_list file must follow the convention
+<ID>,<Chromosome>,<SEQUENCE>
+with the SNP inside the sequence in the format [A/T]. As a reference, look at test/data/short_primer_design_test.csv
+Notes
+-----
+* If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
+BUG: Blocks with NNNs are picked and treated as semi-specific.
+BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
+TODO: If reading from a reference file, only get one reference to align when the region is queried several times
+TODO: Add a parameter file to configure the alignments.
+TODO: Produce primers for products of different sizes

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.0
1	+ 0.2.3

data/bin/bfr.rb CHANGED

@@ -1,3 +1,4 @@
+#!/usr/bin/env ruby
 require 'rubygems'
 #require 'extensions/all'
 require 'bio-samtools'
@@ -70,18 +71,12 @@ chunk_size = options[:chunk_size]
 output_filename =  options[:output_filename]
 stats_file = options[:stats_file]
-#reference = ARGV[6]
 min = chunk * chunk_size
 max = min + chunk_size
-#AvocetS
 parental_1=options[:parent_1]
-#AvocetS (Yr15)
 parental_2=options[:parent_2]
@@ -89,7 +84,7 @@ bulk_1 = options[:bulk_1]
 bulk_2 = options[:bulk_2]
-fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
+fasta_db = Bio::DB::Fasta::FastaFile.new({:fasta=>reference})
 fasta_db.load_fai_entries

data/bin/count_variations.rb CHANGED

@@ -14,7 +14,7 @@ require path
 puts  ARGV[0]
-fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
+fasta_db = Bio::DB::Fasta::FastaFile.new( {:fasta=>ARGV[0]})
 fasta_db.load_fai_entries
 bam1 =  Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})

data/bin/find_best_exonerate.rb ADDED

@@ -0,0 +1,17 @@
+#!/usr/bin/env ruby
+found_cointigs = Set.new
+Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model, :chunk=>chunk, :total_chunks=>}) do |aln|
+  if aln.identity > min_identity
+    exo_f.puts aln.line
+    unless found_cointigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
+      found_cointigs.add(aln.target_id)
+      entry = fasta_file.index.region_for_entry(aln.target_id)
+      raise ExonerateException.new,  "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
+      region = entry.get_full_region
+      seq = fasta_file.fetch_sequence(region)
+      contigs_f.puts(">#{aln.target_id}\n#{seq}")
+    end
+  end
+end

data/bin/hexaploid_primers.rb CHANGED

@@ -43,7 +43,7 @@ snps = Array.new
 #0. Load the fasta index
 fasta_reference_db = nil
 if fasta_reference
-  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
+  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
   fasta_reference_db.load_fai_entries
   p "Fasta reference: #{fasta_reference}"
 end
@@ -99,7 +99,7 @@ Dir.foreach(path_to_contigs) do |filename |
     puts filename
     target="#{path_to_contigs}/#{filename}"
-    fasta_file = Bio::DB::Fasta::FastaFile.new(target)
+    fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
     fasta_file.load_fai_entries
     Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
       if aln.identity > min_identity

data/bin/homokaryot_primers.rb CHANGED

@@ -82,7 +82,7 @@ snps = Array.new
 #0. Load the fasta index
 fasta_reference_db = nil
 if reference_file
-  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
+  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>reference_file})
   fasta_reference_db.load_fai_entries
   p "Fasta reference: #{reference_file}"
 end

data/bin/polymarker.rb CHANGED

@@ -87,7 +87,7 @@ snps = Array.new
 #0. Load the fasta index
 fasta_reference_db = nil
 if fasta_reference
-  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
+  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
   fasta_reference_db.load_fai_entries
   p "Fasta reference: #{fasta_reference}"
 end
@@ -141,7 +141,7 @@ filename=path_to_contigs
 puts filename
 target=filename
-fasta_file = Bio::DB::Fasta::FastaFile.new(target)
+fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
 fasta_file.load_fai_entries
 found_cointigs = Set.new

data/bin/snps_between_bams.rb CHANGED

@@ -15,7 +15,7 @@ require path
-fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
+fasta_db = Bio::DB::Fasta::FastaFile.new(:fasta=>ARGV[0])
 fasta_db.load_fai_entries
 bam1 =  Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
 bam2 =  Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})
@@ -23,7 +23,7 @@ bam2 =  Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})
 output_prefix = ARGV[3]
-block_size=300
+block_size=1000
 min_cov = ARGV[4].to_i ? ARGV[4].to_i : 10
 chunk = ARGV[5].to_i
@@ -54,6 +54,38 @@ fasta_db.index.entries.each do | r |
   begin
+<<<<<<< HEAD
+    reg_a = bam1.fetch_region({:region=>region,  :min_cov=>min_cov, :A=>1})
+    reg_b = bam2.fetch_region({:region=>region,  :min_cov=>min_cov, :A=>1})
+    cons_1 = reg_a.consensus
+    cons_2 = reg_b.consensus
+    snps_1 = cons_1.count_ambiguities
+    snps_2 = cons_2.count_ambiguities
+    called_1 = reg_a.called
+    called_2 = reg_b.called
+    snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
+    snps_per_1k_1   = (block_size * snps_1.to_f   ) / region.size
+    snps_per_1k_2   = (block_size * snps_2.to_f   ) / region.size
+    snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
+    hist_1[snps_per_1k_1.to_i] += 1
+    hist_2[snps_per_1k_2.to_i] += 1
+    table_file.print "#{r.id}\t#{region.size}\t"
+    table_file.print "#{snps_1}\t#{called_1}\t#{snps_per_1k_1}\t"
+    table_file.print "#{snps_2}\t#{called_2}\t#{snps_per_1k_2}\t"
+    table_file.print "#{snps_tot}\t#{snps_per_1k_tot}\n"
+    fasta_file.puts ">#{r.id}_1"
+    fasta_file.puts "#{cons_1}"
+    fasta_file.puts ">#{r.id}_2"
+    fasta_file.puts "#{cons_2}"
+=======
     cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
     cons_2 = bam2.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
@@ -62,13 +94,10 @@ fasta_db.index.entries.each do | r |
       snps_1 = cons_1.count_ambiguities
       snps_2 = cons_2.count_ambiguities
-      called_1 = cons_1.upper_case_count
-      called_2 = cons_2.upper_case_count
       snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
-      snps_per_1k_1   = (block_size * snps_1.to_f   ) / called_1
-      snps_per_1k_2   = (block_size * snps_2.to_f   ) / called_2
+      snps_per_1k_1   = (block_size * snps_1.to_f   ) / region.size
+      snps_per_1k_2   = (block_size * snps_2.to_f   ) / region.size
       snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
       hist_1[snps_per_1k_1.to_i] += 1
@@ -83,6 +112,7 @@ fasta_db.index.entries.each do | r |
       fasta_file.puts ">#{r.id}_2"
       fasta_file.puts "#{cons_2}"
     end
+>>>>>>> 1b60bd09fdb1b087d6cb53c643ff36e536efe4a3
   rescue Exception => e
     $stderr.puts "Unable to process #{region}: #{e.to_s}"
   end

data/bio-polyploid-tools.gemspec CHANGED

@@ -2,32 +2,35 @@
 # DO NOT EDIT THIS FILE DIRECTLY
 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
-# stub: bio-polyploid-tools 0.1.0 ruby lib
+# stub: bio-polyploid-tools 0.2.3 ruby lib
 Gem::Specification.new do |s|
   s.name = "bio-polyploid-tools"
-  s.version = "0.1.0"
+  s.version = "0.2.3"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib"]
   s.authors = ["Ricardo H.  Ramirez-Gonzalez"]
-  s.date = "2014-03-31"
+  s.date = "2014-04-27"
   s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
   s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
-  s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
+  s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "find_best_exonerate.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
   s.extra_rdoc_files = [
-    "README"
+    "README",
+    "README.md"
   ]
   s.files = [
     "Gemfile",
     "Gemfile.lock",
     "README",
+    "README.md",
     "Rakefile",
     "VERSION",
     "bin/bfr.rb",
     "bin/count_variations.rb",
     "bin/filter_blat_by_target_coverage.rb",
     "bin/find_best_blat_hit.rb",
+    "bin/find_best_exonerate.rb",
     "bin/hexaploid_primers.rb",
     "bin/homokaryot_primers.rb",
     "bin/map_markers_to_contigs.rb",
@@ -78,9 +81,7 @@ Gem::Specification.new do |s|
     "lib/bio/PolyploidTools/PrimerRegion.rb",
     "lib/bio/PolyploidTools/SNP.rb",
     "lib/bio/PolyploidTools/SNPSequence.rb",
-    "lib/bio/SAMToolsExtensions.rb",
     "lib/bio/db/exonerate.rb",
-    "lib/bio/db/fastadb.rb",
     "lib/bio/db/primer3.rb",
     "lib/bioruby-polyploid-tools.rb",
     "test/data/BS00068396_51.fa",
@@ -119,21 +120,24 @@ Gem::Specification.new do |s|
     s.specification_version = 4
     if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
-      s.add_runtime_dependency(%q<bio>, ["= 1.4.2"])
-      s.add_runtime_dependency(%q<bio-samtools>, ["= 0.6.2"])
+      s.add_runtime_dependency(%q<bio>, [">= 1.4.3"])
+      s.add_runtime_dependency(%q<bio-samtools>, [">= 2.0.3"])
       s.add_runtime_dependency(%q<rake>, [">= 0"])
       s.add_runtime_dependency(%q<jeweler>, [">= 0"])
+      s.add_runtime_dependency(%q<systemu>, [">= 2.5.2"])
     else
-      s.add_dependency(%q<bio>, ["= 1.4.2"])
-      s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
+      s.add_dependency(%q<bio>, [">= 1.4.3"])
+      s.add_dependency(%q<bio-samtools>, [">= 2.0.3"])
       s.add_dependency(%q<rake>, [">= 0"])
       s.add_dependency(%q<jeweler>, [">= 0"])
+      s.add_dependency(%q<systemu>, [">= 2.5.2"])
     end
   else
-    s.add_dependency(%q<bio>, ["= 1.4.2"])
-    s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
+    s.add_dependency(%q<bio>, [">= 1.4.3"])
+    s.add_dependency(%q<bio-samtools>, [">= 2.0.3"])
     s.add_dependency(%q<rake>, [">= 0"])
     s.add_dependency(%q<jeweler>, [">= 0"])
+    s.add_dependency(%q<systemu>, [">= 2.5.2"])
   end
 end

data/lib/bio/BFRTools.rb CHANGED

@@ -5,252 +5,16 @@ require 'rubygems'
 #require 'bio/db/vcf'
 require 'pathname'
 #require_relative 'BIOExtensions.rb'
-require_relative 'db/fastadb.rb'
 require 'bio'
+require 'bio-samtools'
 require "set"
 require 'systemu'
 require 'json'
 #require 'strmask'
-=begin
-Extends the methods to be able to calculate the BFR and a consensus from the pileup
-=end
-class Bio::DB::Pileup
-  #attr_accessor :minumum_ratio_for_iup_consensus
-  #@minumum_ratio_for_iup_consensus = 0.20
-  #Returns a hash with the count of bases
-  def bases
-    return @bases if @bases
-    @bases = self.non_refs
-    #puts self.ref_count
-    @bases[self.ref_base.upcase.to_sym] = self.ref_count
-    @bases
-  end
-  def base_coverage
-    total = 0
-    @bases.each do |k,v|
-      total += v
-    end
-    total
-  end
-  def base_ratios
-    return @base_ratios if @base_ratios
-    bases = self.bases
-    @base_ratios = Hash.new
-    bases.each do |k,v|
-      @base_ratios[k] = v.to_f/self.base_coverage.to_f
-    end
-    @base_ratios
-  end
-  # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
-  def consensus_iuap(minumum_ratio_for_iup_consensus)
-    minumum_ratio_for_iup_consensus
-    if @consensus_iuap.nil?
-      @consensus_iuap = self.ref_base.downcase
-      bases = self.bases
-      tmp = String.new
-      bases.each do |k,v|
-        tmp << k[0].to_s if v/self.coverage > minumum_ratio_for_iup_consensus
-      end
-      if tmp.length > 0
-        @consensus_iuap = Bio::NucleicAcid.to_IUAPC(tmp)
-      end
-    end
-    @consensus_iuap
-  end
-end
-class Bio::DB::Fasta::Region
-  attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases
-  #TODO: Debug, as it hasnt been tested in the actual code.
-  def base_ratios_for_base(base)
-    @all_ratios = Hash.new unless @all_ratios
-    unless @all_ratios[base]
-      ratios = Array.new
-      for i in (0..region.size-1)
-        ratios << @base_ratios[i][base]
-      end
-      @all_ratios[base] = ratios
-    end
-    @all_ratios[base]
-  end
-end
-class Bio::DB::Sam::SAMException < RuntimeError
-end
-class Bio::DB::Sam
-  attr_accessor :minumum_ratio_for_iup_consensus
-  attr_reader :cached_regions
-  #attr_accessor :pileup_cache
-  @minumum_ratio_for_iup_consensus = 0.20
-  #Same as mpilup, but it caches the pileup, so if you want several operations on the same set of regions
-  #the pile for different operations, it won't execute the mpilup command several times
-  #Whenever you finish using a region, call mpileup_clear_cache to free the cache
-  #The argument Region is required, as it will be the key for the underlying hash.
-  #We asume that the options are constant. If they are not, the cache mechanism may not be consistent.
-  #
-  #TODO: It may be good to load partially the pileup
-  def mpileup_cached (opts={})
-    raise SAMException.new(), "A region must be provided" unless opts[:r] or opts[:region]
-    @pileup_cache = Hash.new unless @pileup_cache
-    @cached_regions = Hash.new unless @cached_regions
-    region = opts[:r] ? opts[:r] : opts[:region]
-    opts[:r] = "'#{region.to_s}'"
-    opts[:region] = "'#{region.to_s}'"
-    opts[:A] = true
-    #reg = region.class == Bio::DB::Fasta::Region ? region : Bio::DB::Fasta::Region.parse_region(region.to_s)
-    unless @cached_regions[region.to_s]
-      @cached_regions[region.to_s] =  Bio::DB::Fasta::Region.parse_region(region.to_s)
-      tmp = Array.new
-      @cached_regions[region.to_s].pileup =  tmp
-      #puts "Loading #{region.to_s}"
-      mpileup(opts) do | pile |
-        #   puts pile
-        tmp << pile
-        yield pile
-      end
-    else
-      #   puts "Loaded, reruning #{region.to_s}"
-      @cached_regions.pileup[region.to_s] .each do | pile |
-        yield pile
-      end
-    end
-  end
-  #Clears the pileup cache. If a region is passed as argument, just the specified region is removed
-  #If no region is passed, the hash is emptied
-  def mpileup_clear_cache (region)
-    return unless @cached_regions
-    if region
-      @cached_regions[region.to_s] = nil
-    else
-      @cached_regions.clear
-    end
-  end
-  #Gets the coverage of a region from a pileup.
-  def average_coverage_from_pileup(opts={})
-    opts[:region] =   opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
-    region = opts[:region]
-    calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
-    @cached_regions[region].average_coverage
-  end
-  #
-  def coverages_from_pileup(opts={})
-    opts[:region] =   opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
-    region = opts[:region]
-    calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
-    @cached_regions[region].coverages
-  end
-  def consensus_with_ambiguities(opts={})
-    opts[:region] =   opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
-    region = opts[:region]
-    #   p "consensus with ambiguities for: " << opts[:region]
-    calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
-    @cached_regions[region].consensus
-  end
-  def calculate_stats_from_pile(opts={})
-    min_cov = opts[:min_cov] ? opts[:min_cov] : 20
-    opts[:region] = Bio::DB::Fasta::Region.parse_region( opts[:region] .to_s)  unless opts[:region].class == Bio::DB::Fasta::Region
-    region = opts[:region]
-    reference = self.fetch_reference(region.entry, region.start, region.end).downcase
-    #  p "calculationg from pile..." << region.to_s
-    base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
-    bases = Array.new(region.size, BASE_COUNT_ZERO)
-    coverages = Array.new(region.size, 0)
-    total_cov = 0
-    self.mpileup_cached(:region=>"#{region.to_s}") do | pile |
-      #puts pile
-      #puts pile.coverage
-      if pile.coverage > min_cov
-        base_ratios[pile.pos - region.start ] = pile.base_ratios
-        reference[pile.pos - region.start   ] = pile.consensus_iuap(0.20)
-        coverages[pile.pos - region.start    ]  = pile.coverage.to_i
-        bases[pile.pos - region.start   ]  = pile.bases
-      end
-      total_cov += pile.coverage
-    end
-    region = @cached_regions[region.to_s]
-    region.coverages = coverages
-    region.base_ratios = base_ratios
-    region.consensus = reference
-    region.average_coverage = total_cov.to_f/region.size.to_f
-    region.bases = bases
-    region
-  end
-  BASE_COUNT_ZERO =  {:A => 0, :C => 0, :G => 0,  :T => 0}
-  #Gets an array with the proportions of the bases in the region. If there is no coverage, a
-  def base_ratios_in_region(opts={})
-    opts[:region] =   opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
-    region = opts[:region]
-    calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
-    @cached_regions[region].base_ratios
-  end
-  #Gets an array with the bsaes count in the region. If there is no coverage, a
-  def bases_in_region(opts={})
-    opts[:region] =   opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
-    region = opts[:region]
-    calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
-    @cached_regions[region].bases
-  end
-  def extract_reads(opts={})
-    opts[:region] = Bio::DB::Fasta::Region.parse_region( opts[:region] .to_s)  unless opts[:region].class == Bio::DB::Fasta::Region
-    fastq_filename = opts[:fastq]
-    fastq_file = opts[:fastq_file]
-    out = $stdout
-    print_fastq = Proc.new do |alignment|
-      out.puts "@#{alignment.qname}"
-      out.puts "#{alignment.seq}"
-      out.puts "+#{alignment.qname}"
-      out.puts "#{alignment.qual}"
-    end
-    fetch_with_function(chromosome, qstart, qstart+len,  print_fastq)
-  end
-end
 module Bio::BFRTools
@@ -267,7 +31,7 @@ module Bio::BFRTools
     BASES = [:A, :C, :G, :T]
     #Sets the reference file
     def reference(path)
-      @reference_db = Bio::DB::Fasta::FastaFile.new(path)
+      @reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>path})
       @reference_path = path
     end
@@ -350,33 +114,35 @@ module Bio::BFRTools
       self.entry = reg.entry
       self.start = reg.start
       self.end   = reg.end
+      opts[:region] = reg
       @container = opts[:container]
-      parental_1_sam = @container.parental_1_sam
-      parental_2_sam = @container.parental_2_sam
-      bulk_1_sam = @container.bulk_1_sam
-      bulk_2_sam = @container.bulk_2_sam
-      @parental_1_sequence = parental_1_sam.consensus_with_ambiguities(opts)
-      @parental_2_sequence = parental_2_sam.consensus_with_ambiguities(opts)
+      parental_1_reg = @container.parental_1_sam.fetch_region(opts)
+      parental_2_reg = @container.parental_2_sam.fetch_region(opts)
+      bulk_1_reg = @container.bulk_1_sam.fetch_region(opts)
+      bulk_2_reg = @container.bulk_2_sam.fetch_region(opts)
-      @bulk_1_sequence = bulk_1_sam.consensus_with_ambiguities(opts)
-      @bulk_2_sequence = bulk_2_sam.consensus_with_ambiguities(opts)
+      @parental_1_sequence = parental_1_reg.consensus
+      @parental_2_sequence = parental_2_reg.consensus
+      @bulk_1_sequence = bulk_1_reg.consensus
+      @bulk_2_sequence = bulk_2_reg.consensus
       @snp_count = Container.snps_between( @parental_1_sequence , @parental_2_sequence )
-      @ratios_bulk_1 = bulk_1_sam.base_ratios_in_region(opts)
-      @ratios_bulk_2 = bulk_2_sam.base_ratios_in_region(opts)
+      @ratios_bulk_1 = bulk_1_reg.base_ratios
+      @ratios_bulk_2 = bulk_2_reg.base_ratios
-      @bases_bulk_1 = bulk_1_sam.bases_in_region(opts)
-      @bases_bulk_2 = bulk_2_sam.bases_in_region(opts)
+      @bases_bulk_1 = bulk_1_reg.bases
+      @bases_bulk_2 = bulk_2_reg.bases
-      @avg_cov_bulk_1 = bulk_1_sam.average_coverage_from_pileup(opts)
-      @avg_cov_bulk_2 = bulk_2_sam.average_coverage_from_pileup(opts)
+      @avg_cov_bulk_1 = bulk_1_reg.average_coverage
+      @avg_cov_bulk_2 = bulk_2_reg.average_coverage
-      @coverages_1 =  bulk_1_sam.coverages_from_pileup(opts)
-      @coverages_2 =  bulk_2_sam.coverages_from_pileup(opts)
+      @coverages_1 =  bulk_1_reg.coverages
+      @coverages_2 =  bulk_2_reg.coverages
     end
@@ -472,7 +238,7 @@ module Bio::BFRTools
         raise BFRToolsException.new ("The reference for the line should be :first or :second, but was " + reference.to_s )
       end
-      relative_position = self.start +  position + 1
+      relative_position = self.start +  position
       bfr = bfrs[reference][base][position]
       cov_1 = @coverages_1[position]
@@ -622,7 +388,7 @@ module Bio::BFRTools
     end
     def process_region(opts={})
-      opts = { :min_cov=>20, :max_snp_1kbp => 10 }.merge!(opts)
+      opts = { :min_cov=>20, :max_snp_1kbp => 10, :max_per=>0.20 }.merge!(opts)
       @proccesed_regions += 1
       output = opts[:output_file] ? opts[:output_file] : $stdout
@@ -675,7 +441,7 @@ module Bio::BFRTools
               for informative in info
-                line = region.get_bfr_line(i+1, base, informative)
+                line = region.get_bfr_line(i, base, informative)
                 output.print  line , "\n"
               end
             end