bio-polyploid-tools 0.10.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/polymarker.rb +23 -19
- data/bin/polymarker_capillary.rb +75 -51
- data/bin/{find_homoeologue_variations.rb → polymarker_deletions.rb} +55 -90
- data/bio-polyploid-tools.gemspec +5 -7
- data/lib/bio/PolyploidTools/ExonContainer.rb +3 -3
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +38 -32
- data/lib/bio/PolyploidTools/SNP.rb +6 -5
- data/lib/bio/db/blast.rb +1 -1
- data/lib/bio/db/primer3.rb +14 -17
- metadata +4 -6
- data/README +0 -21
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: a8d10f674380ca0d78e0efbbf5bd81e44327fd66dfcbc5f9443891ebad6f2ee5
         | 
| 4 | 
            +
              data.tar.gz: b787eef663d8c1b2932b38a877bb870521e71c72f6584d9b08d3ebf0c937b36e
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 4fdad615441a69e1af27e9ca23949e57b36c100773ed17ced255bec11c6d1d04778622199e832901861c0494fea018155bbf2d9b737f1672e342b88197123782
         | 
| 7 | 
            +
              data.tar.gz: 074c38a5d9b59a116509a45e43d406bcc113cecfa83029239d748128715e74815fbbbb8880035abfb6272d96048dd5fb029fd363f75f699abadf46135ad67bc0
         | 
    
        data/VERSION
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            0. | 
| 1 | 
            +
            1.0.0
         | 
    
        data/bin/polymarker.rb
    CHANGED
    
    | @@ -40,7 +40,7 @@ options[:scoring] = :genome_specific | |
| 40 40 | 
             
            options[:database]  = false
         | 
| 41 41 | 
             
            options[:filter_best]  = false
         | 
| 42 42 | 
             
            options[:aligner] = :blast
         | 
| 43 | 
            -
             | 
| 43 | 
            +
            options[:max_hits] = 8
         | 
| 44 44 |  | 
| 45 45 | 
             
            options[:primer_3_preferences] = {
         | 
| 46 46 | 
             
                  :primer_product_size_range => "50-150" ,
         | 
| @@ -132,6 +132,10 @@ OptionParser.new do |opts| | |
| 132 132 | 
             
              opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
         | 
| 133 133 | 
             
                options[:database] = o
         | 
| 134 134 | 
             
              end
         | 
| 135 | 
            +
             | 
| 136 | 
            +
              opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
         | 
| 137 | 
            +
                options[:max_hits] = o.to_i
         | 
| 138 | 
            +
              end
         | 
| 135 139 | 
             
            end.parse!
         | 
| 136 140 |  | 
| 137 141 |  | 
| @@ -233,8 +237,8 @@ File.open(test_file) do | f | | |
| 233 237 | 
             
                   region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
         | 
| 234 238 | 
             
                   snp.template_sequence = fasta_reference_db.fetch_sequence(region)
         | 
| 235 239 | 
             
                 else
         | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 240 | 
            +
                  write_status "WARN: Unable to find entry for #{snp.gene}"
         | 
| 241 | 
            +
                end
         | 
| 238 242 | 
             
                elsif options[:mutant_list] and options[:reference] #List and fasta file
         | 
| 239 243 | 
             
                  snp = Bio::PolyploidTools::SNPMutant.parse(line)
         | 
| 240 244 | 
             
                  entry = fasta_reference_db.index.region_for_entry(snp.contig)
         | 
| @@ -242,21 +246,21 @@ File.open(test_file) do | f | | |
| 242 246 | 
             
                   region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
         | 
| 243 247 | 
             
                   snp.full_sequence = fasta_reference_db.fetch_sequence(region)
         | 
| 244 248 | 
             
                 else
         | 
| 245 | 
            -
             | 
| 246 | 
            -
                  end
         | 
| 247 | 
            -
                else
         | 
| 248 | 
            -
                  raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. " 
         | 
| 249 | 
            -
                end
         | 
| 250 | 
            -
                raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
         | 
| 251 | 
            -
             | 
| 252 | 
            -
                snp.genomes_count = options[:genomes_count]
         | 
| 253 | 
            -
                snp.snp_in = snp_in
         | 
| 254 | 
            -
                snp.original_name = original_name
         | 
| 255 | 
            -
                if snp.position 
         | 
| 256 | 
            -
                  snps << snp
         | 
| 257 | 
            -
                else
         | 
| 258 | 
            -
                  $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
         | 
| 249 | 
            +
                  write_status "WARN: Unable to find entry for #{snp.gene}"
         | 
| 259 250 | 
             
                end
         | 
| 251 | 
            +
              else
         | 
| 252 | 
            +
                raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. " 
         | 
| 253 | 
            +
              end
         | 
| 254 | 
            +
              raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
         | 
| 255 | 
            +
              snp.max_hits = options[:max_hits]
         | 
| 256 | 
            +
              snp.genomes_count = options[:genomes_count]
         | 
| 257 | 
            +
              snp.snp_in = snp_in
         | 
| 258 | 
            +
              snp.original_name = original_name
         | 
| 259 | 
            +
              if snp.position 
         | 
| 260 | 
            +
                snps << snp
         | 
| 261 | 
            +
              else
         | 
| 262 | 
            +
                $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
         | 
| 263 | 
            +
              end
         | 
| 260 264 | 
             
              end
         | 
| 261 265 | 
             
            end
         | 
| 262 266 |  | 
| @@ -307,7 +311,7 @@ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options) | |
| 307 311 |  | 
| 308 312 | 
             
            end
         | 
| 309 313 |  | 
| 310 | 
            -
            Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
         | 
| 314 | 
            +
            Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
         | 
| 311 315 | 
             
              do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
         | 
| 312 316 | 
             
            end if options[:aligner] == :blast
         | 
| 313 317 |  | 
| @@ -334,7 +338,7 @@ container.gene_models(temp_fasta_query) | |
| 334 338 | 
             
            container.chromosomes(target)
         | 
| 335 339 | 
             
            container.add_parental({:name=>snp_in})
         | 
| 336 340 | 
             
            container.add_parental({:name=>original_name})
         | 
| 337 | 
            -
             | 
| 341 | 
            +
            container.max_hits = options[:max_hits]
         | 
| 338 342 | 
             
            snps.each do |snp|
         | 
| 339 343 | 
             
              snp.container = container
         | 
| 340 344 | 
             
              snp.flanking_size = container.flanking_size
         | 
    
        data/bin/polymarker_capillary.rb
    CHANGED
    
    | @@ -35,15 +35,21 @@ options[:primer_3_preferences] = { | |
| 35 35 | 
             
            }
         | 
| 36 36 | 
             
            options[:genomes_count] = 3
         | 
| 37 37 | 
             
            options[:allow_non_specific] = false
         | 
| 38 | 
            +
            options[:aligner] = :blast
         | 
| 39 | 
            +
            options[:arm_selection]
         | 
| 40 | 
            +
            model="ungapped" 
         | 
| 41 | 
            +
            options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
         | 
| 42 | 
            +
            options[:database]  = false 
         | 
| 38 43 |  | 
| 39 44 | 
             
            OptionParser.new do |opts|
         | 
| 40 | 
            -
              opts.banner = "Usage:  | 
| 45 | 
            +
              opts.banner = "Usage: polymarker_deletions.rb [options]"
         | 
| 41 46 |  | 
| 42 47 | 
             
              opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
         | 
| 43 48 | 
             
                options[:reference] = o
         | 
| 44 49 | 
             
              end
         | 
| 45 50 |  | 
| 46 | 
            -
              opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome  | 
| 51 | 
            +
              opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome 
         | 
| 52 | 
            +
                should match the names to the entries in the fasta files as it is used as main target") do |o|
         | 
| 47 53 | 
             
                options[:markers] = o
         | 
| 48 54 | 
             
              end
         | 
| 49 55 |  | 
| @@ -53,10 +59,19 @@ OptionParser.new do |opts| | |
| 53 59 | 
             
              opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
         | 
| 54 60 | 
             
                options[:genomes_count] = o.to_i
         | 
| 55 61 | 
             
              end
         | 
| 56 | 
            -
              opts.on("- | 
| 62 | 
            +
              opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
         | 
| 57 63 | 
             
                options[:allow_non_specific] = true
         | 
| 58 64 | 
             
              end
         | 
| 59 65 |  | 
| 66 | 
            +
              opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
         | 
| 67 | 
            +
                options[:database] = o
         | 
| 68 | 
            +
              end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
             | 
| 71 | 
            +
              opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
         | 
| 72 | 
            +
                options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
         | 
| 73 | 
            +
              end
         | 
| 74 | 
            +
             | 
| 60 75 | 
             
            end.parse!
         | 
| 61 76 |  | 
| 62 77 |  | 
| @@ -65,23 +80,33 @@ reference     = options[:reference] | |
| 65 80 | 
             
            markers       = options[:markers]
         | 
| 66 81 | 
             
            output_folder = options[:output_folder]
         | 
| 67 82 | 
             
            allow_non_specific = options[:allow_non_specific]
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            options[:database] = options[:reference] unless  options[:database] 
         | 
| 85 | 
            +
            temp_fasta_query="#{output_folder}/to_align.fa"
         | 
| 68 86 | 
             
            log "Output folder: #{output_folder}"
         | 
| 69 87 | 
             
            exonerate_file="#{output_folder}/exonerate_tmp.tab"
         | 
| 70 88 | 
             
            Dir.mkdir(output_folder)
         | 
| 89 | 
            +
            arm_selection = options[:arm_selection]
         | 
| 71 90 |  | 
| 72 91 | 
             
            module Bio::PolyploidTools
         | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 92 |  | 
| 76 93 | 
             
              class SequenceToAmplify < SNP
         | 
| 77 94 |  | 
| 78 | 
            -
                def self.select_chromosome( | 
| 79 | 
            -
             | 
| 80 | 
            -
                   | 
| 81 | 
            -
                  ret =  | 
| 82 | 
            -
                   | 
| 83 | 
            -
             | 
| 84 | 
            -
                   | 
| 95 | 
            +
                def self.select_chromosome(gene_name, arm_selection)
         | 
| 96 | 
            +
                  #m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(gene_name)
         | 
| 97 | 
            +
                  #m=/TraesCS(\d{1})(\w{1})(\d{2})G(\d+)/.match(gene_name)
         | 
| 98 | 
            +
                  #ret = {:group : m[1],
         | 
| 99 | 
            +
                  #       :genome : m[2],:version=>m[3],:chr_id=>m[4]}
         | 
| 100 | 
            +
                
         | 
| 101 | 
            +
                  
         | 
| 102 | 
            +
                  #arr = contig_name.split('_')
         | 
| 103 | 
            +
                  #ret = "U"
         | 
| 104 | 
            +
                  #ret = arr[2][0,2] if arr.size >= 3
         | 
| 105 | 
            +
                  #ret = "3B" if arr.size == 2 and arr[0] == "v443"
         | 
| 106 | 
            +
                  #ret = arr[0][0,2] if arr.size == 1   
         | 
| 107 | 
            +
                  #ret = "#{m[1]}#{m[2]}"
         | 
| 108 | 
            +
                  #puts ret
         | 
| 109 | 
            +
                  ret = arm_selection.call(gene_name)
         | 
| 85 110 | 
             
                  return ret
         | 
| 86 111 | 
             
                end
         | 
| 87 112 |  | 
| @@ -92,18 +117,18 @@ module Bio::PolyploidTools | |
| 92 117 | 
             
                #Format: 
         | 
| 93 118 | 
             
                #A fasta entry with the id: contig:start-end
         | 
| 94 119 | 
             
                #The sequence can be prodcued with samtools faidx
         | 
| 95 | 
            -
                def self.parse(fasta_entry)
         | 
| 96 | 
            -
             | 
| 120 | 
            +
                def self.parse(fasta_entry, arm_selection)
         | 
| 121 | 
            +
                  #puts fasta_entry.definition
         | 
| 97 122 | 
             
                  snp = SequenceToAmplify.new
         | 
| 98 123 | 
             
                  match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
         | 
| 99 | 
            -
                  
         | 
| 124 | 
            +
                  #puts match_data.inspect
         | 
| 100 125 | 
             
                  rName = Regexp.last_match(:rname)
         | 
| 101 126 | 
             
                  rStart =  Regexp.last_match(:rstart).to_i
         | 
| 102 127 | 
             
                  rEnd =  Regexp.last_match(:rend).to_i
         | 
| 103 128 | 
             
                  snp.gene = fasta_entry.definition
         | 
| 104 129 | 
             
                  #snp.chromosome=rName
         | 
| 105 | 
            -
             | 
| 106 | 
            -
                  snp.chromosome=select_chromosome( | 
| 130 | 
            +
                  #puts "Gene: #{snp.gene}"
         | 
| 131 | 
            +
                  snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
         | 
| 107 132 | 
             
                  #puts "#{rName}: #{snp.chromosome}"
         | 
| 108 133 | 
             
                  snp.sequence_original = fasta_entry.seq
         | 
| 109 134 | 
             
                  snp.template_sequence = fasta_entry.seq.upcase
         | 
| @@ -111,7 +136,7 @@ module Bio::PolyploidTools | |
| 111 136 | 
             
                  snp.rstart = rStart
         | 
| 112 137 | 
             
                  snp.rend = rEnd
         | 
| 113 138 |  | 
| 114 | 
            -
                  snp.position   =  | 
| 139 | 
            +
                  snp.position   = snp.sequence_original.size / 2
         | 
| 115 140 | 
             
                  snp.original   = snp.sequence_original[snp.position]
         | 
| 116 141 |  | 
| 117 142 | 
             
                  tmp =  Bio::Sequence::NA.new(snp.original)
         | 
| @@ -232,10 +257,13 @@ file = Bio::FastaFormat.open(markers) | |
| 232 257 | 
             
            file.each do |entry|
         | 
| 233 258 |  | 
| 234 259 | 
             
              begin
         | 
| 235 | 
            -
                 | 
| 260 | 
            +
                #puts entry.inspect
         | 
| 261 | 
            +
                tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
         | 
| 236 262 | 
             
                snps << tmp if tmp
         | 
| 237 | 
            -
              rescue
         | 
| 263 | 
            +
              rescue Exception => e
         | 
| 264 | 
            +
                log "ERROR\t#{e.message}"
         | 
| 238 265 | 
             
                $stderr.puts "Unable to generate the marker for: #{entry.definition}"
         | 
| 266 | 
            +
                $stderr.puts e.backtrace
         | 
| 239 267 | 
             
              end
         | 
| 240 268 |  | 
| 241 269 | 
             
            end
         | 
| @@ -251,40 +279,33 @@ fasta_file.load_fai_entries | |
| 251 279 | 
             
            min_identity = 95
         | 
| 252 280 | 
             
            found_contigs = Set.new
         | 
| 253 281 |  | 
| 254 | 
            -
             | 
| 282 | 
            +
             | 
| 283 | 
            +
            def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
         | 
| 255 284 | 
             
              if aln.identity > min_identity
         | 
| 256 285 | 
             
                exo_f.puts aln.line
         | 
| 257 | 
            -
                #puts aln.line
         | 
| 258 286 | 
             
                unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file. 
         | 
| 259 287 | 
             
                  found_contigs.add(aln.target_id)
         | 
| 260 288 | 
             
                  entry = fasta_file.index.region_for_entry(aln.target_id)
         | 
| 261 | 
            -
                  raise  | 
| 289 | 
            +
                  raise ExonerateException.new,  "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
         | 
| 290 | 
            +
                  if options[:extract_found_contigs]
         | 
| 291 | 
            +
                    region = entry.get_full_region
         | 
| 292 | 
            +
                    seq = fasta_file.fetch_sequence(region)
         | 
| 293 | 
            +
                    contigs_f.puts(">#{aln.target_id}\n#{seq}") 
         | 
| 294 | 
            +
                  end
         | 
| 262 295 | 
             
                end
         | 
| 263 296 | 
             
              end  
         | 
| 264 | 
            -
            end
         | 
| 265 | 
            -
            exo_f.close
         | 
| 266 | 
            -
             | 
| 267 | 
            -
            arm_selection_functions = Hash.new
         | 
| 268 297 |  | 
| 269 | 
            -
            arm_selection_functions[:full_scaffold] = lambda do | contig_name |    
         | 
| 270 | 
            -
              return contig_name
         | 
| 271 298 | 
             
            end
         | 
| 272 299 |  | 
| 273 | 
            -
             | 
| 274 | 
            -
             | 
| 275 | 
            -
             | 
| 276 | 
            -
            #And with the cases when 3B is named with the prefix: v443
         | 
| 277 | 
            -
            arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
         | 
| 278 | 
            -
              
         | 
| 279 | 
            -
              arr = contig_name.split('_')
         | 
| 280 | 
            -
              ret = "U"
         | 
| 281 | 
            -
              ret = arr[2][0,2] if arr.size >= 3
         | 
| 282 | 
            -
              ret = "3B" if arr.size == 2 and arr[0] == "v443"
         | 
| 283 | 
            -
              ret = arr[0][0,2] if arr.size == 1   
         | 
| 284 | 
            -
              return ret
         | 
| 285 | 
            -
            end
         | 
| 300 | 
            +
            Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
         | 
| 301 | 
            +
              do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
         | 
| 302 | 
            +
            end if options[:aligner] == :blast
         | 
| 286 303 |  | 
| 304 | 
            +
            Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
         | 
| 305 | 
            +
              do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
         | 
| 306 | 
            +
            end if options[:aligner] == :exonerate
         | 
| 287 307 |  | 
| 308 | 
            +
            exo_f.close
         | 
| 288 309 |  | 
| 289 310 | 
             
            container= Bio::PolyploidTools::ExonContainer.new
         | 
| 290 311 | 
             
            container.flanking_size=500 
         | 
| @@ -292,6 +313,7 @@ container.gene_models(markers) | |
| 292 313 | 
             
            container.chromosomes(target)
         | 
| 293 314 | 
             
            container.add_parental({:name=>"A"})
         | 
| 294 315 | 
             
            container.add_parental({:name=>"B"})
         | 
| 316 | 
            +
            #puts "SNPs size: #{snps.size}"
         | 
| 295 317 | 
             
            snps.each do |snp|
         | 
| 296 318 | 
             
              snp.snp_in = "B"
         | 
| 297 319 | 
             
              snp.container = container
         | 
| @@ -300,8 +322,10 @@ snps.each do |snp| | |
| 300 322 | 
             
              snp.includeNoSpecific = allow_non_specific
         | 
| 301 323 | 
             
              container.add_snp(snp)
         | 
| 302 324 | 
             
            end
         | 
| 303 | 
            -
            container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
         | 
| 304 325 |  | 
| 326 | 
            +
            container.add_alignments({:exonerate_file=>exonerate_file, 
         | 
| 327 | 
            +
              :arm_selection=> arm_selection,
         | 
| 328 | 
            +
              :min_identity=>min_identity})
         | 
| 305 329 |  | 
| 306 330 |  | 
| 307 331 | 
             
            exons_filename="#{output_folder}/localAlignment.fa"
         | 
| @@ -329,6 +353,9 @@ output_file  = "#{output_folder}/primers.csv" | |
| 329 353 | 
             
            file = File.open(masks_output, "w")
         | 
| 330 354 | 
             
            out  = File.open(output_file,  "w")
         | 
| 331 355 |  | 
| 356 | 
            +
            out.puts ["Id","specificity","inside","type","target","orientation","product_size",
         | 
| 357 | 
            +
              "left_position","left_tm","left_sequence",
         | 
| 358 | 
            +
            "right_position","right_tm","right_sequence"].join ","
         | 
| 332 359 | 
             
            class Bio::DB::Primer3::Primer3Record
         | 
| 333 360 | 
             
              attr_accessor :primerPairs
         | 
| 334 361 | 
             
            end
         | 
| @@ -358,10 +385,7 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record | | |
| 358 385 |  | 
| 359 386 | 
             
              file.puts ">#{seq_id}\n#{sequence_template}"
         | 
| 360 387 | 
             
              file.puts ">#{seq_id}:mask\n#{sequence_mask}"
         | 
| 361 | 
            -
             | 
| 362 | 
            -
             | 
| 363 | 
            -
               #puts primer3record.primerPairs
         | 
| 364 | 
            -
             | 
| 388 | 
            +
              
         | 
| 365 389 | 
             
               primer3record.primerPairs.each do |p| 
         | 
| 366 390 | 
             
                #puts p.inspect
         | 
| 367 391 | 
             
                printed += 1   
         | 
| @@ -381,10 +405,10 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record | | |
| 381 405 | 
             
                toPrint <<  p.right.sequence
         | 
| 382 406 |  | 
| 383 407 | 
             
                middle = 501 
         | 
| 384 | 
            -
                toPrint << lArr[0]
         | 
| 385 | 
            -
                toPrint << rArr[0]
         | 
| 386 | 
            -
                toPrint << middle - lArr[0]
         | 
| 387 | 
            -
                toPrint << rArr[0] - middle
         | 
| 408 | 
            +
                #toPrint << lArr[0]
         | 
| 409 | 
            +
                #toPrint << rArr[0]
         | 
| 410 | 
            +
                #toPrint << middle - lArr[0]
         | 
| 411 | 
            +
                #toPrint << rArr[0] - middle
         | 
| 388 412 | 
             
            #Start End LeftDistance  RightDistance
         | 
| 389 413 |  | 
| 390 414 | 
             
                out.puts toPrint.join(",")
         | 
| @@ -53,14 +53,12 @@ class Bio::PolyploidTools::ExonContainer | |
| 53 53 | 
             
            end
         | 
| 54 54 |  | 
| 55 55 | 
             
            class Bio::DB::Primer3::SNP
         | 
| 56 | 
            -
             | 
| 57 56 | 
             
              def to_s
         | 
| 58 57 | 
             
                 "#{gene}:#{snp_from.chromosome}"
         | 
| 59 58 | 
             
              end
         | 
| 60 | 
            -
             | 
| 61 59 | 
             
            end
         | 
| 62 | 
            -
            class Bio::DB::Primer3::Primer3Record
         | 
| 63 60 |  | 
| 61 | 
            +
            class Bio::DB::Primer3::Primer3Record
         | 
| 64 62 |  | 
| 65 63 | 
             
              def best_pair
         | 
| 66 64 | 
             
                return @best_pair if @best_pair
         | 
| @@ -82,7 +80,7 @@ class Bio::DB::Primer3::Primer3Record | |
| 82 80 | 
             
                    @total_caps = capital_count
         | 
| 83 81 | 
             
                  end
         | 
| 84 82 | 
             
                end
         | 
| 85 | 
            -
                 | 
| 83 | 
            +
                
         | 
| 86 84 | 
             
                @best_pair
         | 
| 87 85 | 
             
              end
         | 
| 88 86 |  | 
| @@ -107,12 +105,13 @@ class Bio::DB::Primer3::Primer3Record | |
| 107 105 |  | 
| 108 106 | 
             
              def score
         | 
| 109 107 | 
             
                best_pair
         | 
| 108 | 
            +
                total_caps = "#{best_pair.left.sequence}#{best_pair.right.sequence}".scan(/[A-Z]/).length
         | 
| 110 109 | 
             
            #    puts "score"
         | 
| 111 110 | 
             
             #   puts self.inspect
         | 
| 112 111 | 
             
                ret = 0
         | 
| 113 112 | 
             
                ret += @scores[type]
         | 
| 114 113 | 
             
                ret += @scores[:exon] if exon?
         | 
| 115 | 
            -
                ret -=  | 
| 114 | 
            +
                ret -= total_caps * 10  
         | 
| 116 115 | 
             
                ret -= product_length
         | 
| 117 116 | 
             
                ret
         | 
| 118 117 | 
             
              end
         | 
| @@ -123,71 +122,21 @@ class Bio::DB::Primer3::Primer3Record | |
| 123 122 |  | 
| 124 123 | 
             
               def left_primer_snp(snp)
         | 
| 125 124 | 
             
                  tmp_primer = String.new(left_primer)
         | 
| 126 | 
            -
                  #if self.orientation == :forward
         | 
| 127 | 
            -
                  #  base_original = snp.original 
         | 
| 128 | 
            -
                  #  base_snp = snp.snp
         | 
| 129 | 
            -
                  #elsif self.orientation == :reverse
         | 
| 130 | 
            -
                  #  base_original = reverse_complement_string(snp.original )
         | 
| 131 | 
            -
                  #  base_snp = reverse_complement_string(snp.snp)
         | 
| 132 | 
            -
                  #else
         | 
| 133 | 
            -
                  #  raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
         | 
| 134 | 
            -
                  #end
         | 
| 135 | 
            -
             | 
| 136 | 
            -
                  # puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
         | 
| 137 | 
            -
                  #if tmp_primer[-1] == base_original
         | 
| 138 | 
            -
                  #  tmp_primer[-1] = base_snp
         | 
| 139 | 
            -
                  #elsif tmp_primer[-1] == base_snp
         | 
| 140 | 
            -
                  #  tmp_primer[-1] = base_original  
         | 
| 141 | 
            -
                  #else
         | 
| 142 | 
            -
                  #  raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
         | 
| 143 | 
            -
                  #end
         | 
| 144 | 
            -
                  #puts "tmp_primer: #{tmp_primer}"
         | 
| 145 125 | 
             
                  return tmp_primer
         | 
| 146 126 | 
             
                end
         | 
| 147 127 |  | 
| 148 128 | 
             
            end
         | 
| 149 129 |  | 
| 150 | 
            -
            arm_selection_functions = Hash.new;
         | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
            arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
         | 
| 154 | 
            -
              ret = contig_name[0,2]       
         | 
| 155 | 
            -
              return ret
         | 
| 156 | 
            -
            end
         | 
| 157 | 
            -
             | 
| 158 | 
            -
            #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
         | 
| 159 | 
            -
            #Or the first two characters in the contig name, to deal with 
         | 
| 160 | 
            -
            #pseudomolecules that start with headers like: "1A"
         | 
| 161 | 
            -
            #And with the cases when 3B is named with the prefix: v443
         | 
| 162 | 
            -
            arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
         | 
| 163 | 
            -
              
         | 
| 164 | 
            -
              arr = contig_name.split('_')
         | 
| 165 | 
            -
              ret = "U"
         | 
| 166 | 
            -
              ret = arr[2][0,2] if arr.size >= 3
         | 
| 167 | 
            -
              ret = "3B" if arr.size == 2 and arr[0] == "v443"
         | 
| 168 | 
            -
              ret = arr[0][0,2] if arr.size == 1   
         | 
| 169 | 
            -
              return ret
         | 
| 170 | 
            -
            end
         | 
| 171 | 
            -
             | 
| 172 | 
            -
            arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
         | 
| 173 | 
            -
              ret = contig_name.split(':')[0].split("_")[1];       
         | 
| 174 | 
            -
              return ret
         | 
| 175 | 
            -
            end
         | 
| 176 | 
            -
             | 
| 177 | 
            -
            arm_selection_functions[:scaffold] = lambda do | contig_name |
         | 
| 178 | 
            -
              ret = contig_name;       
         | 
| 179 | 
            -
              return ret
         | 
| 180 | 
            -
            end
         | 
| 181 | 
            -
             | 
| 182 130 | 
             
            markers = nil
         | 
| 183 131 |  | 
| 184 132 | 
             
            options = {}
         | 
| 133 | 
            +
            options[:aligner] = :blast
         | 
| 185 134 | 
             
            options[:model] = "est2genome"
         | 
| 186 135 | 
             
            options[:min_identity] = 90
         | 
| 187 | 
            -
            options[:extract_found_contigs] =  | 
| 188 | 
            -
            options[:arm_selection] =  | 
| 136 | 
            +
            options[:extract_found_contigs] = true
         | 
| 137 | 
            +
            options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
         | 
| 189 138 | 
             
            options[:genomes_count] = 3
         | 
| 190 | 
            -
             | 
| 139 | 
            +
            options[:variation_free_region] =0 
         | 
| 191 140 |  | 
| 192 141 | 
             
            options[:primer_3_preferences] = {
         | 
| 193 142 | 
             
                  :primer_product_size_range => "50-150" ,
         | 
| @@ -200,11 +149,14 @@ options[:primer_3_preferences] = { | |
| 200 149 | 
             
              }
         | 
| 201 150 |  | 
| 202 151 |  | 
| 152 | 
            +
            options[:database]  = false 
         | 
| 153 | 
            +
             | 
| 154 | 
            +
             | 
| 203 155 | 
             
            OptionParser.new do |opts|
         | 
| 204 156 |  | 
| 205 | 
            -
              opts.banner = "Usage:  | 
| 157 | 
            +
              opts.banner = "Usage: polymarker_deletions.rb [options]"
         | 
| 206 158 |  | 
| 207 | 
            -
              opts.on("- | 
| 159 | 
            +
              opts.on("-m", "--sequences FASTA", "Sequence of the region to search") do |o|
         | 
| 208 160 | 
             
                options[:sequences] = o
         | 
| 209 161 | 
             
              end
         | 
| 210 162 | 
             
              opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
         | 
| @@ -221,6 +173,14 @@ OptionParser.new do |opts| | |
| 221 173 | 
             
              opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
         | 
| 222 174 | 
             
                options[:extract_found_contigs] = true
         | 
| 223 175 | 
             
              end
         | 
| 176 | 
            +
             | 
| 177 | 
            +
              opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
         | 
| 178 | 
            +
                options[:database] = o
         | 
| 179 | 
            +
              end
         | 
| 180 | 
            +
             | 
| 181 | 
            +
                opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
         | 
| 182 | 
            +
                options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
         | 
| 183 | 
            +
              end
         | 
| 224 184 |  | 
| 225 185 | 
             
            end.parse!
         | 
| 226 186 | 
             
            #reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
         | 
| @@ -231,11 +191,14 @@ throw raise Exception.new(), "Fasta file with sequences has to be provided" unle | |
| 231 191 | 
             
            output_folder = options[:output] if options[:output]
         | 
| 232 192 | 
             
            throw raise Exception.new(), "An output directory has to be provided" unless output_folder
         | 
| 233 193 | 
             
            model=options[:model] 
         | 
| 194 | 
            +
             | 
| 195 | 
            +
            options[:database] = options[:reference] unless  options[:database] 
         | 
| 196 | 
            +
             | 
| 234 197 | 
             
            Dir.mkdir(output_folder)
         | 
| 235 198 | 
             
            min_identity= options[:min_identity]
         | 
| 236 199 |  | 
| 237 200 | 
             
            exonerate_file="#{output_folder}/exonerate_tmp.tab"
         | 
| 238 | 
            -
             | 
| 201 | 
            +
             | 
| 239 202 | 
             
            primer_3_input="#{output_folder}/primer_3_input_temp"
         | 
| 240 203 | 
             
            primer_3_output="#{output_folder}/primer_3_output_temp"
         | 
| 241 204 | 
             
            exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
         | 
| @@ -248,14 +211,8 @@ fasta_file.load_fai_entries | |
| 248 211 | 
             
            original_name="A"
         | 
| 249 212 | 
             
            snp_in="B"
         | 
| 250 213 |  | 
| 251 | 
            -
             | 
| 214 | 
            +
            arm_selection = options[:arm_selection]
         | 
| 252 215 |  | 
| 253 | 
            -
            unless arm_selection
         | 
| 254 | 
            -
               arm_selection = lambda do | contig_name |
         | 
| 255 | 
            -
                  ret = contig_name[0,3]       
         | 
| 256 | 
            -
                  return ret
         | 
| 257 | 
            -
                end
         | 
| 258 | 
            -
            end
         | 
| 259 216 | 
             
            begin
         | 
| 260 217 | 
             
            log "Reading exons"
         | 
| 261 218 | 
             
            exons = Array.new
         | 
| @@ -279,22 +236,28 @@ end | |
| 279 236 | 
             
            log "Searching markers in genome"
         | 
| 280 237 | 
             
            found_contigs = Set.new
         | 
| 281 238 | 
             
            exo_f = File.open(exonerate_file, "w")
         | 
| 282 | 
            -
             | 
| 283 | 
            -
             | 
| 284 | 
            -
             | 
| 239 | 
            +
             | 
| 240 | 
            +
            def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
         | 
| 241 | 
            +
              if aln.identity > min_identity
         | 
| 285 242 | 
             
                exo_f.puts aln.line
         | 
| 286 243 | 
             
                unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file. 
         | 
| 287 244 | 
             
                  found_contigs.add(aln.target_id)
         | 
| 288 245 | 
             
                  entry = fasta_file.index.region_for_entry(aln.target_id)
         | 
| 289 246 | 
             
                  raise ExonerateException.new,  "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
         | 
| 290 | 
            -
             | 
| 291 | 
            -
                  seq = fasta_file.fetch_sequence(region)
         | 
| 292 | 
            -
                  contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
         | 
| 247 | 
            +
             | 
| 293 248 | 
             
                end
         | 
| 294 249 | 
             
              end  
         | 
| 295 250 | 
             
            end
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            Bio::DB::Blast.align({:query=>sequences, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
         | 
| 253 | 
            +
              do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
         | 
| 254 | 
            +
            end if options[:aligner] == :blast
         | 
| 255 | 
            +
             | 
| 256 | 
            +
            Bio::DB::Exonerate.align({:query=>sequences, :target=>target, :model=>model}) do |aln|
         | 
| 257 | 
            +
              do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
         | 
| 258 | 
            +
            end if options[:aligner] == :exonerate
         | 
| 259 | 
            +
             | 
| 296 260 | 
             
            exo_f.close() 
         | 
| 297 | 
            -
            contigs_f.close() if options[:extract_found_contigs]
         | 
| 298 261 |  | 
| 299 262 |  | 
| 300 263 |  | 
| @@ -303,18 +266,24 @@ log "Reading best alignment on each chromosome" | |
| 303 266 | 
             
            container= Bio::PolyploidTools::ExonContainer.new
         | 
| 304 267 | 
             
            container.flanking_size=options[:flanking_size] 
         | 
| 305 268 | 
             
            container.gene_models(sequences)
         | 
| 306 | 
            -
            container.chromosomes( | 
| 269 | 
            +
            container.chromosomes(reference)
         | 
| 307 270 | 
             
            container.add_parental({:name=>"A"})
         | 
| 308 271 | 
             
            container.add_parental({:name=>"B"})
         | 
| 309 272 | 
             
            exons.each do |exon|
         | 
| 310 273 | 
             
              exon.container = container
         | 
| 311 | 
            -
              exon.flanking_size =  | 
| 274 | 
            +
              exon.flanking_size = 200
         | 
| 312 275 | 
             
              exon.variation_free_region = options[:variation_free_region]
         | 
| 313 | 
            -
            # | 
| 276 | 
            +
              #puts exon.inspect
         | 
| 314 277 | 
             
              container.add_snp(exon)
         | 
| 315 278 |  | 
| 316 279 | 
             
            end
         | 
| 317 | 
            -
            container.add_alignments( | 
| 280 | 
            +
            container.add_alignments(
         | 
| 281 | 
            +
              {:exonerate_file=>exonerate_file, 
         | 
| 282 | 
            +
              :arm_selection=>options[:arm_selection] , 
         | 
| 283 | 
            +
              :min_identity=>min_identity})
         | 
| 284 | 
            +
             | 
| 285 | 
            +
             | 
| 286 | 
            +
             | 
| 318 287 |  | 
| 319 288 | 
             
            #4.1 generating primer3 file
         | 
| 320 289 | 
             
            log "Running primer3"
         | 
| @@ -348,18 +317,14 @@ exons.each do |snp| | |
| 348 317 | 
             
            end
         | 
| 349 318 |  | 
| 350 319 | 
             
            kasp_container.add_primers_file(primer_3_output) if added_exons > 0
         | 
| 351 | 
            -
            header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
         | 
| 320 | 
            +
            header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,blast_hits"
         | 
| 352 321 | 
             
            File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
         | 
| 353 322 |  | 
| 354 | 
            -
             | 
| 355 | 
            -
               | 
| 356 | 
            -
               | 
| 357 | 
            -
             | 
| 358 | 
            -
             | 
| 359 | 
            -
              out_fasta_products = "#{output_folder}/#{name}.fa"
         | 
| 360 | 
            -
              File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
         | 
| 361 | 
            -
             | 
| 362 | 
            -
             | 
| 323 | 
            +
            out_fasta_products = "#{output_folder}/products.fa"
         | 
| 324 | 
            +
            File.open(out_fasta_products, 'w') do  |f|
         | 
| 325 | 
            +
              kasp_container.snp_hash.each_pair do |name, kaspSNP|  
         | 
| 326 | 
            +
                f.write(kaspSNP.realigned_primers_fasta) 
         | 
| 327 | 
            +
              end
         | 
| 363 328 | 
             
            end
         | 
| 364 329 |  | 
| 365 330 | 
             
            File.open(output_to_order, "w") { |io|  io.write(kasp_container.print_primers_with_tails()) }
         | 
    
        data/bio-polyploid-tools.gemspec
    CHANGED
    
    | @@ -2,27 +2,25 @@ | |
| 2 2 | 
             
            # DO NOT EDIT THIS FILE DIRECTLY
         | 
| 3 3 | 
             
            # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
         | 
| 4 4 | 
             
            # -*- encoding: utf-8 -*-
         | 
| 5 | 
            -
            # stub: bio-polyploid-tools 0. | 
| 5 | 
            +
            # stub: bio-polyploid-tools 1.0.0 ruby lib
         | 
| 6 6 |  | 
| 7 7 | 
             
            Gem::Specification.new do |s|
         | 
| 8 8 | 
             
              s.name = "bio-polyploid-tools".freeze
         | 
| 9 | 
            -
              s.version = "0. | 
| 9 | 
            +
              s.version = "1.0.0"
         | 
| 10 10 |  | 
| 11 11 | 
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
         | 
| 12 12 | 
             
              s.require_paths = ["lib".freeze]
         | 
| 13 13 | 
             
              s.authors = ["Ricardo H.  Ramirez-Gonzalez".freeze]
         | 
| 14 | 
            -
              s.date = "2019- | 
| 14 | 
            +
              s.date = "2019-07-05"
         | 
| 15 15 | 
             
              s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
         | 
| 16 16 | 
             
              s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
         | 
| 17 | 
            -
              s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, " | 
| 17 | 
            +
              s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "polymarker_deletions.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
         | 
| 18 18 | 
             
              s.extra_rdoc_files = [
         | 
| 19 | 
            -
                "README",
         | 
| 20 19 | 
             
                "README.md"
         | 
| 21 20 | 
             
              ]
         | 
| 22 21 | 
             
              s.files = [
         | 
| 23 22 | 
             
                ".travis.yml",
         | 
| 24 23 | 
             
                "Gemfile",
         | 
| 25 | 
            -
                "README",
         | 
| 26 24 | 
             
                "README.md",
         | 
| 27 25 | 
             
                "Rakefile",
         | 
| 28 26 | 
             
                "VERSION",
         | 
| @@ -34,7 +32,6 @@ Gem::Specification.new do |s| | |
| 34 32 | 
             
                "bin/filter_exonerate_by_identity.rb",
         | 
| 35 33 | 
             
                "bin/find_best_blat_hit.rb",
         | 
| 36 34 | 
             
                "bin/find_best_exonerate.rb",
         | 
| 37 | 
            -
                "bin/find_homoeologue_variations.rb",
         | 
| 38 35 | 
             
                "bin/get_longest_hsp_blastx_triads.rb",
         | 
| 39 36 | 
             
                "bin/hexaploid_primers.rb",
         | 
| 40 37 | 
             
                "bin/homokaryot_primers.rb",
         | 
| @@ -46,6 +43,7 @@ Gem::Specification.new do |s| | |
| 46 43 | 
             
                "bin/mask_triads.rb",
         | 
| 47 44 | 
             
                "bin/polymarker.rb",
         | 
| 48 45 | 
             
                "bin/polymarker_capillary.rb",
         | 
| 46 | 
            +
                "bin/polymarker_deletions.rb",
         | 
| 49 47 | 
             
                "bin/snp_position_to_polymarker.rb",
         | 
| 50 48 | 
             
                "bin/snps_between_bams.rb",
         | 
| 51 49 | 
             
                "bin/tag_stats.rb",
         | 
| @@ -76,7 +76,6 @@ module Bio::PolyploidTools | |
| 76 76 | 
             
                end
         | 
| 77 77 |  | 
| 78 78 | 
             
                def add_snp(snp)
         | 
| 79 | 
            -
                  #TODO: add to the snp the maximum number of hits? 
         | 
| 80 79 | 
             
                  snp.max_hits = self.max_hits
         | 
| 81 80 | 
             
                  @snp_map[snp.gene] = Array.new unless   @snp_map[snp.gene] 
         | 
| 82 81 | 
             
                  @snp_map[snp.gene] << snp
         | 
| @@ -141,6 +140,7 @@ module Bio::PolyploidTools | |
| 141 140 | 
             
                      begin 
         | 
| 142 141 | 
             
                        file.puts snp.aligned_sequences_fasta
         | 
| 143 142 | 
             
                      rescue Exception=>e
         | 
| 143 | 
            +
                        #puts snp.inspect
         | 
| 144 144 | 
             
                        @missing_exons << snp.to_s
         | 
| 145 145 | 
             
                        $stderr.puts "print_fasta_snp_exones:" + snp.to_s + ":" + e.to_s
         | 
| 146 146 | 
             
                        $stderr.puts "Local position: #{snp.local_position}"
         | 
| @@ -160,8 +160,8 @@ module Bio::PolyploidTools | |
| 160 160 | 
             
                      begin 
         | 
| 161 161 | 
             
                        primer_3_min_seq_length
         | 
| 162 162 | 
             
                        string = snp.primer_3_string( snp.chromosome, parental )
         | 
| 163 | 
            -
                        #TODO: add tan error to the SNP this snp has more than max_hits.  | 
| 164 | 
            -
                        # | 
| 163 | 
            +
                        #TODO: add tan error to the SNP this snp has more than max_hits. 
         | 
| 164 | 
            +
                        #Or maybe inside the SNP file. 
         | 
| 165 165 | 
             
                        if string.size > 0
         | 
| 166 166 | 
             
                          file.puts string
         | 
| 167 167 | 
             
                          added += 1
         | 
| @@ -55,11 +55,15 @@ module Bio::PolyploidTools | |
| 55 55 |  | 
| 56 56 | 
             
                 def mask_aligned_chromosomal_snp(chromosome)
         | 
| 57 57 | 
             
                  return nil if  aligned_sequences.values.size == 0
         | 
| 58 | 
            -
                  names =  | 
| 58 | 
            +
                  names = aligned_sequences.keys
         | 
| 59 | 
            +
                  parentals =  parental_sequences.keys
         | 
| 60 | 
            +
                  names = names - parentals
         | 
| 61 | 
            +
             | 
| 62 | 
            +
             | 
| 63 | 
            +
                  best_target = get_target_sequence(names, chromosome)
         | 
| 64 | 
            +
                  masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
         | 
| 65 | 
            +
                  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[best_target]
         | 
| 59 66 |  | 
| 60 | 
            -
                  masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
         | 
| 61 | 
            -
               
         | 
| 62 | 
            -
                  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[chromosome]
         | 
| 63 67 | 
             
                  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
         | 
| 64 68 | 
             
                  i = 0
         | 
| 65 69 | 
             
                  while i < masked_snps.size
         | 
| @@ -105,26 +109,23 @@ module Bio::PolyploidTools | |
| 105 109 |  | 
| 106 110 | 
             
                    aligned_sequences.each_pair do |name, val|  
         | 
| 107 111 | 
             
                      has_del = true if val[i] == '-'
         | 
| 108 | 
            -
                      print "#{val[i]}\t"
         | 
| 112 | 
            +
                      #print "#{val[i]}\t"
         | 
| 109 113 | 
             
                    end
         | 
| 110 114 | 
             
                    count += 1 if has_del
         | 
| 111 | 
            -
                    print "#{count}\n"
         | 
| 115 | 
            +
                    #print "#{count}\n"
         | 
| 112 116 | 
             
                  end
         | 
| 113 117 | 
             
                  return count
         | 
| 114 118 | 
             
                end
         | 
| 115 119 |  | 
| 116 120 | 
             
                def primer_region(target_chromosome, parental_chr )
         | 
| 117 121 | 
             
                  chromosome_seq = aligned_sequences[target_chromosome]
         | 
| 118 | 
            -
                   | 
| 119 | 
            -
                   | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
                    chromosome_seq = surrounding_exon_sequences[target_chromosome]
         | 
| 124 | 
            -
             | 
| 125 | 
            -
                  end
         | 
| 122 | 
            +
                  names = aligned_sequences.keys
         | 
| 123 | 
            +
                  target_chromosome = get_target_sequence(names, target_chromosome)
         | 
| 124 | 
            +
                  chromosome_seq = aligned_sequences[target_chromosome]
         | 
| 125 | 
            +
                  chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
         | 
| 126 | 
            +
                  chromosome_seq = "-" * sequence_original.size unless chromosome_seq
         | 
| 126 127 | 
             
                  chromosome_seq = chromosome_seq.downcase
         | 
| 127 | 
            -
             | 
| 128 | 
            +
                  #puts chromosome_seq
         | 
| 128 129 | 
             
                  mask = mask_aligned_chromosomal_snp(target_chromosome)
         | 
| 129 130 |  | 
| 130 131 | 
             
                  pr = PrimerRegion.new
         | 
| @@ -146,7 +147,7 @@ module Bio::PolyploidTools | |
| 146 147 | 
             
                          pr.crhomosome_specific_intron << position_in_region
         | 
| 147 148 | 
             
                        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
         | 
| 148 149 | 
             
                          parental[i] = mask[i]
         | 
| 149 | 
            -
                          pr.chromosome_specific << position_in_region if count_deletions_around(1,target_chromosome) < 3
         | 
| 150 | 
            +
                          pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
         | 
| 150 151 | 
             
                          pr.chromosome_specific_in_mask << i
         | 
| 151 152 | 
             
                        end
         | 
| 152 153 |  | 
| @@ -165,16 +166,15 @@ module Bio::PolyploidTools | |
| 165 166 | 
             
                      position_in_region += 1
         | 
| 166 167 | 
             
                    end #Closes region with bases 
         | 
| 167 168 | 
             
                  end         
         | 
| 168 | 
            -
             | 
| 169 169 | 
             
                  pr.sequence=parental.gsub('-','')
         | 
| 170 170 | 
             
                  pr
         | 
| 171 171 | 
             
                end
         | 
| 172 172 |  | 
| 173 | 
            -
                def  | 
| 174 | 
            -
             | 
| 175 | 
            -
                  left = opts[: | 
| 173 | 
            +
                def return_primer_3_string(opts={})
         | 
| 174 | 
            +
                  #puts "return_primer_3_string #{opts.inspect}"
         | 
| 175 | 
            +
                  left = opts[:left_pos]
         | 
| 176 176 | 
             
                  right = opts[:right_pos]
         | 
| 177 | 
            -
                  sequence =  opts[:sequence]
         | 
| 177 | 
            +
                  sequence =  opts[:sequence].clone
         | 
| 178 178 | 
             
                  orientation = "forward"
         | 
| 179 179 | 
             
                  if opts[:right_pos]
         | 
| 180 180 | 
             
                    orientation = "forward"
         | 
| @@ -201,7 +201,7 @@ module Bio::PolyploidTools | |
| 201 201 |  | 
| 202 202 | 
             
                  #In case that we don't have a right primer, we do both orientations
         | 
| 203 203 | 
             
                  unless opts[:right_pos]
         | 
| 204 | 
            -
                    sequence =  opts[:sequence]    
         | 
| 204 | 
            +
                    sequence =  opts[:sequence].clone    
         | 
| 205 205 | 
             
                    left = sequence.size - left - 1
         | 
| 206 206 | 
             
                    orientation = "reverse"
         | 
| 207 207 | 
             
                    sequence = reverse_complement_string(sequence)
         | 
| @@ -223,7 +223,9 @@ module Bio::PolyploidTools | |
| 223 223 | 
             
                end
         | 
| 224 224 |  | 
| 225 225 | 
             
                def primer_3_all_strings(target_chromosome, parental) 
         | 
| 226 | 
            +
                  #puts "primer_3_all_strings: #{target_chromosome} #{parental}"
         | 
| 226 227 | 
             
                  pr = primer_region(target_chromosome, parental )
         | 
| 228 | 
            +
                  #puts pr.inspect
         | 
| 227 229 | 
             
                  primer_3_propertes = Array.new
         | 
| 228 230 |  | 
| 229 231 | 
             
                  seq_original = String.new(pr.sequence)
         | 
| @@ -236,24 +238,28 @@ module Bio::PolyploidTools | |
| 236 238 | 
             
                    snp_type = "non-homoeologous"
         | 
| 237 239 | 
             
                  end
         | 
| 238 240 |  | 
| 239 | 
            -
                  pr.chromosome_specific. | 
| 240 | 
            -
                    
         | 
| 241 | 
            -
                     | 
| 242 | 
            -
                     | 
| 243 | 
            -
                     | 
| 241 | 
            +
                  pr.chromosome_specific.each_with_index do |pos , i|
         | 
| 242 | 
            +
                    seq_snp =  seq_original.clone
         | 
| 243 | 
            +
                    #original_base = seq_snp[pos]
         | 
| 244 | 
            +
                    #puts "___"
         | 
| 245 | 
            +
                    #puts aligned_sequences.keys.inspect
         | 
| 246 | 
            +
                    #puts target_chromosome
         | 
| 247 | 
            +
                    t_chr =  get_target_sequence(aligned_sequences.keys, target_chromosome)
         | 
| 248 | 
            +
                    other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)
         | 
| 244 249 |  | 
| 245 250 | 
             
                    args = {
         | 
| 246 251 | 
             
                      :name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}", 
         | 
| 247 252 | 
             
                      :left_pos => pos,  
         | 
| 248 | 
            -
                      :sequence=> | 
| 253 | 
            +
                      :sequence=>seq_snp
         | 
| 249 254 | 
             
                    }
         | 
| 250 255 |  | 
| 251 | 
            -
                    
         | 
| 256 | 
            +
                    seq_snp =  seq_original.clone
         | 
| 252 257 | 
             
                    primer_3_propertes << return_primer_3_string(args)
         | 
| 258 | 
            +
                    
         | 
| 253 259 | 
             
                    args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
         | 
| 254 | 
            -
                    args[:sequence] = seq_snp
         | 
| 255 | 
            -
                    #TODO: Find base from another chromosome
         | 
| 256 260 | 
             
                    seq_snp[pos] =  other_chromosome_base.upcase
         | 
| 261 | 
            +
                    args[:sequence] = seq_snp
         | 
| 262 | 
            +
                    
         | 
| 257 263 |  | 
| 258 264 | 
             
                    primer_3_propertes << return_primer_3_string(args)
         | 
| 259 265 | 
             
                  end
         | 
| @@ -265,7 +271,7 @@ module Bio::PolyploidTools | |
| 265 271 | 
             
                def aligned_sequences
         | 
| 266 272 |  | 
| 267 273 | 
             
                  return @aligned_sequences if @aligned_sequences
         | 
| 268 | 
            -
                  if sequences_to_align.size  | 
| 274 | 
            +
                  if sequences_to_align.size <= 1
         | 
| 269 275 | 
             
                    @aligned_sequences = sequences_to_align
         | 
| 270 276 | 
             
                    return @aligned_sequences
         | 
| 271 277 | 
             
                  end
         | 
| @@ -162,6 +162,7 @@ module Bio::PolyploidTools | |
| 162 162 | 
             
                end
         | 
| 163 163 |  | 
| 164 164 | 
             
                def add_exon(exon, arm, filter_best: true)
         | 
| 165 | 
            +
                  exon_list[arm] = Array.new unless exon_list[arm]
         | 
| 165 166 | 
             
                  if filter_best and exon_list[arm].size > 0
         | 
| 166 167 | 
             
                    current = exon_list[arm].first
         | 
| 167 168 | 
             
                    exon_list[arm] = [exon] if exon.record.score > current.record.score 
         | 
| @@ -558,7 +559,7 @@ module Bio::PolyploidTools | |
| 558 559 | 
             
                def aligned_sequences
         | 
| 559 560 |  | 
| 560 561 | 
             
                  return @aligned_sequences if @aligned_sequences
         | 
| 561 | 
            -
             | 
| 562 | 
            +
                  return Hash.new if sequences_to_align.size == 0
         | 
| 562 563 |  | 
| 563 564 | 
             
                  options = ['--maxiterate', '1000', '--localpair', '--quiet']
         | 
| 564 565 | 
             
                  mafft = Bio::MAFFT.new( "mafft" , options)
         | 
| @@ -756,13 +757,13 @@ module Bio::PolyploidTools | |
| 756 757 | 
             
                  self.exon_list.each do |chromosome, exon_arr| 
         | 
| 757 758 | 
             
                    exon_arr.each do |exon|
         | 
| 758 759 | 
             
                      exon_start_offset = exon.query_region.start - gene_region.start
         | 
| 759 | 
            -
                       | 
| 760 | 
            +
                      flanking_region  = exon.target_flanking_region_from_position(position,flanking_size)
         | 
| 760 761 | 
             
                      #TODO: Padd when the exon goes over the regions... 
         | 
| 761 | 
            -
                      #puts  | 
| 762 | 
            +
                      #puts flanking_region.inspect
         | 
| 762 763 | 
             
                      #Ignoring when the exon is in a gap
         | 
| 763 764 | 
             
                      unless exon.snp_in_gap 
         | 
| 764 | 
            -
                        exon_seq = container.chromosome_sequence( | 
| 765 | 
            -
                        @surrounding_exon_sequences["#{chromosome}_#{ | 
| 765 | 
            +
                        exon_seq = container.chromosome_sequence(flanking_region)
         | 
| 766 | 
            +
                        @surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
         | 
| 766 767 | 
             
                      end
         | 
| 767 768 | 
             
                    end
         | 
| 768 769 | 
             
                  end
         | 
    
        data/lib/bio/db/blast.rb
    CHANGED
    
    | @@ -82,7 +82,7 @@ module Bio::DB::Blast | |
| 82 82 | 
             
            		max_target_seqs = 6 #TODO: Actually add this as an argument to PolyMarker. 
         | 
| 83 83 | 
             
            		max_target_seqs = opts[:max_hits] * 2 if opts[:max_hits]
         | 
| 84 84 | 
             
            		cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"
         | 
| 85 | 
            -
             | 
| 85 | 
            +
            		#puts cmdline
         | 
| 86 86 | 
             
            		status, stdout, stderr = systemu cmdline
         | 
| 87 87 | 
             
            		if status.exitstatus == 0
         | 
| 88 88 | 
             
            			alns = Array.new unless block_given?
         | 
    
        data/lib/bio/db/primer3.rb
    CHANGED
    
    | @@ -129,12 +129,12 @@ module Bio::DB::Primer3 | |
| 129 129 | 
             
                  @values << snp_type
         | 
| 130 130 | 
             
                  if primer3_line_1 and primer3_line_2
         | 
| 131 131 | 
             
                    #Block that searches both if both pairs have a TM
         | 
| 132 | 
            -
                     | 
| 133 | 
            -
                    primer_2_tm = find_left_primer_temp(primer_2)
         | 
| 134 | 
            -
                    primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation) 
         | 
| 132 | 
            +
                    primer_1    = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation) 
         | 
| 135 133 | 
             
                    primer_1_tm = find_left_primer_temp(primer_1)
         | 
| 136 | 
            -
             | 
| 137 | 
            -
                     | 
| 134 | 
            +
             | 
| 135 | 
            +
                    primer_2    = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
         | 
| 136 | 
            +
                    primer_2_tm = find_left_primer_temp(primer_2)
         | 
| 137 | 
            +
             | 
| 138 138 | 
             
                    if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
         | 
| 139 139 | 
             
                      @values << primer3_line_1.left_primer
         | 
| 140 140 | 
             
                      @values << primer_2
         | 
| @@ -159,7 +159,7 @@ module Bio::DB::Primer3 | |
| 159 159 | 
             
                      @values << primer3_line_2.best_pair.product_size
         | 
| 160 160 | 
             
                    else
         | 
| 161 161 |  | 
| 162 | 
            -
                      first_candidate | 
| 162 | 
            +
                      first_candidate  = find_primer_pair_first
         | 
| 163 163 | 
             
                      second_candidate = find_primer_pair_second
         | 
| 164 164 |  | 
| 165 165 | 
             
                      if first_candidate
         | 
| @@ -183,7 +183,7 @@ module Bio::DB::Primer3 | |
| 183 183 | 
             
                        @values << first_candidate.best_pair.left.tm 
         | 
| 184 184 | 
             
                        @values << primer_2_tm
         | 
| 185 185 | 
             
                        @values << first_candidate.best_pair.right.tm
         | 
| 186 | 
            -
                        @values << "first" 
         | 
| 186 | 
            +
                        @values << "first-" 
         | 
| 187 187 | 
             
                        @values << first_candidate.best_pair.product_size
         | 
| 188 188 | 
             
                      elsif  second_candidate 
         | 
| 189 189 | 
             
                        #puts "B"
         | 
| @@ -195,7 +195,7 @@ module Bio::DB::Primer3 | |
| 195 195 | 
             
                        @values << primer_1_tm
         | 
| 196 196 | 
             
                        @values << second_candidate.best_pair.left.tm
         | 
| 197 197 | 
             
                        @values << second_candidate.best_pair.right.tm
         | 
| 198 | 
            -
                        @values << "second"
         | 
| 198 | 
            +
                        @values << "second-"
         | 
| 199 199 | 
             
                        @values << second_candidate.best_pair.product_size
         | 
| 200 200 | 
             
                      elsif  first_candidate 
         | 
| 201 201 | 
             
                        #puts "C"
         | 
| @@ -207,7 +207,7 @@ module Bio::DB::Primer3 | |
| 207 207 | 
             
                        @values << primer_2_tm
         | 
| 208 208 | 
             
                        @values << first_candidate.best_pair.left.tm
         | 
| 209 209 | 
             
                        @values << first_candidate.best_pair.right.tm
         | 
| 210 | 
            -
                        @values << "first"
         | 
| 210 | 
            +
                        @values << "first/"
         | 
| 211 211 | 
             
                        @values << first_candidate.best_pair.product_size
         | 
| 212 212 | 
             
                      end
         | 
| 213 213 | 
             
                    end
         | 
| @@ -277,7 +277,6 @@ module Bio::DB::Primer3 | |
| 277 277 | 
             
                end
         | 
| 278 278 |  | 
| 279 279 | 
             
                def orientation
         | 
| 280 | 
            -
                  puts "insideOrientation: #{self.values[11]}"
         | 
| 281 280 | 
             
                  return self.values[11] if self.values[11]&& self.values[11] != nil 
         | 
| 282 281 | 
             
                  return 'unknown'
         | 
| 283 282 | 
             
                end
         | 
| @@ -385,7 +384,7 @@ module Bio::DB::Primer3 | |
| 385 384 | 
             
                      @primer3_line_1 = primer3record if not @primer3_line_1  or @primer3_line_1 > primer3record
         | 
| 386 385 | 
             
                    when primer3record.line == @line_2
         | 
| 387 386 | 
             
                      primers_line_2 << primer3record
         | 
| 388 | 
            -
                      @primer3_line_2 = primer3record if not @primer3_line_2 | 
| 387 | 
            +
                      @primer3_line_2 = primer3record if not @primer3_line_2  or @primer3_line_2 > primer3record
         | 
| 389 388 | 
             
                    else
         | 
| 390 389 | 
             
                      raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
         | 
| 391 390 | 
             
                    end
         | 
| @@ -508,9 +507,7 @@ module Bio::DB::Primer3 | |
| 508 507 | 
             
                def left_primer_with_coordinates(coordinates, other_orientation)
         | 
| 509 508 |  | 
| 510 509 | 
             
                  seq = self.sequence_template
         | 
| 511 | 
            -
                   | 
| 512 | 
            -
                  seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
         | 
| 513 | 
            -
             | 
| 510 | 
            +
                  seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation    
         | 
| 514 511 | 
             
                  seq[coordinates[0],coordinates[1]] 
         | 
| 515 512 | 
             
                end
         | 
| 516 513 |  | 
| @@ -807,9 +804,9 @@ module Bio::DB::Primer3 | |
| 807 804 | 
             
                  str = ""
         | 
| 808 805 | 
             
                  snp_hash.each do |k, snp|  
         | 
| 809 806 | 
             
                    if snp.found_primers?
         | 
| 810 | 
            -
                      str << snp.gene << snp.original << "\t" << tail_a << snp.first_primer | 
| 811 | 
            -
                      str << snp.gene << snp.snp      << "\t" << tail_b << snp.second_primer << "\n"
         | 
| 812 | 
            -
                      str << snp.gene                 << "\t" | 
| 807 | 
            +
                      str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer  << "\n"
         | 
| 808 | 
            +
                      str << snp.gene << snp.snp      << "_2nd\t" << tail_b << snp.second_primer << "\n"
         | 
| 809 | 
            +
                      str << snp.gene                 << "_common\t"        << snp.common_primer << "\n"
         | 
| 813 810 | 
             
                    end
         | 
| 814 811 | 
             
                  end
         | 
| 815 812 | 
             
                  return str
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: bio-polyploid-tools
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 1.0.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Ricardo H.  Ramirez-Gonzalez
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2019- | 
| 11 | 
            +
            date: 2019-07-05 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bio
         | 
| @@ -120,7 +120,6 @@ executables: | |
| 120 120 | 
             
            - filter_exonerate_by_identity.rb
         | 
| 121 121 | 
             
            - find_best_blat_hit.rb
         | 
| 122 122 | 
             
            - find_best_exonerate.rb
         | 
| 123 | 
            -
            - find_homoeologue_variations.rb
         | 
| 124 123 | 
             
            - get_longest_hsp_blastx_triads.rb
         | 
| 125 124 | 
             
            - hexaploid_primers.rb
         | 
| 126 125 | 
             
            - homokaryot_primers.rb
         | 
| @@ -132,6 +131,7 @@ executables: | |
| 132 131 | 
             
            - mask_triads.rb
         | 
| 133 132 | 
             
            - polymarker.rb
         | 
| 134 133 | 
             
            - polymarker_capillary.rb
         | 
| 134 | 
            +
            - polymarker_deletions.rb
         | 
| 135 135 | 
             
            - snp_position_to_polymarker.rb
         | 
| 136 136 | 
             
            - snps_between_bams.rb
         | 
| 137 137 | 
             
            - tag_stats.rb
         | 
| @@ -139,12 +139,10 @@ executables: | |
| 139 139 | 
             
            - vcfToPolyMarker.rb
         | 
| 140 140 | 
             
            extensions: []
         | 
| 141 141 | 
             
            extra_rdoc_files:
         | 
| 142 | 
            -
            - README
         | 
| 143 142 | 
             
            - README.md
         | 
| 144 143 | 
             
            files:
         | 
| 145 144 | 
             
            - ".travis.yml"
         | 
| 146 145 | 
             
            - Gemfile
         | 
| 147 | 
            -
            - README
         | 
| 148 146 | 
             
            - README.md
         | 
| 149 147 | 
             
            - Rakefile
         | 
| 150 148 | 
             
            - VERSION
         | 
| @@ -156,7 +154,6 @@ files: | |
| 156 154 | 
             
            - bin/filter_exonerate_by_identity.rb
         | 
| 157 155 | 
             
            - bin/find_best_blat_hit.rb
         | 
| 158 156 | 
             
            - bin/find_best_exonerate.rb
         | 
| 159 | 
            -
            - bin/find_homoeologue_variations.rb
         | 
| 160 157 | 
             
            - bin/get_longest_hsp_blastx_triads.rb
         | 
| 161 158 | 
             
            - bin/hexaploid_primers.rb
         | 
| 162 159 | 
             
            - bin/homokaryot_primers.rb
         | 
| @@ -168,6 +165,7 @@ files: | |
| 168 165 | 
             
            - bin/mask_triads.rb
         | 
| 169 166 | 
             
            - bin/polymarker.rb
         | 
| 170 167 | 
             
            - bin/polymarker_capillary.rb
         | 
| 168 | 
            +
            - bin/polymarker_deletions.rb
         | 
| 171 169 | 
             
            - bin/snp_position_to_polymarker.rb
         | 
| 172 170 | 
             
            - bin/snps_between_bams.rb
         | 
| 173 171 | 
             
            - bin/tag_stats.rb
         | 
    
        data/README
    DELETED
    
    | @@ -1,21 +0,0 @@ | |
| 1 | 
            -
            = bio-polyploid-tools
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            == Introduction
         | 
| 4 | 
            -
            This tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible. 
         | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
            == Installation
         | 
| 8 | 
            -
            'gem install bio-polyploid-tools'
         | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
            == Notes
         | 
| 12 | 
            -
             | 
| 13 | 
            -
            * If the SNP is in a gap in the alignmetn to the chromosomes, it is ignored. 
         | 
| 14 | 
            -
             | 
| 15 | 
            -
            BUG: Sometimes the primers are reversed (the first comes second)
         | 
| 16 | 
            -
            BUG: Blocks with NNNs are picked and treated as semi-specific. 
         | 
| 17 | 
            -
            BUG: If the name of the reference have space, the ID is not chopped. ">gene_1 (G12A)" shouls be treated as ">gene_1". 
         | 
| 18 | 
            -
            TODO: If reading from a reference file, only get one reference to align when the region is queried several times
         | 
| 19 | 
            -
            TODO: Add a parameter file file to tweak the alignments. 
         | 
| 20 | 
            -
             | 
| 21 | 
            -
             |