RubyGems - bio-polyploid-tools - Versions diffs - 0.7.3 → 0.8.0 - Mend

bio-polyploid-tools 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

checksums.yaml +5 -5
data/.travis.yml +17 -0
data/Gemfile +10 -7
data/README.md +44 -0
data/Rakefile +14 -14
data/VERSION +1 -1
data/bin/bfr.rb +2 -2
data/bin/blast_triads.rb +166 -0
data/bin/blast_triads_promoters.rb +192 -0
data/bin/find_homoeologue_variations.rb +385 -0
data/bin/get_longest_hsp_blastx_triads.rb +66 -0
data/bin/hexaploid_primers.rb +2 -2
data/bin/homokaryot_primers.rb +2 -2
data/bin/mafft_triads.rb +120 -0
data/bin/mafft_triads_promoters.rb +403 -0
data/bin/polymarker.rb +73 -17
data/bin/polymarker_capillary.rb +416 -0
data/bin/snp_position_to_polymarker.rb +5 -3
data/bin/snps_between_bams.rb +0 -29
data/bin/vcfLineToTable.rb +56 -0
data/bio-polyploid-tools.gemspec +74 -32
data/lib/bio/BFRTools.rb +1 -0
data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
data/lib/bio/PolyploidTools/SNP.rb +58 -18
data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
data/lib/bio/db/blast.rb +112 -0
data/lib/bio/db/exonerate.rb +4 -5
data/lib/bio/db/primer3.rb +83 -14
data/test/data/BS00068396_51_blast.tab +4 -0
data/test/data/BS00068396_51_contigs.nhr +0 -0
data/test/data/BS00068396_51_contigs.nin +0 -0
data/test/data/BS00068396_51_contigs.nsq +0 -0
data/test/data/BS00068396_51_for_polymarker.fa +1 -0
data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
data/test/data/S22380157.vcf +67 -0
data/test/data/S58861868/LIB1716.bam +0 -0
data/test/data/S58861868/LIB1716.sam +651 -0
data/test/data/S58861868/LIB1719.bam +0 -0
data/test/data/S58861868/LIB1719.sam +805 -0
data/test/data/S58861868/LIB1721.bam +0 -0
data/test/data/S58861868/LIB1721.sam +1790 -0
data/test/data/S58861868/LIB1722.bam +0 -0
data/test/data/S58861868/LIB1722.sam +1271 -0
data/test/data/S58861868/S58861868.fa +16 -0
data/test/data/S58861868/S58861868.fa.fai +1 -0
data/test/data/S58861868/S58861868.vcf +76 -0
data/test/data/S58861868/header.txt +9 -0
data/test/data/S58861868/merged.bam +0 -0
data/test/data/S58861868/merged_reheader.bam +0 -0
data/test/data/S58861868/merged_reheader.bam.bai +0 -0
data/test/data/bfr_out_test.csv +5 -5
data/test/data/headerMergeed.txt +9 -0
data/test/data/headerS2238015 +1 -0
data/test/data/mergedLibs.bam +0 -0
data/test/data/mergedLibsReheader.bam +0 -0
data/test/data/mergedLibsSorted.bam +0 -0
data/test/data/mergedLibsSorted.bam.bai +0 -0
data/test/test_bfr.rb +26 -34
data/test/test_blast.rb +47 -0
data/test/test_exonearate.rb +4 -9
data/test/test_snp_parsing.rb +42 -22
metadata +81 -20
data/Gemfile.lock +0 -67

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: adcaebc142757300631df98a5f672cae5dc76cb7
-  data.tar.gz: 05e3acefabb42d5ea3c84236f4b79ebf338c6306
+SHA256:
+  metadata.gz: '08be9c740b45561cf8de023e6ca63bb6be4ae63e6f89bd1eb4b149da9cf47334'
+  data.tar.gz: 94aa0d62f15ad380a35fe2c4bbcd870f2cb984f04c76aa825084b9ab97431d8b
 SHA512:
-  metadata.gz: c1fa0e9e177bad1633fc5c4d3fd3f3fe2c2b4fc9915dfeeea943d7559d8850061f9437f703a7fa462163eb7f52431b747216d540ed73e64559ca20cfc7fe471b
-  data.tar.gz: 4db9d4d8b404378b39af64978d82671d94b5babc42ea4542c9f76b880793c23a5283945dffc4d76d2ba68761c3d30e1b434f867513e8a71c3f60f61b3885cf58
+  metadata.gz: 6f15740cb929555b6627eac53dc12b28d75c10709e271a23aef06935c11fb83bf99479afe68d8db5e5bac8d9ecc06c62ac8f17fc4e3066e8ae6de1094b3fb042
+  data.tar.gz: 7a8cee46ca1ecf4a6ed71b497005f32f851067667c59e36a6b91bea3e8153c9beee4a765866f0849ae0fe83378cc241372fde6368f6fddc11e426a0a12415c36

data/.travis.yml ADDED

@@ -0,0 +1,17 @@
+language: ruby
+sudo: false
+addons:
+  apt:
+    packages:
+    - zlib1g-dev
+    - libncurses5-dev
+    - libtinfo-dev
+    - exonerate
+rvm:
+  - 2.1.10
+  - 2.2.5
+  - 2.3.5
+  - 2.4.2
+before_install:
+  - export RUBYOPT="-W1"

data/Gemfile CHANGED

@@ -3,15 +3,18 @@ source "http://rubygems.org"
 # Example:
 #   gem "activesupport", ">= 2.3.5"
-gem "bio", ">= 1.4.3"
-gem "bio-samtools", ">= 2.0.4"
-gem "rake"
-gem "jeweler"
+gem "bio", ">= 1.5.1"
+gem "bio-samtools", ">= 2.6.2"
+#gem "rake"
 gem "systemu", ">=2.5.2"
 group :development do
-#  gem "shoulda", ">= 0"
-#  gem "shoulda-context"
-#  gem	 "shoulda-matchers"
+	gem "shoulda", ">= 2.10"
+	gem 'test-unit'
+	if RUBY_VERSION.start_with?("2.1") or RUBY_VERSION.start_with?("2.2") or RUBY_VERSION.start_with?("2.0")
+		gem "jeweler", "= 2.0.1"
+	else
+		gem "juwelier"
+	end
 end

data/README.md CHANGED

@@ -52,6 +52,43 @@ Usage: polymarker.rb [options]
     -P, --primers_to_order			 If present, saves a file named primers_to_order which contains the KASP tails
 ```
+## Input formats
+The following formats are used to define the marker sequences:
+### Marker list
+If the option ```--marker_list FILE``` is used, the SNP and the flanking sequence is included in the file. The format contains 3 columns (the order is important):
+* **snp_name** The ID of the marker. Must be unique.
+* **target chromosome** for the specific primers. Must be in line with the chromosome selection criteria.
+* **sequence** The sequence flanking the SNP with the SNP highlighted in square brackets (```[]```) and the two alleles separated by a forward slash (```/```).
+#### Example:
+```
+BS00068396_51,2A,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
+```
+### SNP list
+If the flanking sequence is unknown, but the position on a reference is available, the option ```--snp_list``` can be used and the FASTA file with the reference sequence must be provided with the option ```--reference```. This is to allow the use of a different assembly or set of contigs used for the discovery of the SNPs that are different to the reference given in the option ```--contigs```. The format contains the following positional columns:
+* **scaffold** The scaffold where the SNP is.
+* **reference allele** The base in the reference (may or may not be the same as in the reference file).
+* **position** Position of the SNP. The first base in the scaffold is base 1.
+* **alternative allele** The base in the alternative allele.
+* **target chromosome** for the specific primers. Must be in line with the chromosome selection criteria.
+####Example
+```
+IWGSC_CSS_1AL_scaff_110,C,519,A,2A
+```
+This file format can be used with ```snp_position_to_polymarker.rb``` to produce the input for the option ```--marker_list```.
 ###Custom reference sequences.
 By default, the contigs and pseudomolecules from [ensembl](ftp://ftp.ensemblgenomes.org/pub/release-25/plants/fasta/triticum_aestivum/dna/Triticum_aestivum.IWGSC2.25.dna.genome.fa.gz
 ) are used. However, it is possible to use a custom reference. To define the chromosome where each contig belongs the argument ```arm_selection``` is used.  The default uses ids like: ```IWGSC_CSS_1AL_scaff_110```, where the third field, separated by underscores is used. A simple way to add custom references is to rename the fasta file to follow that convention. Another way is to use the option ```--arm_selection arm_selection_first_two```, where only the first two characters in each contig are used as identifier, useful when pseudomolecules are named after the chromosomes (ie: ">1A" in the fasta file).
@@ -71,6 +108,13 @@ end
 The function should return a 2 character string, when the first is the chromosome number and the second the chromosome group. The symbol in the hash is the name to be used in the argument ```--arm_selection```.  If you want your parser to be added to the distribution, feel free to fork and make a pull request.
+##Using blast
+To use blast instead of exonerate, use the following command:
+```
+./bin/polymarker.rb --contigs test/data/BS00068396_51_contigs.fa --marker_list test/data/BS00068396_51_for_polymarker.fa  --aligner blast  -a arm_selection_first_two
+```
 ##Release Notes

data/Rakefile CHANGED

@@ -12,16 +12,25 @@ begin
 end
 require 'rake'
-require 'jeweler'
-Jeweler::Tasks.new do |gem|
+if RUBY_VERSION.start_with?("2.1") or RUBY_VERSION.start_with?("2.2") or RUBY_VERSION.start_with?("2.0")
+  require 'jeweler'
+  @taskClass = Jeweler
+else
+  require 'juwelier'
+  @taskClass = Juwelier
+end
+@taskClass::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
    gem.name = "bio-polyploid-tools"
   gem.homepage = "http://github.com/tgac/bioruby-polyploid-tools"
   gem.license = "MIT"
   gem.summary = %Q{Tool to work with polyploids, NGS and molecular biology}
-  gem.description = %Q{Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat}
-   gem.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
+  gem.description = %Q{Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat}
+   gem.email = "ricardo.ramirez-gonzalez@jic.ac.uk"
   gem.authors = ["Ricardo H.  Ramirez-Gonzalez"]
   # Include your dependencies below. Runtime dependencies are required when using your gem,
   # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
@@ -29,7 +38,7 @@ Jeweler::Tasks.new do |gem|
   #  gem.add_development_dependency 'rspec', '> 1.2.3'
 #  gem.extensions = "ext/mkrf_conf.rb"
 end
-Jeweler::RubygemsDotOrgTasks.new
+@taskClass::RubygemsDotOrgTasks.new
 require 'rake/testtask'
 Rake::TestTask.new(:test) do |test|
@@ -50,12 +59,3 @@ end
 task :default => :test
-#require 'rdoc/task'
-##RDoc::Task.new do |rdoc|
-#  version = File.exist?('VERSION') ? File.read('VERSION') : ""
-#  rdoc.rdoc_dir = 'rdoc'
-#  rdoc.title = "bio-samtools #{version}"
-#  rdoc.rdoc_files.include('README*')
-#  rdoc.rdoc_files.include('lib/**/*.rb')
-#end

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.7.3
1	+ 0.8.0

data/bin/bfr.rb CHANGED

@@ -50,11 +50,11 @@ OptionParser.new do |opts|
     options[:bulk_2] = o
   end
-  opts.on("-m", "--chunk_size FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
+  opts.on("-m", "--chunk_size FILE", "Number of chunks to divde the SNP calling. Useful to run in a cluster.") do |o|
     options[:chunk_size] = o.to_i
   end
-  opts.on("-n", "--chunk FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
+  opts.on("-n", "--chunk FILE", "Chunk number. Must be less than chunk_size. ") do |o|
     options[:chunk] = o.to_i
   end

data/bin/blast_triads.rb ADDED

@@ -0,0 +1,166 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'bio'
+require 'csv'
+require 'bio-blastxmlparser'
+require 'fileutils'
+require 'tmpdir'
+# Defaults for every command-line setting; the OptionParser block below
+# overwrites individual entries in place.
+options = {}
+options[:identity] = 50
+options[:min_bases] = 200
+options[:split_token] = "-"
+# Scratch directory for the per-triad FASTA files and the BLAST report.
+options[:tmp_folder]  = Dir.mktmpdir
+options[:program]  = "blastn"
+# 0 disables sub-sampling: every triad is run.
+options[:random_sample] = 0
+OptionParser.new do |opts|
+  # The banner names this script (it used to advertise filter_blat.rb).
+  opts.banner = "Usage: blast_triads.rb [options]"
+  opts.on("-i", "--identity FLOAT", "Minimum percentage identity") do |o|
+    options[:identity] = o.to_f
+  end
+  opts.on("-c", "--min_bases int", "Minimum alignment length (default 200)") do |o|
+    options[:min_bases] = o.to_i
+  end
+  opts.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
+    options[:triads] = o
+  end
+  opts.on("-f", "--sequences FILE" , "FASTA file containing all the possible sequences. ") do |o|
+    options[:fasta] = o
+  end
+  opts.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |o|
+    options[:split_token] = o
+  end
+  opts.on("-p", "--program blastn|blastp", "The program to use in the alignments. Currntly only supported blastn and blastp") do |o|
+    options[:program] = o
+  end
+  opts.on("-r", "--random_sample INT", "Number of blast to run and keep. If set, only the number of subsets will be run") do |o|
+    options[:random_sample] = o.to_i
+  end
+end.parse!
+# Aligns two FASTA files with BLAST and summarises the best hit.
+#
+# path_a   - path of the query FASTA file.
+# path_b   - path of the subject FASTA file.
+# out_path - file that receives the BLAST XML report (-outfmt 5).
+# program  - BLAST executable, also passed as -task (default "blastn").
+#
+# Returns [length, pident, psimilarity] for the hit whose HSPs add up to the
+# largest alignment length. All three are zero when there is no hit or when
+# the BLAST command fails. psimilarity is only accumulated for "blastp".
+def blast_pair_fast(path_a, path_b, out_path, program: "blastn")
+  # Argument-list form of system: no shell is involved, so paths containing
+  # spaces or shell metacharacters are passed through verbatim.
+  executed = system(program, "-query", path_a, "-subject", path_b,
+                    "-task", program, "-out", out_path, "-outfmt", "5")
+  unless executed
+    # Callers reuse out_path between pairs; parsing it after a failed run
+    # would report the previous pair's alignment as if it were this one.
+    $stderr.puts "BLAST failed: #{program} -query #{path_a} -subject #{path_b}"
+    return [0, 0.0, 0.0]
+  end
+  max_length = 0
+  max_pident = 0.0
+  max_similarity = 0.0
+  Bio::BlastXMLParser::XmlIterator.new(out_path).to_enum.each do |iter|
+    iter.each do |hit|
+      align_len = 0
+      identity = 0.0
+      positives = 0.0
+      # Totals are accumulated over every HSP of the hit.
+      hit.each do |hsp|
+        align_len += hsp.align_len
+        identity  += hsp.identity
+        positives += hsp.positive if program == "blastp"
+      end
+      next unless align_len > max_length
+      max_length = align_len
+      max_pident = 100 * identity / align_len
+      max_similarity = 100 * positives / align_len
+    end
+  end
+  [max_length, max_pident, max_similarity]
+end
+split_token = options[:split_token]
+# One entry per gene name (the part of the FASTA id before split_token),
+# keeping the longest sequence when a gene appears more than once.
+sequences = {}
+sequence_count = 0
+Bio::FlatFile.open(Bio::FastaFormat, options[:fasta]) do |fasta_file|
+  fasta_file.each do |entry|
+    gene_name = entry.entry_id.split(split_token)[0]
+    known = sequences[gene_name]
+    sequences[gene_name] = entry if known.nil? || entry.length > known.length
+    sequence_count += 1
+  end
+end
+$stderr.puts "#Loaded #{sequences.length} genes from #{sequence_count} sequences"
+$stderr.puts "TMP dir: #{options[:tmp_folder]}"
+# Scratch files, rewritten for every triad.
+tmp_paths = {
+  "A" => options[:tmp_folder] + "/A.fa",
+  "B" => options[:tmp_folder] + "/B.fa",
+  "D" => options[:tmp_folder] + "/D.fa"
+}
+out_tmp = options[:tmp_folder] + "/out.blast"
+puts [
+  "group_id" , "query"      , "subject" ,
+  "chr_query", "chr_subject", "aln_type",
+  "length"   , "pident" , "psimilarity"   ].join("\t")
+# NOTE: the line count includes the CSV header row.
+count_lines = File.foreach(options[:triads]).count
+probability = options[:random_sample] / count_lines.to_f
+probability = 1 if options[:random_sample] == 0
+prng = Random.new
+CSV.foreach(options[:triads], headers: true) do |row|
+  triad = row['group_id']
+  # With sampling enabled a triad is both run and archived with the given
+  # probability; with sampling disabled every triad is run and none archived.
+  save = probability > prng.rand && probability < 1
+  run  = probability == 1 || save
+  next unless run
+  genes = { "A" => row['A'], "B" => row['B'], "D" => row['D'] }
+  seqs = {}
+  genes.each { |genome, gene| seqs[genome] = sequences[gene] }
+  seqs.each do |genome, seq|
+    File.open(tmp_paths[genome], 'w') { |f| f.write(seq) } if seq
+  end
+  save_folder = "random_sample/#{triad}"
+  if save
+    FileUtils.mkdir_p save_folder
+    seqs.each { |genome, seq| FileUtils.cp(tmp_paths[genome], save_folder) if seq }
+  end
+  # One row per homoeologue pair. The query and subject columns are taken
+  # from the pair itself, so A->D and B->D rows name the genes really aligned.
+  [["A", "B"], ["A", "D"], ["B", "D"]].each do |query, subject|
+    next unless seqs[query] && seqs[subject]
+    to_print = [triad, genes[query], genes[subject], query, subject, "#{query}->#{subject}"]
+    to_print << blast_pair_fast(tmp_paths[query], tmp_paths[subject], out_tmp, program: options[:program])
+    FileUtils.cp(out_tmp, "#{save_folder}/#{query}_#{subject}.xml") if save
+    puts to_print.join("\t")
+  end
+end

data/bin/blast_triads_promoters.rb ADDED

@@ -0,0 +1,192 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'bio'
+require 'csv'
+require 'bio-blastxmlparser'
+require 'fileutils'
+require 'tmpdir'
+# Defaults for every command-line setting; the OptionParser block below
+# overwrites individual entries in place.
+options = {}
+options[:identity] = 50
+options[:min_bases] = 200
+options[:split_token] = "-"
+# Scratch directory for the per-triad FASTA files and the BLAST report.
+options[:tmp_folder]  = Dir.mktmpdir
+options[:program]  = "blastn"
+# 0 disables sub-sampling: every triad is run.
+options[:random_sample] = 0
+# 0 keeps the full sequence; a positive value keeps only that many bases.
+options[:cut_promoter_length] = 0
+options[:reverse] = true
+OptionParser.new do |opts|
+  # The banner names this script (it used to advertise filter_blat.rb).
+  opts.banner = "Usage: blast_triads_promoters.rb [options]"
+  opts.on("-i", "--identity FLOAT", "Minimum percentage identity") do |o|
+    options[:identity] = o.to_f
+  end
+  opts.on("-c", "--min_bases int", "Minimum alignment length (default 200)") do |o|
+    options[:min_bases] = o.to_i
+  end
+  opts.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
+    options[:triads] = o
+  end
+  opts.on("-f", "--sequences FILE" , "FASTA file containing all the possible sequences. ") do |o|
+    options[:fasta] = o
+  end
+  opts.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |o|
+    options[:split_token] = o
+  end
+  opts.on("-p", "--program blastn|blastp", "The program to use in the alignments. Currntly only supported blastn and blastp") do |o|
+    options[:program] = o
+  end
+  opts.on("-r", "--random_sample INT", "Number of blast to run and keep. If set, only the number of subsets will be run") do |o|
+    options[:random_sample] = o.to_i
+  end
+  opts.on("-l", "--cut_promoter_length INT", "Bases to consider") do |o|
+    options[:cut_promoter_length] = o.to_i
+  end
+  opts.on("-v", "--reverse T|F", "Reverse the input bases") do |o|
+    case o
+    when 'T'
+      options[:reverse] = true
+    when 'F'
+      options[:reverse] = false
+    else
+      $stderr.puts "Invalid option for reverse (should be T or F)"
+      # Parenthesised so -1 is unambiguously the argument, not a subtraction.
+      exit(-1)
+    end
+  end
+end.parse!
+# Aligns two FASTA files with BLAST and reports the longest single HSP.
+#
+# path_a   - path of the query FASTA file.
+# path_b   - path of the subject FASTA file.
+# out_path - file that receives the BLAST XML report (-outfmt 5).
+# program  - BLAST executable, also passed as -task (default "blastn").
+#
+# Returns [length, pident] of the HSP with the greatest alignment length.
+# Both are zero when there is no hit or when the BLAST command fails.
+def blast_pair_fast(path_a, path_b, out_path, program: "blastn")
+  # Argument-list form of system: no shell is involved, so paths containing
+  # spaces or shell metacharacters are passed through verbatim.
+  executed = system(program, "-query", path_a, "-subject", path_b,
+                    "-task", program, "-out", out_path, "-outfmt", "5")
+  unless executed
+    # Callers reuse out_path between pairs; parsing it after a failed run
+    # would report the previous pair's alignment as if it were this one.
+    $stderr.puts "BLAST failed: #{program} -query #{path_a} -subject #{path_b}"
+    return [0, 0.0]
+  end
+  max_length = 0
+  max_pident = 0.0
+  Bio::BlastXMLParser::XmlIterator.new(out_path).to_enum.each do |iter|
+    iter.each do |hit|
+      hit.each do |hsp|
+        next unless hsp.align_len > max_length
+        max_length = hsp.align_len
+        max_pident = 100 * hsp.identity.to_f / hsp.align_len.to_f
+      end
+    end
+  end
+  [max_length, max_pident]
+end
+split_token = options[:split_token]
+# One entry per gene name (the part of the FASTA id before split_token),
+# keeping the longest sequence when a gene appears more than once.
+sequences = {}
+sequence_count = 0
+Bio::FlatFile.open(Bio::FastaFormat, options[:fasta]) do |fasta_file|
+  fasta_file.each do |entry|
+    gene_name = entry.entry_id.split(split_token)[0]
+    # Optionally reverse-complement and truncate before storing the entry.
+    seq = entry.naseq
+    seq.reverse_complement! if options[:reverse]
+    seq = seq[0,options[:cut_promoter_length]] if options[:cut_promoter_length] > 0
+    entry.data = seq
+    known = sequences[gene_name]
+    sequences[gene_name] = entry if known.nil? || entry.length > known.length
+    sequence_count += 1
+  end
+end
+$stderr.puts "#Loaded #{sequences.length} genes from #{sequence_count} sequences"
+$stderr.puts "TMP dir: #{options[:tmp_folder]}"
+# Scratch files, rewritten for every triad.
+tmp_paths = {
+  "A" => options[:tmp_folder] + "/A.fa",
+  "B" => options[:tmp_folder] + "/B.fa",
+  "D" => options[:tmp_folder] + "/D.fa"
+}
+out_tmp = options[:tmp_folder] + "/out.blast"
+puts [
+  "group_id" , "query"      , "subject" ,
+  "chr_query", "chr_subject", "aln_type",
+  "length"   , "pident" , "Ns_query", "Ns_subject", "Ns_total"   ].join("\t")
+# NOTE: the line count includes the CSV header row.
+count_lines = File.foreach(options[:triads]).count
+probability = options[:random_sample] / count_lines.to_f
+probability = 1 if options[:random_sample] == 0
+prng = Random.new
+prom_len = options[:cut_promoter_length]
+CSV.foreach(options[:triads], headers: true) do |row|
+  triad = row['group_id'].to_i
+  # Integer division: triads are grouped into folders of 100 ids.
+  triad_folder = triad/100
+  save = probability > prng.rand && probability < 1
+  run  = probability == 1 || save
+  next unless run
+  genes = { "A" => row['A'], "B" => row['B'], "D" => row['D'] }
+  seqs = {}
+  genes.each { |genome, gene| seqs[genome] = sequences[gene] }
+  # Count of N/n bases per available sequence.
+  ns = {}
+  seqs.each do |genome, seq|
+    next unless seq
+    File.open(tmp_paths[genome], 'w') { |f| f.write(seq) }
+    ns[genome] = seq.seq.count('Nn')
+  end
+  # Unlike the sampling flag suggests, every triad that is run is archived
+  # here (the `if save` guards are disabled in this script).
+  save_folder = "blast_alignments_#{prom_len}/#{triad_folder}/#{triad}"
+  FileUtils.mkdir_p save_folder
+  seqs.each { |genome, seq| FileUtils.cp(tmp_paths[genome], save_folder) if seq }
+  # One row per homoeologue pair. The query and subject columns are taken
+  # from the pair itself, so A->D and B->D rows name the genes really aligned.
+  [["A", "B"], ["A", "D"], ["B", "D"]].each do |query, subject|
+    next unless seqs[query] && seqs[subject]
+    to_print = [triad, genes[query], genes[subject], query, subject, "#{query}->#{subject}"]
+    to_print << blast_pair_fast(tmp_paths[query], tmp_paths[subject], out_tmp, program: options[:program])
+    to_print << ns[query]
+    to_print << ns[subject]
+    to_print << ns[query] + ns[subject]
+    FileUtils.cp(out_tmp, "#{save_folder}/#{query}_#{subject}.xml")
+    puts to_print.join("\t")
+  end
+end