RubyGems - seqtrimnext - Versions diffs - 2.0.45 → 2.0.46 - Mend

seqtrimnext 2.0.45 → 2.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

data/History.txt +4 -0
data/Manifest.txt +7 -2
data/bin/filter_database.rb +39 -0
data/bin/join_big_illumina_paired.sh +122 -0
data/bin/seqtrimnext +2 -1
data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +87 -121
data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +4 -1
data/lib/seqtrimnext/classes/graph_stats.rb +7 -2
data/lib/seqtrimnext/classes/seqtrim.rb +3 -2
data/lib/seqtrimnext/classes/sequence_with_action.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +2 -2
data/lib/seqtrimnext/plugins/plugin_adapters.rb +2 -2
data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +165 -0
data/lib/seqtrimnext/plugins/plugin_amplicons.rb +2 -2
data/lib/seqtrimnext/plugins/plugin_contaminants.rb +3 -3
data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_key.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_linker.rb +2 -2
data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_low_quality.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_mids.rb +2 -2
data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +1 -1
data/lib/seqtrimnext/plugins/plugin_short_insert.rb +2 -2
data/lib/seqtrimnext/plugins/plugin_vectors.rb +2 -2
data/lib/seqtrimnext/templates/genomics_illumina.txt +5 -0
data/lib/seqtrimnext/templates/transcriptomics_illumina.txt +8 -0
data/lib/seqtrimnext/utils/hash_stats.rb +2 -1
data/lib/seqtrimnext.rb +1 -1
metadata +14 -5

data/History.txt CHANGED Viewed

@@ -1,3 +1,7 @@
+=== 2.0.46 2012-04-13
+Checkpointing activated. Jobs can be restarted where stopped.
 === 2.0.45 2012-03-05
 Improved LowComplexity plugin to ignore low complexity regions inside low qual regions

data/Manifest.txt CHANGED Viewed

@@ -4,9 +4,11 @@ bin/extract_seqs_from_fasta.rb
 bin/extract_seqs_from_fastq.rb
 bin/fasta2fastq.rb
 bin/fastq2fasta.rb
+bin/filter_database.rb
 bin/gen_qual.rb
 bin/get_seq.rb
 bin/group_by_range.rb
+bin/join_big_illumina_paired.sh
 bin/join_ilumina_paired.rb
 bin/parse_amplicons.rb
 bin/parse_json_results.rb
@@ -18,6 +20,7 @@ bin/seqtrimnext
 bin/split_fastq.rb
 bin/split_ilumina_paired.rb
 bin/split_paired.rb
+History.txt
 lib/seqtrimnext/actions/action_ab_adapter.rb
 lib/seqtrimnext/actions/action_ab_far_adapter.rb
 lib/seqtrimnext/actions/action_ab_left_adapter.rb
@@ -35,13 +38,13 @@ lib/seqtrimnext/actions/action_low_complexity.rb
 lib/seqtrimnext/actions/action_low_high_size.rb
 lib/seqtrimnext/actions/action_low_quality.rb
 lib/seqtrimnext/actions/action_mid.rb
+lib/seqtrimnext/actions/action_middle_adapter.rb
 lib/seqtrimnext/actions/action_multiple_linker.rb
 lib/seqtrimnext/actions/action_paired_reads.rb
 lib/seqtrimnext/actions/action_poly_a.rb
 lib/seqtrimnext/actions/action_poly_t.rb
 lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
 lib/seqtrimnext/actions/action_right_adapter.rb
-lib/seqtrimnext/actions/action_middle_adapter.rb
 lib/seqtrimnext/actions/action_right_primer.rb
 lib/seqtrimnext/actions/action_short_insert.rb
 lib/seqtrimnext/actions/action_unexpected_poly_t.rb
@@ -71,6 +74,7 @@ lib/seqtrimnext/classes/sequence_with_action.rb
 lib/seqtrimnext/plugins/plugin.rb
 lib/seqtrimnext/plugins/plugin_ab_adapters.rb
 lib/seqtrimnext/plugins/plugin_adapters.rb
+lib/seqtrimnext/plugins/plugin_adapters_old.rb
 lib/seqtrimnext/plugins/plugin_amplicons.rb
 lib/seqtrimnext/plugins/plugin_contaminants.rb
 lib/seqtrimnext/plugins/plugin_extract_inserts.rb
@@ -89,9 +93,11 @@ lib/seqtrimnext/plugins/plugin_vectors.rb
 lib/seqtrimnext/templates/amplicons.txt
 lib/seqtrimnext/templates/genomics_454.txt
 lib/seqtrimnext/templates/genomics_454_with_paired.txt
+lib/seqtrimnext/templates/genomics_illumina.txt
 lib/seqtrimnext/templates/low_quality.txt
 lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
 lib/seqtrimnext/templates/transcriptomics_454.txt
+lib/seqtrimnext/templates/transcriptomics_illumina.txt
 lib/seqtrimnext/templates/transcriptomics_plants.txt
 lib/seqtrimnext/utils/extract_samples.rb
 lib/seqtrimnext/utils/fasta2xml.rb
@@ -103,7 +109,6 @@ lib/seqtrimnext/utils/load_qual_in_hash.rb
 lib/seqtrimnext/utils/recover_mid.rb
 lib/seqtrimnext/utils/string_utils.rb
 lib/seqtrimnext.rb
-History.txt
 Manifest.txt
 PostInstall.txt
 Rakefile

data/bin/filter_database.rb ADDED Viewed

@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+require 'scbi_fasta'
+if ARGV.count!=3
+  puts "Usage: #{File.basename($0)} database min_size name_list"
+  exit
+end
+min_size = ARGV[1].to_i
+# read keywords
+keywords=File.read(ARGV[2]).split("\n")
+# convert all to upcase
+keywords.map { |keyword| keyword.upcase!}
+# puts "Search keywords"
+# keywords.each { |keyword| puts keyword}
+fqr=FastaQualFile.new(ARGV[0])
+all=[]
+fqr.each do |n,s,c|
+  keywords.each do |keyword|
+    if s.length<=min_size
+      # all+=c.split(" ")
+      if c.upcase.index(keyword)
+         # puts "[#{s.length.to_s}] - #{n} - #{c}"
+        puts ">#{n} #{c}\n#{s}"
+        break
+      end
+    end
+  end
+end
+# puts all.sort.uniq.reject{|e| e=~/\d/}
+fqr.close

data/bin/join_big_illumina_paired.sh ADDED Viewed

@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# Sort two big Illumina FASTQ files from a paired-end experiment and split
+# their reads by name: reads found in both files are written to
+# <base_output_name>_paired1.fastq and <base_output_name>_paired2.fastq, reads
+# found in only one file go to <base_output_name>_normal1.fastq or
+# <base_output_name>_normal2.fastq.
+#
+# Usage: join_big_illumina_paired.sh file1.fastq file2.fastq base_output_name [tmp_dir]
+# tmp_dir is optional and defaults to the current directory.
+#
+# The numeric test must use -lt: inside [ ] a bare "<" is an input redirection
+# from a file named "4", not a comparison. Only three arguments are mandatory
+# because tmp_dir gets a default below.
+if [ "$#" -lt 3 ]; then
+    echo ""
+    echo "Use: $0 file1.fastq file2.fastq base_output_name [tmp_dir]"
+    echo ""
+    exit 1
+fi
+base_name=$3
+if [[ -z "$base_name" ]]; then
+    echo "Use a non-empty base_output_name"
+    exit -1
+fi
+# Fall back to the current directory when no tmp_dir was given.
+tmp_dir=$4
+if [[ -z "$tmp_dir" ]]; then
+    tmp_dir=`pwd`
+fi
+# Intermediate files are created inside tmp_dir, so it has to be a directory.
+if [[ ! -d "$tmp_dir" ]]; then
+    echo "Tmp dir: $tmp_dir doesn't exist or is not a directory"
+    exit -1
+fi
+echo "Using TMPDIR $tmp_dir"
+f1_path=$1
+f2_path=$2
+f1_name=`basename $1`
+f2_name=`basename $2`
+f1_tmp="$tmp_dir/${f1_name}"
+f2_tmp="$tmp_dir/${f2_name}"
+common_names="$tmp_dir/comm.names"
+only_in_1="$tmp_dir/only_in_1.txt"
+only_in_2="$tmp_dir/only_in_2.txt"
+in_both="$tmp_dir/in_both.txt"
+echo "Starting sorting"
+if [[ ! -e "$f1_tmp.sorted" ]]; then
+    echo "Sorting $f1_name"
+    cat $f1_path | awk '{split($0, a, " "); sub(/\/1$/,"\t", a[1]); n++; if (n%4==1){printf("%s",a[1]);}; printf("%s",$0); if(n%4==0) { printf("\n");} else { printf("\t");} }' | sort -T $tmp_dir -k1,1 -t $'\t' > $f1_tmp.sorted &
+fi
+if [[ ! -e "$f2_tmp.sorted" ]]; then
+    echo "Sorting $f2_name"
+    cat $f2_path | awk '{split($0, a, " "); sub(/\/2$/,"\t", a[1]); n++; if (n%4==1){printf("%s",a[1]);}; printf("%s",$0); if(n%4==0) { printf("\n");} else { printf("\t");} }' | sort -T $tmp_dir -k1,1 -t $'\t' > $f2_tmp.sorted &
+fi
+wait
+echo "Starting name extraction"
+if [[ ! -e "$f1_tmp.names" ]]; then
+    echo "Extracting names from $f1_tmp.sorted"
+    # cat $1.sorted | cut -f1 | sed 's/\(.*\)\/1$/\1/' > $1.names &
+    cat $f1_tmp.sorted | cut -f1  > $f1_tmp.names &
+fi
+if [[ ! -e "$f2_tmp.names" ]]; then
+    echo "Extracting names from $f2_tmp.sorted"
+    cat $f2_tmp.sorted | cut -f1  > $f2_tmp.names &
+fi
+wait
+echo "Starting names comparison"
+if [[ ! -e "$common_names" ]]; then
+    echo "Making comm file"
+    # diff $1.names $2.names > names.diff
+    comm $f1_tmp.names $f2_tmp.names > $common_names
+fi
+echo "Starting names extraction"
+# grep '^>' names.diff | cut -d ' ' -f2 | awk '{ printf("%s/2\n",$0) }' > only_in_2.txt &
+# grep '^<' names.diff | cut -d ' ' -f2 | awk '{ printf("%s/1\n",$0) }' > only_in_1.txt &
+grep -P '^[^\t]' $common_names > $only_in_1 &
+grep -P '^\t[^\t]' $common_names |tr -d "\t" > $only_in_2 &
+grep -P '^\t\t[^\t]' $common_names |tr -d "\t" > $in_both &
+wait
+echo "Num seqs only in 1) $f1_name"
+wc -l $only_in_1
+echo "Num seqs only in 2) $f2_name"
+wc -l $only_in_2
+echo "Num seqs in both $f1_name and $f2_name"
+wc -l $in_both
+echo "Starting extracting seqs"
+join -t $'\t' -1 1 -2 1 $only_in_1 $f1_tmp.sorted |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_normal1.fastq &
+join -t $'\t' -1 1 -2 1 $only_in_2 $f2_tmp.sorted |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_normal2.fastq &
+join -t $'\t' -1 1 -2 1 $in_both $f1_tmp.sorted  |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_paired1.fastq &
+join -t $'\t' -1 1 -2 1 $in_both $f2_tmp.sorted  |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_paired2.fastq &
+wait
+rm $f1_tmp.names
+rm $f2_tmp.names
+rm $f1_tmp.sorted
+rm $f2_tmp.sorted
+rm $only_in_2
+rm $only_in_1
+rm $in_both
+rm $common_names

data/bin/seqtrimnext CHANGED Viewed

@@ -57,6 +57,7 @@
 # $: << File.expand_path(ROOT_PATH)
 $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
+$: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib/')
 require 'seqtrimnext'
@@ -272,7 +273,7 @@ optparse = OptionParser.new do |opts|
   end
   options[:skip_report] = false
-  opts.on( '-R', '--no-report', 'Change to no verbose mode. Every sequence will not be written to output log' ) do
+  opts.on( '-R', '--no-report', 'Do not generate final PDF report (gem scbi_seqtrimnext_report required if you want to generate PDF report).' ) do
     options[:skip_report] = true
   end

data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb CHANGED Viewed

@@ -18,29 +18,21 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
     @@params= params
     @@exit = false
+    @@ongoing_stats={}
+    @@ongoing_stats[:sequence_count] = 0
+    @@ongoing_stats[:smallest_sequence_size] = 900000000000000
+    @@ongoing_stats[:biggest_sequence_size] = 0
     @@skip_output=skip_output
     @@chunk_size = chunk_size
-    # puts "CHECKPOINT: #{self.checkpoint}\n"*20
-    checkpoint_exists=File.exists?('scbi_drb_checkpoint')
+    checkpoint_exists=File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
     # @@use_qual = !qual_path.nil? and File.exists?(qual_path)
     @@open_mode='w'
     if checkpoint_exists
       @@open_mode = 'a'
-      if File.exists?(STATS_PATH)
-        # load stats
-        text = File.read(STATS_PATH)
-        # wipe text
-        # text=text.grep(/^\s*[^#]/).to_s
-        # decode json
-        @@full_stats = JSON.parse(text)
-      end
     end
     #open input file
@@ -91,67 +83,38 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
     puts "FULL STATS:\n" +JSON.pretty_generate(@@full_stats)
+    # create stats file
     f = File.open(STATS_PATH,'w')
     f.puts JSON.pretty_generate(@@full_stats)
     f.close
-    r=File.read(STATS_PATH)
+    # if initial files doesn't exists, create it
+    if !File.exists?(File.join(OUTPUT_PATH,'initial_stats.json'))
+      File.open(File.join(OUTPUT_PATH,'initial_stats.json'),'w') do |f|
+        f.puts JSON.pretty_generate(@@ongoing_stats)
+      end
+    end
+    # load stats
+    r=File.read(STATS_PATH)
     stats=JSON::parse(r)
+    # make graphs
     gs=GraphStats.new(stats)
-    #gs=GraphStats.new(@@full_stats)
     #close all files
-    # @@fqr.close
     if @@use_json
       @@json_output.close
     end
     @@errors_file.close
-    # @@rejected_output_file.close
-    # @@paired_output_files.each do |k,file|
-    #   file.close
-    # end
     @@files.each do |k,file|
       file.close
     end
-    # @@paired_qual_output_files.each do |k,file|
-    #            file.close
-    # end
-    # @@sequences_output_files.each do |k,file|
-    #   file.close
-    # end
-    #
-    # @@low_complexity_output_files.each do |k,file|
-    #   file.close
-    # end
-    #
-    # @@sffinfo_files.each do |k,file|
-    #   file.close
-    # end
-    #
-    # @@low_sffinfo_files.each do |k,file|
-    #   file.close
-    # end
-    # @@qual_output_files.each do |k,file|
-    #  file.close
-    # end
-    # more than one MID found
-    # if @@full_stats['mid_id'] && @@full_stats['mid_id'].count>1
-    #
-    # end
     if File.exists?('scbi_drb_checkpoint')
       File.delete('scbi_drb_checkpoint')
     end
@@ -172,21 +135,71 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
   end
   def load_user_checkpoint(checkpoint)
+    # load full_stats from file !!!!!!!!!!!!!
-    # reset count stats since they are repeated by checkpointing
+    if File.exists?(STATS_PATH)
-    if @@full_stats['sequences'] && @@full_stats['repeated']
-      @@full_stats['sequences']['count']['repeated']=0
-    end
-    if @@full_stats['sequences'] && @@full_stats['processed']
-      @@full_stats['sequences']['processed']['count']=0
+      # load stats
+      text = File.read(STATS_PATH)
+      # wipe text
+      # text=text.grep(/^\s*[^#]/).to_s
+      # decode json
+      @@full_stats = JSON.parse(text)
     end
-    if @@full_stats['sequences'] && @@full_stats['total']
-      @@full_stats['sequences']['total']['count']=0
+    # reset count stats since they are repeated by checkpointing
+    # {
+    #   "sequences": {
+    #   "count": {
+    #     "input_count": 1600,
+    #     "output_seqs": 933,
+    #     "rejected": 67
+    #   },
+    #   "rejected": {
+    #     "short insert": 39,
+    #     "contaminated": 26,
+    #     "unexpected vector": 2
+    #   }
+    # }
+    # }
+    if @@full_stats['sequences']
+      if @@full_stats['sequences']['count']
+        # set input count to 0
+        @@full_stats['sequences']['count']['input_count']=0
+        # do not remove outputseqs
+        # @@full_stats['sequences']['count']['output_seqs']=0
+      end
+      # remove rejected due to repetitions from rejected count
+      if @@full_stats['sequences']['rejected']
+        # it there are repeated
+        if (@@full_stats['sequences']['rejected']['repeated'])
+          # if repeated count > 0 and there count exists
+          if (@@full_stats['sequences']['rejected']['repeated'] > 0) and @@full_stats['sequences']['count']
+            # discount repeated from rejected, since they are going to be added again by checkout process
+            @@full_stats['sequences']['count']['rejected'] -= @@full_stats['sequences']['rejected']['repeated']
+          end
+          # set repeated to 0
+          @@full_stats['sequences']['rejected']['repeated']=0
+        end
+      end
     end
+    # puts "Loaded Stats"
+    # puts "FULL STATS:\n" +JSON.pretty_generate(@@full_stats)
+    # TODO - remove sequences from rejected file that were added by cloned
     super
     # return checkpoint
   end
@@ -202,17 +215,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
   # read a work that will not be processed, only to skip until checkpoint
   def trash_checkpointed_work
-    @@chunk_size.times do
-      begin
-        n,f,q,c = @@fqr.next_seq
-      end while (!n.nil? && @@params.repeated_seq?(n))
-      if n.nil?
-        break
-      end
-    end
+    warn "Deprecated: trash_checkpointed_work was deprecated, it is automatic now"
   end
   def next_work
@@ -228,12 +231,16 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
       if !n.nil? && @@params.repeated_seq?(n)
         @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
         @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
         get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
       end
       if !n.nil?
+        # keep running totals of the input: number of sequences, shortest and longest size
+        @@ongoing_stats[:sequence_count] += 1
+        @@ongoing_stats[:smallest_sequence_size] = [f.size, @@ongoing_stats[:smallest_sequence_size]].min
+        # the maximum must be compared with :biggest_sequence_size; comparing with the
+        # minimum would always leave the size of the last sequence read
+        @@ongoing_stats[:biggest_sequence_size] = [f.size, @@ongoing_stats[:biggest_sequence_size]].max
         @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
       end
     end while (!n.nil? && @@params.repeated_seq?(n))
@@ -247,50 +254,9 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
   end
-  # def next_work
-  #
-  #     if @@exit
-  #       return nil
-  #     end
-  #     group = SequenceGroup.new
-  #
-  #     @@chunk_size.times do
-  #       begin
-  #
-  #         n,f,q,c = @@fqr.next_seq
-  #
-  #         if !n.nil? && @@params.repeated_seq?(n)
-  #           @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
-  #           @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
-  #
-  #           get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
-  #
-  #         end
-  #         if !n.nil?
-  #           @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
-  #         end
-  #       end while (!n.nil? && @@params.repeated_seq?(n))
-  #
-  #       if !n.nil?
-  #         # @@full_stats.add_stats({'sequences' => {'count' => {'processed' => 1}}})
-  #         group.push SequenceWithAction.new(n,f.upcase,q,c)
-  #       else
-  #         break
-  #       end
-  #     end
-  #
-  #     # puts "Processing #{group.inspect}"
-  #
-  #     if group.empty?
-  #       return nil
-  #     else
-  #       return group
-  #     end
-  #
-  #   end
   def work_received(obj)
     res = obj
     # collect stats

data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb CHANGED Viewed

@@ -19,6 +19,9 @@
 #
 # $: << File.expand_path(ROOT_PATH)
+$: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
+$: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
 require 'seqtrimnext'
 $SEQTRIM_PATH = ROOT_PATH
@@ -37,7 +40,7 @@ ENV['BLASTDB']=$FORMATTED_DB_PATH
 OUTPUT_PATH='output_files'
 puts "FORMATTED_DB_BLAST in workers: #{$FORMATTED_DB_PATH}"
-# $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
 require 'scbi_mapreduce'
 require 'params'

data/lib/seqtrimnext/classes/graph_stats.rb CHANGED Viewed

@@ -10,8 +10,13 @@ class GraphStats
     init_stats=initial_stats
     if init_stats.nil?
-      r=File.read(File.join(OUTPUT_PATH,'initial_stats.json'))
-      init_stats= JSON::parse(r)
+      if File.exists?(File.join(OUTPUT_PATH,'initial_stats.json'))
+        r=File.read(File.join(OUTPUT_PATH,'initial_stats.json'))
+        init_stats= JSON::parse(r)
+      else
+        init_stats=[]
+      end
     end
     # puts init_stats.to_json
     #r=File.read(File.join(File.dirname(__FILE__),'stats.json'))

data/lib/seqtrimnext/classes/seqtrim.rb CHANGED Viewed

@@ -5,8 +5,6 @@
 require 'extract_stats'
-# $: << File.expand_path('~/progs/ruby/gems/scbi_drb/lib')
 require 'scbi_mapreduce'
 require 'seqtrim_work_manager'
 require 'action_manager'
@@ -340,6 +338,9 @@ class Seqtrim
         # server = ScbiMapreduce::Manager.new(ip,port, workers, SeqtrimWorkManager,custom_worker_file, STDOUT,'~/.seqtrimnext')
 				server = ScbiMapreduce::Manager.new(ip,port, workers, SeqtrimWorkManager,custom_worker_file, STDOUT,$SEQTRIMNEXT_INIT)
 				server.chunk_size=chunk_size
+        server.checkpointing=true
+        server.keep_order=true
+        server.retry_stuck_jobs=true
 				server.start_server
         # close sequence reader

data/lib/seqtrimnext/classes/sequence_with_action.rb CHANGED Viewed

@@ -77,7 +77,7 @@ class SequenceWithAction < Sequence
   # Adds a new action to the sequence
     def add_action(a)
-      $LOG.info("Adding action #{a.type} to #{seq_name}")
+      $LOG.debug("Adding action #{a.type} to #{seq_name}")
       @actions.push a

data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb CHANGED Viewed

@@ -25,7 +25,7 @@ class PluginAbAdapters < Plugin
  def do_blasts(seqs)
     # find MIDS  with less results than max_target_seqs value
     blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
-    $LOG.info('BLAST:'+blast.get_blast_cmd)
+    $LOG.debug('BLAST:'+blast.get_blast_cmd)
     fastas=[]
@@ -61,7 +61,7 @@ class PluginAbAdapters < Plugin
      raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
    end
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
     # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters_ab.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_ab')} -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")

data/lib/seqtrimnext/plugins/plugin_adapters.rb CHANGED Viewed

@@ -25,7 +25,7 @@ class PluginAdapters < Plugin
  def do_blasts(seqs)
     # find MIDS  with less results than max_target_seqs value
     blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
-    $LOG.info('BLAST:'+blast.get_blast_cmd)
+    $LOG.debug('BLAST:'+blast.get_blast_cmd)
     fastas=[]
@@ -64,7 +64,7 @@ class PluginAdapters < Plugin
      # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
    end
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
     # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")

data/lib/seqtrimnext/plugins/plugin_adapters_old.rb ADDED Viewed

@@ -0,0 +1,165 @@
+require "plugin"
+########################################################
+# Author: Almudena Bocinos Rioboo
+#
+# Plugin that BLASTs sequences against the adapters database (param 'adapters_db') and adds a left or right adapter action for every merged hit
+# Inherit: Plugin
+########################################################
+class PluginAdaptersOld < Plugin
+  def get_type_adapter(p_start,p_end,seq)
+       #if the adapter start is nearer to the left end, the action is a left adapter,
+       #if the adapter end is nearer to the right end, the action is a right adapter
+       #NOTE: If the adapter is very near to both the left and the right end,
+       #then the sequence isn't valid, because almost all of the sequence is adapter.
+       v1= p_end.to_i
+       v2= p_start.to_i
+        # puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start}  insert_end #{seq.insert_end}"
+        # puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
+       if (v2+seq.insert_start  < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #the adapter is nearer to the left end
+         type = "ActionLeftAdapter"
+       else
+          type = "ActionRightAdapter"
+       end
+       return type
+  end
+  def cut_by_right(adapter,seq) # true when the stretch left of the adapter is longer than half of the stretch right of it
+    left_size = adapter.q_beg-seq.insert_start+1
+    right_size = seq.insert_end-adapter.q_end+1
+    left_size=0 if (left_size<0)
+    right_size=0 if (right_size<0)
+    return (left_size>(right_size/2).to_i)
+  end
+ #Runs one batch BLAST for all the sequences in "seqs" and then processes each sequence with the query result at the same index
+ def execute(seqs)
+   blasts= do_blasts(seqs)
+   seqs.each_with_index do |s,i|
+     exec_seq(s,blasts.querys[i])
+   end
+ end
+ def do_blasts(seqs) # returns the BLAST results for all the sequences, searched in a single batch
+    # blastn-short search against the adapters database, using the configured e-value and percent identity
+    blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
+    $LOG.debug('BLAST:'+blast.get_blast_cmd)
+    fastas=[]
+    seqs.each do |seq|
+     fastas.push ">"+seq.seq_name
+     fastas.push seq.seq_fasta
+    end
+    # fastas=fastas.join("\n")
+    blast_table_results = blast.do_blast(fastas)
+    # puts blast_table_results.inspect
+    return blast_table_results
+ end
+ def exec_seq(seq,blast_query) # adds to seq one adapter action per merged hit of blast_query; raises if blast_query belongs to another sequence
+   if blast_query.query_id != seq.seq_name
+     raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
+   end
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
+    # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
+    # blast with only one sequence, no with many sequences from a database
+    #---------------------------------------------------------------------
+    # blast_table_results = blast.do_blast(seq.seq_fasta)             #rise seq to adapterss  executing over blast
+    #blast_table_results = BlastTableResult.new(res)
+    # blast_table_results.inspect
+    adapters=[]
+    # blast_table_results.querys.each do |query|     # first round to save adapters without overlap
+      merge_hits(blast_query,adapters)
+    # end
+    begin
+      adapters2=adapters                            # further rounds: merge again until the number of adapters stops changing
+      adapters = []
+      merge_hits(adapters2,adapters)
+    end until (adapters2.count == adapters.count)
+    actions=[]
+    adapter_size=0
+    # @stats['adapter_size']={}
+    adapters.each do |ad|                           # adds the corresponding action to the sequence
+       type = get_type_adapter(ad.q_beg,ad.q_end,seq)
+       a = seq.new_action(ad.q_beg,ad.q_end,type)
+       # puts " state left_action #{a.left_action} right_action #{a.right_action}"
+       adapter_size=ad.q_end-ad.q_beg+1
+       if cut_by_right(ad,seq)
+        # puts "action right end1 #{seq.insert_end}"
+        a.right_action=true    #mark right action to get the left insert
+      else
+        # puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
+        a.left_action = true   #mark left action to get the right insert
+      end
+      a.message = ad.subject_id
+      a.reversed = ad.reversed
+      actions.push a
+      # @stats[:adapter_size]={adapter_size => 1}
+      add_stats('adapter_size',adapter_size)
+    end
+    seq.add_actions(actions)
+    #
+  end
+  #Returns the errors array that is passed to params.check_param for every parameter this plugin uses
+  def self.check_params(params)
+    errors=[]
+    comment='Blast E-value used as cut-off when searching for adapters or primers'
+    default_value = 1e-6
+		params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
+		comment='Minimum required identity (%) for a reliable adapter'
+		default_value = 95
+		params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
+    comment='Path for adapter database'
+		default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
+		params.check_param(errors,'adapters_db','DB',default_value,comment)
+    return errors
+  end
+end

data/lib/seqtrimnext/plugins/plugin_amplicons.rb CHANGED Viewed

@@ -25,7 +25,7 @@ class PluginAmplicons < Plugin
   def do_blasts(seqs)
     # find MIDS  with less results than max_target_seqs value
     blast=BatchBlast.new("-db #{@params.get_param('primers_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_primers')}")
-    $LOG.info('BLAST:'+blast.get_blast_cmd)
+    $LOG.debug('BLAST:'+blast.get_blast_cmd)
     fastas=[]
@@ -49,7 +49,7 @@ class PluginAmplicons < Plugin
       raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
     end
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for primers into the sequence"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for primers into the sequence"
     # puts blast_query.inspect

data/lib/seqtrimnext/plugins/plugin_contaminants.rb CHANGED Viewed

@@ -36,7 +36,7 @@ class PluginContaminants < Plugin
     blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1")  #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
-    $LOG.info('BLAST:'+blast.get_blast_cmd(:xml))
+    $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
     fastas=[]
@@ -67,7 +67,7 @@ class PluginContaminants < Plugin
       # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
     end
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
     #blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95')  #get contaminants
@@ -143,7 +143,7 @@ class PluginContaminants < Plugin
         end
       else
-        $LOG.info('Contaminant ignored due to genus match: '+c.definition)
+        $LOG.debug('Contaminant ignored due to genus match: '+c.definition)
       end
     end

data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb CHANGED Viewed

@@ -286,7 +286,7 @@ class PluginExtractInserts < Plugin
   def exec_seq(seq)
-     $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"
+     $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"
      # puts "INSERTO ANTES LINKER INSERT:"+seq.seq_fasta

data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb CHANGED Viewed

@@ -319,7 +319,7 @@ class PluginFindPolyAt < Plugin
   def exec_seq(seq)
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for strings of polyAT's into the sequence with a length indicated by the param <poly_at_length>"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for strings of polyAT's into the sequence with a length indicated by the param <poly_at_length>"
     find_polyT(seq)
     find_polyA(seq)

data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb CHANGED Viewed

@@ -21,7 +21,7 @@ def execute(seqs)
  def exec_seq(seq)
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: searching sequence repeated at input file"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: searching sequence repeated at input file"
     fasta_input=@params.get_param('truncated_input_file')

data/lib/seqtrimnext/plugins/plugin_indeterminations.rb CHANGED Viewed

@@ -149,7 +149,7 @@ class PluginIndeterminations < Plugin
    def exec_seq(seq)
-     $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"
+     $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"
      actions=[]

data/lib/seqtrimnext/plugins/plugin_key.rb CHANGED Viewed

@@ -21,7 +21,7 @@ class PluginKey < Plugin
    def exec_seq(seq)
-     $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"
+     $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"
      # blast_table_results.inspect
      actions=[]

data/lib/seqtrimnext/plugins/plugin_linker.rb CHANGED Viewed

@@ -83,7 +83,7 @@ class PluginLinker < Plugin
      # find MIDS  with less results than max_target_seqs value
      blast = BatchBlast.new("-db #{@params.get_param('linkers_db')}",'blastn'," -task blastn-short  -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}")  #get linkers
-     $LOG.info('BLAST:'+blast.get_blast_cmd)
+     $LOG.debug('BLAST:'+blast.get_blast_cmd)
      fastas=[]
@@ -106,7 +106,7 @@ class PluginLinker < Plugin
     if blast_query.query_id != seq.seq_name
       raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
     end
-     $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"
+     $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"
      # key_beg,key_end=search_key(seq,0,3)   if false
      # blast = BatchBlast.new("-subject #{File.join($FORMATTED_DB_PATH,'linkers.fasta')}",'blastn'," -task blastn  -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}")  #get linkers

data/lib/seqtrimnext/plugins/plugin_low_high_size.rb CHANGED Viewed

@@ -22,7 +22,7 @@ class PluginLowHighSize < Plugin
   def exec_seq(seq)
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"
     min_size = @params.get_param('min_sequence_size_raw').to_i #min_size is: mean - 2dev
     max_size = @params.get_param('max_sequence_size_raw').to_i #max_size is: mean + 2dev

data/lib/seqtrimnext/plugins/plugin_low_quality.rb CHANGED Viewed

@@ -278,7 +278,7 @@ class PluginLowQuality < Plugin
      if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? )
        $LOG.error " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
      elsif (seq.seq_qual.size>0)
-       $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
+       $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
        @low=@params.get_param('min_quality').to_i

data/lib/seqtrimnext/plugins/plugin_mids.rb CHANGED Viewed

@@ -29,7 +29,7 @@ class PluginMids < Plugin
   def do_blasts(seqs)
      # find MIDS  with less results than max_target_seqs value
      blast = BatchBlast.new("-db #{@params.get_param('mids_db')}",'blastn'," -task blastn-short    -perc_identity #{@params.get_param('blast_percent_mids')} -max_target_seqs 4 ")  #get mids
-     $LOG.info('BLAST:'+blast.get_blast_cmd)
+     $LOG.debug('BLAST:'+blast.get_blast_cmd)
      fastas=[]
@@ -54,7 +54,7 @@ class PluginMids < Plugin
     end
-     $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for mids into the sequence"
+     $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for mids into the sequence"
      # blast_table_results = blast.do_blast(seq.seq_fasta[0..SIZE_SEARCH_MID])             # execute blast to find mids

data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb CHANGED Viewed

@@ -27,7 +27,7 @@ class PluginRemAditArtifacts < Plugin
   def exec_seq(seq)
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"
     seq2 = seq.seq_fasta
     first = 0
     last = seq2.size-1

data/lib/seqtrimnext/plugins/plugin_short_insert.rb CHANGED Viewed

@@ -96,7 +96,7 @@ class PluginShortInsert < Plugin
   def exec_seq(seq)
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
     # puts "inserto #{seq.insert_start}, #{seq.insert_end} size #{seq.seq_fasta.size}"
     if (seq.seq_fasta.size > 0)
@@ -183,7 +183,7 @@ class PluginShortInsert < Plugin
   #Begins the plugin1's execution to warn if the inserted is so short
    def execute_no_cut_quality(seq)
-     $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
+     $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"

data/lib/seqtrimnext/plugins/plugin_vectors.rb CHANGED Viewed

@@ -38,7 +38,7 @@ class PluginVectors < Plugin
     # find MIDS  with less results than max_target_seqs value
     blast = BatchBlast.new("-db #{@params.get_param('vectors_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_vectors')} -perc_identity #{@params.get_param('blast_percent_vectors')} -culling_limit 1")  #get vectors
-    $LOG.info('BLAST:'+blast.get_blast_cmd)
+    $LOG.debug('BLAST:'+blast.get_blast_cmd)
     fastas=[]
@@ -62,7 +62,7 @@ class PluginVectors < Plugin
      # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
    end
-    $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
+    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
     #blast contra contaminantes

data/lib/seqtrimnext/templates/genomics_illumina.txt ADDED Viewed

@@ -0,0 +1,5 @@
+# ======================================
+# General parameters GENOMICS illumina
+# ======================================
+plugin_list = PluginLowHighSize,PluginIndeterminations,PluginContaminants,PluginLowQuality

data/lib/seqtrimnext/templates/transcriptomics_illumina.txt ADDED Viewed

@@ -0,0 +1,8 @@
+# ======================================
+# General parameters
+# ======================================
+plugin_list = PluginLowHighSize,PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginLowQuality,PluginLowComplexity
+contaminants_db="contaminants.fasta cont_ribosome.fasta"

data/lib/seqtrimnext/utils/hash_stats.rb CHANGED Viewed

@@ -15,8 +15,9 @@ def add_stats(h_stats)
 		add_stats.each do |property,hash_value|
 			h[plugin_hash][property]={} if h[plugin_hash][property].nil?
+      # values need to be in string format because of later loading from json file
 			hash_value.each do |value, count|
-				h[plugin_hash][property][value]=(h[plugin_hash][property][value]||0) + count
+				h[plugin_hash][property][value.to_s]=(h[plugin_hash][property][value.to_s]||0) + count
 			end
 		end
 	end

data/lib/seqtrimnext.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module Seqtrimnext
   # SEQTRIM_VERSION_STAGE = 'b'
   # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
-   VERSION = '2.0.45'
+   VERSION = '2.0.46'
   SEQTRIM_VERSION = VERSION

metadata CHANGED Viewed

@@ -2,7 +2,7 @@
 name: seqtrimnext
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 2.0.45
+  version: 2.0.46
 platform: ruby
 authors:
 - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-03-05 00:00:00 Z
+date: 2012-04-13 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray
@@ -143,9 +143,11 @@ executables:
 - extract_seqs_from_fastq.rb
 - fasta2fastq.rb
 - fastq2fasta.rb
+- filter_database.rb
 - gen_qual.rb
 - get_seq.rb
 - group_by_range.rb
+- join_big_illumina_paired.sh
 - join_ilumina_paired.rb
 - parse_amplicons.rb
 - parse_json_results.rb
@@ -160,14 +162,16 @@ executables:
 extensions: []
 extra_rdoc_files:
+- History.txt
 - lib/seqtrimnext/templates/amplicons.txt
 - lib/seqtrimnext/templates/genomics_454.txt
 - lib/seqtrimnext/templates/genomics_454_with_paired.txt
+- lib/seqtrimnext/templates/genomics_illumina.txt
 - lib/seqtrimnext/templates/low_quality.txt
 - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
 - lib/seqtrimnext/templates/transcriptomics_454.txt
+- lib/seqtrimnext/templates/transcriptomics_illumina.txt
 - lib/seqtrimnext/templates/transcriptomics_plants.txt
-- History.txt
 - Manifest.txt
 - PostInstall.txt
 files:
@@ -177,9 +181,11 @@ files:
 - bin/extract_seqs_from_fastq.rb
 - bin/fasta2fastq.rb
 - bin/fastq2fasta.rb
+- bin/filter_database.rb
 - bin/gen_qual.rb
 - bin/get_seq.rb
 - bin/group_by_range.rb
+- bin/join_big_illumina_paired.sh
 - bin/join_ilumina_paired.rb
 - bin/parse_amplicons.rb
 - bin/parse_json_results.rb
@@ -191,6 +197,7 @@ files:
 - bin/split_fastq.rb
 - bin/split_ilumina_paired.rb
 - bin/split_paired.rb
+- History.txt
 - lib/seqtrimnext/actions/action_ab_adapter.rb
 - lib/seqtrimnext/actions/action_ab_far_adapter.rb
 - lib/seqtrimnext/actions/action_ab_left_adapter.rb
@@ -208,13 +215,13 @@ files:
 - lib/seqtrimnext/actions/action_low_high_size.rb
 - lib/seqtrimnext/actions/action_low_quality.rb
 - lib/seqtrimnext/actions/action_mid.rb
+- lib/seqtrimnext/actions/action_middle_adapter.rb
 - lib/seqtrimnext/actions/action_multiple_linker.rb
 - lib/seqtrimnext/actions/action_paired_reads.rb
 - lib/seqtrimnext/actions/action_poly_a.rb
 - lib/seqtrimnext/actions/action_poly_t.rb
 - lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
 - lib/seqtrimnext/actions/action_right_adapter.rb
-- lib/seqtrimnext/actions/action_middle_adapter.rb
 - lib/seqtrimnext/actions/action_right_primer.rb
 - lib/seqtrimnext/actions/action_short_insert.rb
 - lib/seqtrimnext/actions/action_unexpected_poly_t.rb
@@ -244,6 +251,7 @@ files:
 - lib/seqtrimnext/plugins/plugin.rb
 - lib/seqtrimnext/plugins/plugin_ab_adapters.rb
 - lib/seqtrimnext/plugins/plugin_adapters.rb
+- lib/seqtrimnext/plugins/plugin_adapters_old.rb
 - lib/seqtrimnext/plugins/plugin_amplicons.rb
 - lib/seqtrimnext/plugins/plugin_contaminants.rb
 - lib/seqtrimnext/plugins/plugin_extract_inserts.rb
@@ -262,9 +270,11 @@ files:
 - lib/seqtrimnext/templates/amplicons.txt
 - lib/seqtrimnext/templates/genomics_454.txt
 - lib/seqtrimnext/templates/genomics_454_with_paired.txt
+- lib/seqtrimnext/templates/genomics_illumina.txt
 - lib/seqtrimnext/templates/low_quality.txt
 - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
 - lib/seqtrimnext/templates/transcriptomics_454.txt
+- lib/seqtrimnext/templates/transcriptomics_illumina.txt
 - lib/seqtrimnext/templates/transcriptomics_plants.txt
 - lib/seqtrimnext/utils/extract_samples.rb
 - lib/seqtrimnext/utils/fasta2xml.rb
@@ -276,7 +286,6 @@ files:
 - lib/seqtrimnext/utils/recover_mid.rb
 - lib/seqtrimnext/utils/string_utils.rb
 - lib/seqtrimnext.rb
-- History.txt
 - Manifest.txt
 - PostInstall.txt
 - Rakefile