fastq-factory 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
 - data/bin/fastq-factory +4 -4
 - data/fastq-factory.gemspec +2 -2
 - data/lib/fastq-factory.rb +6 -2
 - data/lib/generate_quality_metrics.rb +35 -19
 - data/lib/miseq_run_stats.rb +41 -2
 - data/lib/trim_and_correct.rb +27 -19
 - metadata +3 -3
 
    
        data/VERSION
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            0.1.4
     | 
| 
      
 1 
     | 
    
         
            +
            0.1.5
         
     | 
    
        data/bin/fastq-factory
    CHANGED
    
    | 
         @@ -26,8 +26,8 @@ reverse_reads_suffix, reverse_reads_file_extension = options[:reverse_reads_suff 
     | 
|
| 
       26 
26 
     | 
    
         
             
            sample_map = extract_file_prefixes_and_sample_name(options[:sample_map_file], options[:directory])
         
     | 
| 
       27 
27 
     | 
    
         
             
            # check sequence files exist
         
     | 
| 
       28 
28 
     | 
    
         
             
            sample_map.keys.each do |sample_prefix|
         
     | 
| 
       29 
     | 
    
         
            -
              file_exists?("#{sample_prefix}#{options[:forward_reads_suffix]}" 
     | 
| 
       30 
     | 
    
         
            -
              file_exists?("#{sample_prefix}#{options[:reverse_reads_suffix]}" 
     | 
| 
      
 29 
     | 
    
         
            +
              file_exists?(options[:directory], "#{sample_prefix}#{options[:forward_reads_suffix]}")
         
     | 
| 
      
 30 
     | 
    
         
            +
              file_exists?(options[:directory], "#{sample_prefix}#{options[:reverse_reads_suffix]}")
         
     | 
| 
       31 
31 
     | 
    
         
             
            end
         
     | 
| 
       32 
32 
     | 
    
         
             
            if options[:trim]
         
     | 
| 
       33 
33 
     | 
    
         
             
              fastq_quality_trimmer_path = find_executable("fastq_quality_trimmer", options[:fastq_quality_trimmer_dir])
         
     | 
| 
         @@ -40,8 +40,8 @@ end 
     | 
|
| 
       40 
40 
     | 
    
         | 
| 
       41 
41 
     | 
    
         
             
            if options[:metrics]
         
     | 
| 
       42 
42 
     | 
    
         
             
              sample_map.keys.each do |sample_prefix|
         
     | 
| 
       43 
     | 
    
         
            -
                file_exists?("#{sample_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}",  
     | 
| 
       44 
     | 
    
         
            -
                file_exists?("#{sample_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}",  
     | 
| 
      
 43 
     | 
    
         
            +
                file_exists?(options[:directory], "#{sample_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", "#{sample_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}")
         
     | 
| 
      
 44 
     | 
    
         
            +
                file_exists?(options[:directory], "#{sample_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", "#{sample_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
         
     | 
| 
       45 
45 
     | 
    
         
             
              end
         
     | 
| 
       46 
46 
     | 
    
         
             
              generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale], options[:metrics_quality_cutoff])
         
     | 
| 
       47 
47 
     | 
    
         
             
            end
         
     | 
    
        data/fastq-factory.gemspec
    CHANGED
    
    | 
         @@ -5,11 +5,11 @@ 
     | 
|
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       7 
7 
     | 
    
         
             
              s.name = "fastq-factory"
         
     | 
| 
       8 
     | 
    
         
            -
              s.version = "0.1.4"
     | 
| 
      
 8 
     | 
    
         
            +
              s.version = "0.1.5"
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         
     | 
| 
       11 
11 
     | 
    
         
             
              s.authors = ["Anthony Underwood"]
         
     | 
| 
       12 
     | 
    
         
            -
              s.date = "2012-08- 
     | 
| 
      
 12 
     | 
    
         
            +
              s.date = "2012-08-29"
         
     | 
| 
       13 
13 
     | 
    
         
             
              s.description = "This tool can process fastq files, using fastq_quality_trimmer and quake to correct fastq files and then provide a quality asssessment of the data"
         
     | 
| 
       14 
14 
     | 
    
         
             
              s.email = "anthony.underwood@hpa.org.uk"
         
     | 
| 
       15 
15 
     | 
    
         
             
              s.executables = ["fastq-factory"]
         
     | 
    
        data/lib/fastq-factory.rb
    CHANGED
    
    | 
         @@ -10,8 +10,12 @@ def extract_file_prefixes_and_sample_name(sample_map_file, directory) 
     | 
|
| 
       10 
10 
     | 
    
         
             
              return sample_map
         
     | 
| 
       11 
11 
     | 
    
         
             
            end
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
            def file_exists?( 
     | 
| 
       14 
     | 
    
         
            -
               
     | 
| 
      
 13 
     | 
    
         
            +
            def file_exists?(directory, *filenames)
         
     | 
| 
      
 14 
     | 
    
         
            +
              at_least_one_file_found = false
         
     | 
| 
      
 15 
     | 
    
         
            +
              filenames.each do |filename|
         
     | 
| 
      
 16 
     | 
    
         
            +
                at_least_one_file_found = true  if File.exists?("#{directory}/#{filename}")
         
     | 
| 
      
 17 
     | 
    
         
            +
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
              abort("You specified a file(s): #{filenames.join(", ")}. At least one of these must exist! Please check your sample map file") unless at_least_one_file_found
         
     | 
| 
       15 
19 
     | 
    
         
             
            end
         
     | 
| 
       16 
20 
     | 
    
         | 
| 
       17 
21 
     | 
    
         
             
            def find_executable(executable_name, directory = nil)
         
     | 
    
        data/lib/generate_quality_metrics.rb
    CHANGED
    
    | 
         @@ -6,41 +6,57 @@ include MiseqRunStats 
     | 
|
| 
       6 
6 
     | 
    
         
             
            def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
         
     | 
| 
       7 
7 
     | 
    
         
             
              if File.exists?("#{directory}/ResequencingRunStatistics.xml")
         
     | 
| 
       8 
8 
     | 
    
         
             
                puts "Assessing quality from Miseq run stats file"
         
     | 
| 
       9 
     | 
    
         
            -
                 
     | 
| 
      
 9 
     | 
    
         
            +
                run_stats = parse_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values)
         
     | 
| 
      
 10 
     | 
    
         
            +
              elsif File.exists?("#{directory}/AssemblyRunStatistics.xml")
         
     | 
| 
      
 11 
     | 
    
         
            +
                puts "Assessing quality from Miseq run stats file"
         
     | 
| 
      
 12 
     | 
    
         
            +
                run_stats = parse_assembly_run_stats("#{directory}/AssemblyRunStatistics.xml", sample_map.values)
         
     | 
| 
       10 
13 
     | 
    
         
             
              else
         
     | 
| 
       11 
     | 
    
         
            -
                 
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
      
 14 
     | 
    
         
            +
                run_stats = ResequencingRunStats.new
         
     | 
| 
      
 15 
     | 
    
         
            +
                run_stats.sample_stats = Hash.new
         
     | 
| 
       13 
16 
     | 
    
         
             
                sample_map.values.each do |sample_name|
         
     | 
| 
       14 
     | 
    
         
            -
                   
     | 
| 
      
 17 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name] = ResequencingSampleStats.new
         
     | 
| 
       15 
18 
     | 
    
         
             
                end
         
     | 
| 
       16 
19 
     | 
    
         
             
              end
         
     | 
| 
       17 
20 
     | 
    
         | 
| 
      
 21 
     | 
    
         
            +
              forward_reads_trimmed_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2')
         
     | 
| 
      
 22 
     | 
    
         
            +
              reverse_reads_trimmed_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2')
         
     | 
| 
       18 
23 
     | 
    
         | 
| 
       19 
24 
     | 
    
         
             
              forward_reads_trimmed_corrected_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2')
         
     | 
| 
       20 
25 
     | 
    
         
             
              reverse_reads_trimmed_corrected_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2')
         
     | 
| 
       21 
26 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
27 
     | 
    
         
             
              sample_map.each do |read_file_prefix, sample_name|
         
     | 
| 
       24 
28 
     | 
    
         
             
                puts "Assesing quality for #{sample_name}"
         
     | 
| 
       25 
     | 
    
         
            -
                 
     | 
| 
       26 
     | 
    
         
            -
                 
     | 
| 
       27 
     | 
    
         
            -
                 
     | 
| 
       28 
     | 
    
         
            -
                 
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
      
 29 
     | 
    
         
            +
                run_stats.sample_stats[sample_name].fastq_stats = Hash.new
         
     | 
| 
      
 30 
     | 
    
         
            +
                run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff)
         
     | 
| 
      
 31 
     | 
    
         
            +
                run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff)
         
     | 
| 
      
 32 
     | 
    
         
            +
                if File.exists?("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}")
         
     | 
| 
      
 33 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
         
     | 
| 
      
 34 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}")
         
     | 
| 
      
 35 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
         
     | 
| 
      
 36 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}")
         
     | 
| 
      
 37 
     | 
    
         
            +
                else
         
     | 
| 
      
 38 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}",quality_scale, quality_cutoff)
         
     | 
| 
      
 39 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}")
         
     | 
| 
      
 40 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}",quality_scale, quality_cutoff)
         
     | 
| 
      
 41 
     | 
    
         
            +
                  run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}")
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
       32 
44 
     | 
    
         
             
              end
         
     | 
| 
       33 
45 
     | 
    
         
             
              # print out data
         
     | 
| 
       34 
46 
     | 
    
         
             
              output_file = File.open("#{directory}/summary_stats.txt", "w")
         
     | 
| 
       35 
47 
     | 
    
         
             
              # print headers
         
     | 
| 
       36 
     | 
    
         
            -
              output_file.puts "run name\tnumber of bases(Gb)\tnumber of clusters\tsample name\tdirection\tnumber of clusters\tnumber of forward reads aligned\tnumber of reverse reads aligned\tcoverage\tnumber of snps\tmean quality\tread base where qual falls below 30\tpercent reduction compared to raw"
         
     | 
| 
       37 
     | 
    
         
            -
              output_file.puts "#{directory.match(/.*\/(.+?)$/).captures.first}\t#{ 
     | 
| 
       38 
     | 
    
         
            -
               
     | 
| 
       39 
     | 
    
         
            -
                sample_stats =  
     | 
| 
       40 
     | 
    
         
            -
                 
     | 
| 
      
 48 
     | 
    
         
            +
              output_file.puts "run name\tnumber of bases(Gb)\tnumber of clusters\tsample name\tdirection\tnumber of clusters\tnumber of forward reads aligned\tnumber of reverse reads aligned\tcoverage\tnumber of snps\tnumber of contigs\tmean contig size\tn50\tnumber of bases\tmean quality\tread base where qual falls below 30\tpercent reduction compared to raw"
         
     | 
| 
      
 49 
     | 
    
         
            +
              output_file.puts "#{directory.match(/.*\/(.+?)$/).captures.first}\t#{run_stats.number_of_bases}\t#{run_stats.number_of_clusters}"
         
     | 
| 
      
 50 
     | 
    
         
            +
              run_stats.sample_stats.keys.sort.each do |sample_name|
         
     | 
| 
      
 51 
     | 
    
         
            +
                sample_stats = run_stats.sample_stats[sample_name]
         
     | 
| 
      
 52 
     | 
    
         
            +
                if sample_stats.class == Struct::ResequencingSampleStats
         
     | 
| 
      
 53 
     | 
    
         
            +
                  output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t#{sample_stats.number_of_forward_reads_aligned}\t#{sample_stats.number_of_reverse_reads_aligned}\t#{sample_stats.coverage}\t#{sample_stats.number_of_snps}"
         
     | 
| 
      
 54 
     | 
    
         
            +
                elsif sample_stats.class == Struct::AssemblySampleStats
         
     | 
| 
      
 55 
     | 
    
         
            +
                  output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t\t\t\t\t#{sample_stats.number_of_contigs}\t#{sample_stats.mean_contig_size}\t#{sample_stats.n50}\t#{sample_stats.number_of_bases}"
         
     | 
| 
      
 56 
     | 
    
         
            +
                end
         
     | 
| 
       41 
57 
     | 
    
         
             
                ["forward", "reverse", "forward-trim_corrected", "reverse-trim_corrected"].each do |direction|
         
     | 
| 
       42 
     | 
    
         
            -
                  fastq_stats =  
     | 
| 
       43 
     | 
    
         
            -
                  output_file.puts "\t\t\t\t#{direction}\t\t\t\t\t\t#{fastq_stats.mean_quality}\t#{fastq_stats.position_where_quality_lt_20}\t#{fastq_stats.percentage_compared_to_raw}"
         
     | 
| 
      
 58 
     | 
    
         
            +
                  fastq_stats = run_stats.sample_stats[sample_name].fastq_stats[direction]
         
     | 
| 
      
 59 
     | 
    
         
            +
                  output_file.puts "\t\t\t\t#{direction}\t\t\t\t\t\t\t\t\t\t#{fastq_stats.mean_quality}\t#{fastq_stats.position_where_quality_lt_20}\t#{fastq_stats.percentage_compared_to_raw}"
         
     | 
| 
       44 
60 
     | 
    
         
             
                end
         
     | 
| 
       45 
61 
     | 
    
         
             
              end
         
     | 
| 
       46 
62 
     | 
    
         
             
              output_file.close
         
     | 
    
        data/lib/miseq_run_stats.rb
    CHANGED
    
    | 
         @@ -1,7 +1,9 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module MiseqRunStats
         
     | 
| 
       2 
2 
     | 
    
         
             
              require 'nokogiri'
         
     | 
| 
       3 
3 
     | 
    
         
             
              ResequencingRunStats = Struct.new("ResequencingRunStats", :number_of_bases, :number_of_clusters, :sample_stats)
         
     | 
| 
       4 
     | 
    
         
            -
               
     | 
| 
      
 4 
     | 
    
         
            +
              ResequencingSampleStats = Struct.new("ResequencingSampleStats", :sample_name, :number_of_clusters, :number_of_forward_reads_aligned, :number_of_reverse_reads_aligned, :coverage, :number_of_snps, :fastq_stats)
         
     | 
| 
      
 5 
     | 
    
         
            +
              AssemblyRunStats = Struct.new("AssemblyRunStats", :number_of_bases, :number_of_clusters, :sample_stats)
         
     | 
| 
      
 6 
     | 
    
         
            +
              AssemblySampleStats = Struct.new("AssemblySampleStats", :sample_name, :number_of_clusters, :number_of_contigs, :mean_contig_size, :n50, :number_of_bases, :fastq_stats)
         
     | 
| 
       5 
7 
     | 
    
         
             
              def parse_resequencing_run_stats(xml_file, original_sample_names = nil)
         
     | 
| 
       6 
8 
     | 
    
         
             
                xml = Nokogiri::XML(File.read(xml_file))
         
     | 
| 
       7 
9 
     | 
    
         
             
                resequencing_run_stats = ResequencingRunStats.new
         
     | 
| 
         @@ -16,7 +18,7 @@ module MiseqRunStats 
     | 
|
| 
       16 
18 
     | 
    
         
             
                  sample_name = summarised_samples_stats.search('SampleName').text
         
     | 
| 
       17 
19 
     | 
    
         
             
                  sample_name = original_sample_names.select{|original_sample_name| sample_name =~ /#{original_sample_name}/}.first unless original_sample_names.nil? # alter sample name to original sample name if supplies as an array
         
     | 
| 
       18 
20 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
                  resequencing_run_stats.sample_stats[sample_name] =  
     | 
| 
      
 21 
     | 
    
         
            +
                  resequencing_run_stats.sample_stats[sample_name] = ResequencingSampleStats.new
         
     | 
| 
       20 
22 
     | 
    
         
             
                  resequencing_run_stats.sample_stats[sample_name].sample_name = sample_name
         
     | 
| 
       21 
23 
     | 
    
         
             
                  resequencing_run_stats.sample_stats[sample_name].number_of_clusters = summarised_samples_stats.search('NumberOfClustersPF').text
         
     | 
| 
       22 
24 
     | 
    
         
             
                  resequencing_run_stats.sample_stats[sample_name].number_of_forward_reads_aligned = summarised_samples_stats.search('ClustersAlignedR1').text
         
     | 
| 
         @@ -26,4 +28,41 @@ module MiseqRunStats 
     | 
|
| 
       26 
28 
     | 
    
         
             
                end
         
     | 
| 
       27 
29 
     | 
    
         
             
                return resequencing_run_stats
         
     | 
| 
       28 
30 
     | 
    
         
             
              end
         
     | 
| 
      
 31 
     | 
    
         
            +
              def parse_assembly_run_stats(xml_file, original_sample_names = nil)
         
     | 
| 
      
 32 
     | 
    
         
            +
                xml = Nokogiri::XML(File.read(xml_file))
         
     | 
| 
      
 33 
     | 
    
         
            +
                assembly_run_stats = AssemblyRunStats.new
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                xml.search('//RunStats').each do |run_stats|
         
     | 
| 
      
 36 
     | 
    
         
            +
                  assembly_run_stats.number_of_bases = run_stats.search('YieldInBasesPF').text.to_f/1000000000
         
     | 
| 
      
 37 
     | 
    
         
            +
                  assembly_run_stats.number_of_clusters = run_stats.search('NumberOfClustersPF').text.to_i
         
     | 
| 
      
 38 
     | 
    
         
            +
                end
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                # get un-named contig data
         
     | 
| 
      
 41 
     | 
    
         
            +
                assembly_stats = Array.new
         
     | 
| 
      
 42 
     | 
    
         
            +
                xml.search('//AssemblyStatistics').each do |assembly_sample_stats|
         
     | 
| 
      
 43 
     | 
    
         
            +
                  number_of_contigs = assembly_sample_stats.search('NumberOfContigs').text.to_i
         
     | 
| 
      
 44 
     | 
    
         
            +
                  mean_contig_size = assembly_sample_stats.search('MeanContigLength').text.to_f.to_i
         
     | 
| 
      
 45 
     | 
    
         
            +
                  n50 = assembly_sample_stats.search('N50').text.to_i
         
     | 
| 
      
 46 
     | 
    
         
            +
                  number_of_bases = assembly_sample_stats.search('BaseCount').text.to_i
         
     | 
| 
      
 47 
     | 
    
         
            +
                  assembly_stats << {:number_of_contigs  => number_of_contigs, :mean_contig_size => mean_contig_size, :n50 => n50, :number_of_bases => number_of_bases}
         
     | 
| 
      
 48 
     | 
    
         
            +
                end
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                assembly_run_stats.sample_stats = Hash.new
         
     | 
| 
      
 51 
     | 
    
         
            +
                xml.search('//SampleStatistics').each do |sample_stats|
         
     | 
| 
      
 52 
     | 
    
         
            +
                  sample_name = sample_stats.search('SampleName').text
         
     | 
| 
      
 53 
     | 
    
         
            +
                  sample_name = original_sample_names.select{|original_sample_name| sample_name =~ /#{original_sample_name}/}.first unless original_sample_names.nil? # alter sample name to original sample name if supplies as an array
         
     | 
| 
      
 54 
     | 
    
         
            +
                  next if sample_name.nil?
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name] = AssemblySampleStats.new
         
     | 
| 
      
 57 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name].sample_name = sample_name
         
     | 
| 
      
 58 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name].number_of_clusters = sample_stats.search('NumberOfClustersPF').text
         
     | 
| 
      
 59 
     | 
    
         
            +
                  assembly_sample_stats = assembly_stats.shift
         
     | 
| 
      
 60 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name].number_of_contigs = assembly_sample_stats[:number_of_contigs]
         
     | 
| 
      
 61 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name].mean_contig_size = assembly_sample_stats[:mean_contig_size]
         
     | 
| 
      
 62 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name].n50 = assembly_sample_stats[:n50]
         
     | 
| 
      
 63 
     | 
    
         
            +
                  assembly_run_stats.sample_stats[sample_name].number_of_bases = assembly_sample_stats[:number_of_bases]
         
     | 
| 
      
 64 
     | 
    
         
            +
             
         
     | 
| 
      
 65 
     | 
    
         
            +
                end
         
     | 
| 
      
 66 
     | 
    
         
            +
                return assembly_run_stats
         
     | 
| 
      
 67 
     | 
    
         
            +
              end
         
     | 
| 
       29 
68 
     | 
    
         
             
            end
         
     | 
    
        data/lib/trim_and_correct.rb
    CHANGED
    
    | 
         @@ -23,29 +23,37 @@ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward 
     | 
|
| 
       23 
23 
     | 
    
         
             
                `#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}`
         
     | 
| 
       24 
24 
     | 
    
         
             
              end
         
     | 
| 
       25 
25 
     | 
    
         
             
              sample_map.each do |sample_file_prefix, sample_name|
         
     | 
| 
       26 
     | 
    
         
            -
                 
     | 
| 
      
 26 
     | 
    
         
            +
                if File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
         
     | 
| 
      
 27 
     | 
    
         
            +
                  `perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}`
         
     | 
| 
      
 28 
     | 
    
         
            +
                end
         
     | 
| 
       27 
29 
     | 
    
         
             
              end
         
     | 
| 
       28 
30 
     | 
    
         | 
| 
       29 
31 
     | 
    
         
             
              # cleanup and rename files
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
       30 
33 
     | 
    
         
             
              sample_map.each do |sample_file_prefix, sample_name|
         
     | 
| 
       31 
     | 
    
         
            -
                 
     | 
| 
       32 
     | 
    
         
            -
                 
     | 
| 
       33 
     | 
    
         
            -
                 
     | 
| 
       34 
     | 
    
         
            -
                 
     | 
| 
       35 
     | 
    
         
            -
                 
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
      
 34 
     | 
    
         
            +
                File.delete("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}")
         
     | 
| 
      
 35 
     | 
    
         
            +
                File.delete("#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
         
     | 
| 
      
 36 
     | 
    
         
            +
                File.delete("orphaned_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}")
         
     | 
| 
      
 37 
     | 
    
         
            +
                File.delete("orphaned_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
         
     | 
| 
      
 38 
     | 
    
         
            +
                if File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
         
     | 
| 
      
 39 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}")
         
     | 
| 
      
 40 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
         
     | 
| 
      
 41 
     | 
    
         
            +
                  File.delete("error_model.paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.txt")
         
     | 
| 
      
 42 
     | 
    
         
            +
                  File.delete("error_model.paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.txt")
         
     | 
| 
      
 43 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.stats.txt")
         
     | 
| 
      
 44 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor_single.#{forward_reads_file_extension}")
         
     | 
| 
      
 45 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.stats.txt")
         
     | 
| 
      
 46 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor_single.#{forward_reads_file_extension}")
         
     | 
| 
      
 47 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
         
     | 
| 
      
 48 
     | 
    
         
            +
                  File.delete("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
         
     | 
| 
      
 49 
     | 
    
         
            +
                  File.delete("orphaned_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
         
     | 
| 
      
 50 
     | 
    
         
            +
                  File.delete("orphaned_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
         
     | 
| 
      
 51 
     | 
    
         
            +
                  File.rename("paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
         
     | 
| 
      
 52 
     | 
    
         
            +
                  File.rename("paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
         
     | 
| 
      
 53 
     | 
    
         
            +
                else
         
     | 
| 
      
 54 
     | 
    
         
            +
                  File.rename("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}", "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}")
         
     | 
| 
      
 55 
     | 
    
         
            +
                  File.rename("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}", "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
         
     | 
| 
      
 56 
     | 
    
         
            +
                end
         
     | 
| 
       49 
57 
     | 
    
         
             
              end
         
     | 
| 
       50 
58 
     | 
    
         
             
            end
         
     | 
| 
       51 
59 
     | 
    
         | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: fastq-factory
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.1.4
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.5
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,7 +9,7 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2012-08- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2012-08-29 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: trollop
         
     | 
| 
         @@ -165,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       165 
165 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       166 
166 
     | 
    
         
             
                  segments:
         
     | 
| 
       167 
167 
     | 
    
         
             
                  - 0
         
     | 
| 
       168 
     | 
    
         
            -
                  hash:  
     | 
| 
      
 168 
     | 
    
         
            +
                  hash: 2435527591364603339
         
     | 
| 
       169 
169 
     | 
    
         
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
       170 
170 
     | 
    
         
             
              none: false
         
     | 
| 
       171 
171 
     | 
    
         
             
              requirements:
         
     |