fastq-factory 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
data/bin/fastq-factory CHANGED
@@ -15,6 +15,9 @@ options = Trollop::options do
15
15
  opt :quality_scale, "The Phred quality scale, 33 or 64", :type => :integer, :required => true
16
16
  opt :fastq_quality_trimmer_dir, "The directory where fastq_quality_trimmer can be found", :short => "-F", :type => :string
17
17
  opt :quake_dir, "The directory where the quake executable can be found", :short => "-Q", :type => :string
18
+ opt :trim_point_fraction, "The fraction of the read length below which a read will be discarded if it is shorter than teh value after trimming", :short => "-T", :type => :float, :default => 0.6
19
+ opt :trim_quality_cutoff, "The quality used as a cutoff with which to trim a read from the 3' end", :short => "-E", :type => :integer, :default => 20
20
+ opt :metrics_quality_cutoff, "The quality value which will be used to determine at which position the read falls below this valeu in a 5 position window", :short => "-W", :type => :integer, :default => 30
18
21
  end
19
22
 
20
23
  forward_reads_suffix, forward_reads_file_extension = options[:forward_reads_suffix].match(/(.+)\.(.+?)$/).captures
@@ -32,7 +35,7 @@ if options[:trim]
32
35
  quake_path = find_executable("quake.py", options[:quake_dir])
33
36
  abort ("Can not find quake.py. You can specifiy the directory where this can be found using the -Q option") unless quake_path
34
37
  write_out_fastq_trim_script
35
- trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path)
38
+ trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path, options[:trim_point_fraction], options[:trim_quality_cutoff])
36
39
  end
37
40
 
38
41
  if options[:metrics]
@@ -40,5 +43,5 @@ if options[:metrics]
40
43
  file_exists?("#{sample_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", options[:directory])
41
44
  file_exists?("#{sample_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", options[:directory])
42
45
  end
43
- generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale])
46
+ generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale], options[:metrics_quality_cutoff])
44
47
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "fastq-factory"
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Anthony Underwood"]
12
- s.date = "2012-08-23"
12
+ s.date = "2012-08-24"
13
13
  s.description = "This tool can process fastq files, using fastq_quality_trimmer and quake to correct fastq files and then provide a quality asssessment of the data"
14
14
  s.email = "anthony.underwood@hpa.org.uk"
15
15
  s.executables = ["fastq-factory"]
@@ -3,8 +3,9 @@ include FastqAssessment
3
3
  require 'miseq_run_stats'
4
4
  include MiseqRunStats
5
5
 
6
- def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale)
6
+ def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
7
7
  if File.exists?("#{directory}/ResequencingRunStatistics.xml")
8
+ puts "Assessing quality from Miseq run stats file"
8
9
  resequencing_run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values)
9
10
  else
10
11
  resequencing_run_stats = ResequencingRunStats.new
@@ -20,13 +21,13 @@ def generate_quality_metrics(sample_map, directory, forward_reads_suffix, revers
20
21
 
21
22
 
22
23
  sample_map.each do |read_file_prefix, sample_name|
23
- puts sample_name
24
+ puts "Assesing quality for #{sample_name}"
24
25
  resequencing_run_stats.sample_stats[sample_name].fastq_stats = Hash.new
25
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale)
26
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale)
27
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale)
26
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff)
27
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff)
28
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
28
29
  resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}")
29
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale)
30
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
30
31
  resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}")
31
32
  end
32
33
  # print out data
@@ -1,25 +1,29 @@
1
- def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path)
1
+ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff)
2
2
  Dir.chdir(directory)
3
3
  # trimming
4
4
  sample_map.each do |sample_file_prefix, sample_name|
5
5
  puts "Trimming files for #{sample_name}"
6
+ #determine read length
7
+ read_length = calculate_read_length("#{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}")
8
+ trim_point = (trim_point_fraction * read_length).to_i
6
9
 
7
- system("#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t 20 -l 90 -Q #{quality_scale} -v")
8
- system("#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t 20 -l 90 -Q #{quality_scale} -v")
9
- system("perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
10
+ `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
11
+ `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
12
+ `perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}`
10
13
  end
11
14
 
12
15
  # quake correction
13
16
  # write file for quake
14
17
  sample_map.each do |sample_file_prefix, sample_name|
18
+ puts "Error correcting files for #{sample_name}"
15
19
  output_file = File.open("quake_file_list.txt","w")
16
20
  output_file.puts "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}"
17
21
  output_file.close
18
22
  # run quake
19
- system("#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}")
23
+ `#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}`
20
24
  end
21
25
  sample_map.each do |sample_file_prefix, sample_name|
22
- system("perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
26
+ `perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}`
23
27
  end
24
28
 
25
29
  # cleanup and rename files
@@ -43,4 +47,18 @@ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward
43
47
  system("mv paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} #{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
44
48
  system("mv paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension} #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
45
49
  end
50
+ end
51
+
52
# Determine the read length of a FASTQ file from its first sequence line.
#
# The file is scanned from the top and the first line that begins with a
# nucleotide character (G, A, T, C or N, either case) is taken as the
# representative read; scanning stops there.
#
# @param filename [String] path to the FASTQ file to inspect
# @return [Integer] number of bases on the first sequence line, excluding the
#   line terminator
# @raise [ArgumentError] if the file contains no line that looks like a sequence
def calculate_read_length(filename)
  File.foreach(filename) do |line|
    # chomp strips "\n" as well as "\r\n", so the remaining size already is the
    # read length; no further adjustment is needed.
    sequence = line.chomp
    return sequence.size if sequence =~ /^[GATCNgatcn]/
  end
  # Fail with a clear message rather than an opaque error on a nil length.
  raise ArgumentError, "No sequence line found in #{filename}"
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastq-factory
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-23 00:00:00.000000000 Z
12
+ date: 2012-08-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: trollop
@@ -165,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
165
165
  version: '0'
166
166
  segments:
167
167
  - 0
168
- hash: -3475117921358810705
168
+ hash: -2858636493634267725
169
169
  required_rubygems_version: !ruby/object:Gem::Requirement
170
170
  none: false
171
171
  requirements: