fastq-factory 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
data/bin/fastq-factory CHANGED
@@ -15,6 +15,9 @@ options = Trollop::options do
15
15
  opt :quality_scale, "The Phred quality scale, 33 or 64", :type => :integer, :required => true
16
16
  opt :fastq_quality_trimmer_dir, "The directory where fastq_quality_trimmer can be found", :short => "-F", :type => :string
17
17
  opt :quake_dir, "The directory where the quake executable can be found", :short => "-Q", :type => :string
18
+ opt :trim_point_fraction, "The fraction of the read length below which a read will be discarded if it is shorter than teh value after trimming", :short => "-T", :type => :float, :default => 0.6
19
+ opt :trim_quality_cutoff, "The quality used as a cutoff with which to trim a read from the 3' end", :short => "-E", :type => :integer, :default => 20
20
+ opt :metrics_quality_cutoff, "The quality value which will be used to determine at which position the read falls below this valeu in a 5 position window", :short => "-W", :type => :integer, :default => 30
18
21
  end
19
22
 
20
23
  forward_reads_suffix, forward_reads_file_extension = options[:forward_reads_suffix].match(/(.+)\.(.+?)$/).captures
@@ -32,7 +35,7 @@ if options[:trim]
32
35
  quake_path = find_executable("quake.py", options[:quake_dir])
33
36
  abort ("Can not find quake.py. You can specifiy the directory where this can be found using the -Q option") unless quake_path
34
37
  write_out_fastq_trim_script
35
- trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path)
38
+ trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path, options[:trim_point_fraction], options[:trim_quality_cutoff])
36
39
  end
37
40
 
38
41
  if options[:metrics]
@@ -40,5 +43,5 @@ if options[:metrics]
40
43
  file_exists?("#{sample_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", options[:directory])
41
44
  file_exists?("#{sample_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", options[:directory])
42
45
  end
43
- generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale])
46
+ generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale], options[:metrics_quality_cutoff])
44
47
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "fastq-factory"
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Anthony Underwood"]
12
- s.date = "2012-08-23"
12
+ s.date = "2012-08-24"
13
13
  s.description = "This tool can process fastq files, using fastq_quality_trimmer and quake to correct fastq files and then provide a quality asssessment of the data"
14
14
  s.email = "anthony.underwood@hpa.org.uk"
15
15
  s.executables = ["fastq-factory"]
@@ -3,8 +3,9 @@ include FastqAssessment
3
3
  require 'miseq_run_stats'
4
4
  include MiseqRunStats
5
5
 
6
- def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale)
6
+ def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
7
7
  if File.exists?("#{directory}/ResequencingRunStatistics.xml")
8
+ puts "Assessing quality from Miseq run stats file"
8
9
  resequencing_run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values)
9
10
  else
10
11
  resequencing_run_stats = ResequencingRunStats.new
@@ -20,13 +21,13 @@ def generate_quality_metrics(sample_map, directory, forward_reads_suffix, revers
20
21
 
21
22
 
22
23
  sample_map.each do |read_file_prefix, sample_name|
23
- puts sample_name
24
+ puts "Assesing quality for #{sample_name}"
24
25
  resequencing_run_stats.sample_stats[sample_name].fastq_stats = Hash.new
25
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale)
26
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale)
27
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale)
26
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff)
27
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff)
28
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
28
29
  resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}")
29
- resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale)
30
+ resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
30
31
  resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}")
31
32
  end
32
33
  # print out data
@@ -1,25 +1,29 @@
1
- def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path)
1
+ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff)
2
2
  Dir.chdir(directory)
3
3
  # trimming
4
4
  sample_map.each do |sample_file_prefix, sample_name|
5
5
  puts "Trimming files for #{sample_name}"
6
+ #determine read length
7
+ read_length = calculate_read_length("#{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}")
8
+ trim_point = (trim_point_fraction * read_length).to_i
6
9
 
7
- system("#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t 20 -l 90 -Q #{quality_scale} -v")
8
- system("#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t 20 -l 90 -Q #{quality_scale} -v")
9
- system("perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}")
10
+ `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
11
+ `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
12
+ `perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}`
10
13
  end
11
14
 
12
15
  # quake correction
13
16
  # write file for quake
14
17
  sample_map.each do |sample_file_prefix, sample_name|
18
+ puts "Error correcting files for #{sample_name}"
15
19
  output_file = File.open("quake_file_list.txt","w")
16
20
  output_file.puts "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}"
17
21
  output_file.close
18
22
  # run quake
19
- system("#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}")
23
+ `#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}`
20
24
  end
21
25
  sample_map.each do |sample_file_prefix, sample_name|
22
- system("perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
26
+ `perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}`
23
27
  end
24
28
 
25
29
  # cleanup and rename files
@@ -43,4 +47,18 @@ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward
43
47
  system("mv paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} #{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
44
48
  system("mv paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension} #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
45
49
  end
50
+ end
51
+
52
# Determine the read length of a FASTQ file from the first sequence line found.
#
# Scans the file from the top and uses the first line that begins with a
# nucleotide character (G, A, T, C or N, upper or lower case). Scanning stops
# at that line, so the rest of the file is never read.
#
# NOTE(review): assumes all reads in the file share this length, since only
# the first matching line is measured — confirm for pre-trimmed input.
#
# @param filename [String] path to the FASTQ file to inspect
# @return [Integer] number of characters in the first sequence line,
#   excluding the line terminator
# @raise [ArgumentError] if the file contains no line starting with a
#   nucleotide character (e.g. an empty file)
def calculate_read_length(filename)
  File.foreach(filename) do |line|
    sequence = line.chomp
    # chomp has already removed the line terminator ("\n" or "\r\n"), so the
    # size is the read length as-is; subtracting one would under-report it.
    return sequence.size if sequence =~ /^[GATCNgatcn]/
  end
  # Fail with a clear message rather than a NoMethodError on nil.
  raise ArgumentError, "No sequence line found in #{filename}"
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastq-factory
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-23 00:00:00.000000000 Z
12
+ date: 2012-08-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: trollop
@@ -165,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
165
165
  version: '0'
166
166
  segments:
167
167
  - 0
168
- hash: -3475117921358810705
168
+ hash: -2858636493634267725
169
169
  required_rubygems_version: !ruby/object:Gem::Requirement
170
170
  none: false
171
171
  requirements: