fastq-factory 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/fastq-factory +5 -2
- data/fastq-factory.gemspec +2 -2
- data/lib/generate_quality_metrics.rb +7 -6
- data/lib/trim_and_correct.rb +24 -6
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/bin/fastq-factory
CHANGED
@@ -15,6 +15,9 @@ options = Trollop::options do
|
|
15
15
|
opt :quality_scale, "The Phred quality scale, 33 or 64", :type => :integer, :required => true
|
16
16
|
opt :fastq_quality_trimmer_dir, "The directory where fastq_quality_trimmer can be found", :short => "-F", :type => :string
|
17
17
|
opt :quake_dir, "The directory where the quake executable can be found", :short => "-Q", :type => :string
|
18
|
+
opt :trim_point_fraction, "The fraction of the read length below which a read will be discarded if it is shorter than teh value after trimming", :short => "-T", :type => :float, :default => 0.6
|
19
|
+
opt :trim_quality_cutoff, "The quality used as a cutoff with which to trim a read from the 3' end", :short => "-E", :type => :integer, :default => 20
|
20
|
+
opt :metrics_quality_cutoff, "The quality value which will be used to determine at which position the read falls below this valeu in a 5 position window", :short => "-W", :type => :integer, :default => 30
|
18
21
|
end
|
19
22
|
|
20
23
|
forward_reads_suffix, forward_reads_file_extension = options[:forward_reads_suffix].match(/(.+)\.(.+?)$/).captures
|
@@ -32,7 +35,7 @@ if options[:trim]
|
|
32
35
|
quake_path = find_executable("quake.py", options[:quake_dir])
|
33
36
|
abort ("Can not find quake.py. You can specifiy the directory where this can be found using the -Q option") unless quake_path
|
34
37
|
write_out_fastq_trim_script
|
35
|
-
trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path)
|
38
|
+
trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path, options[:trim_point_fraction], options[:trim_quality_cutoff])
|
36
39
|
end
|
37
40
|
|
38
41
|
if options[:metrics]
|
@@ -40,5 +43,5 @@ if options[:metrics]
|
|
40
43
|
file_exists?("#{sample_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", options[:directory])
|
41
44
|
file_exists?("#{sample_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", options[:directory])
|
42
45
|
end
|
43
|
-
generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale])
|
46
|
+
generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale], options[:metrics_quality_cutoff])
|
44
47
|
end
|
data/fastq-factory.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "fastq-factory"
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.1.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Anthony Underwood"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-24"
|
13
13
|
s.description = "This tool can process fastq files, using fastq_quality_trimmer and quake to correct fastq files and then provide a quality asssessment of the data"
|
14
14
|
s.email = "anthony.underwood@hpa.org.uk"
|
15
15
|
s.executables = ["fastq-factory"]
|
@@ -3,8 +3,9 @@ include FastqAssessment
|
|
3
3
|
require 'miseq_run_stats'
|
4
4
|
include MiseqRunStats
|
5
5
|
|
6
|
-
def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale)
|
6
|
+
def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
|
7
7
|
if File.exists?("#{directory}/ResequencingRunStatistics.xml")
|
8
|
+
puts "Assessing quality from Miseq run stats file"
|
8
9
|
resequencing_run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values)
|
9
10
|
else
|
10
11
|
resequencing_run_stats = ResequencingRunStats.new
|
@@ -20,13 +21,13 @@ def generate_quality_metrics(sample_map, directory, forward_reads_suffix, revers
|
|
20
21
|
|
21
22
|
|
22
23
|
sample_map.each do |read_file_prefix, sample_name|
|
23
|
-
puts sample_name
|
24
|
+
puts "Assesing quality for #{sample_name}"
|
24
25
|
resequencing_run_stats.sample_stats[sample_name].fastq_stats = Hash.new
|
25
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale)
|
26
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale)
|
27
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale)
|
26
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff)
|
27
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff)
|
28
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
|
28
29
|
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}")
|
29
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale)
|
30
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
|
30
31
|
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}")
|
31
32
|
end
|
32
33
|
# print out data
|
data/lib/trim_and_correct.rb
CHANGED
@@ -1,25 +1,29 @@
|
|
1
|
-
def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path)
|
1
|
+
def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff)
|
2
2
|
Dir.chdir(directory)
|
3
3
|
# trimming
|
4
4
|
sample_map.each do |sample_file_prefix, sample_name|
|
5
5
|
puts "Trimming files for #{sample_name}"
|
6
|
+
#determine read length
|
7
|
+
read_length = calculate_read_length("#{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}")
|
8
|
+
trim_point = (trim_point_fraction * read_length).to_i
|
6
9
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
+
`#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
|
11
|
+
`#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
|
12
|
+
`perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}`
|
10
13
|
end
|
11
14
|
|
12
15
|
# quake correction
|
13
16
|
# write file for quake
|
14
17
|
sample_map.each do |sample_file_prefix, sample_name|
|
18
|
+
puts "Error correcting files for #{sample_name}"
|
15
19
|
output_file = File.open("quake_file_list.txt","w")
|
16
20
|
output_file.puts "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}"
|
17
21
|
output_file.close
|
18
22
|
# run quake
|
19
|
-
|
23
|
+
`#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}`
|
20
24
|
end
|
21
25
|
sample_map.each do |sample_file_prefix, sample_name|
|
22
|
-
|
26
|
+
`perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}`
|
23
27
|
end
|
24
28
|
|
25
29
|
# cleanup and rename files
|
@@ -43,4 +47,18 @@ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward
|
|
43
47
|
system("mv paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} #{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
|
44
48
|
system("mv paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension} #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
|
45
49
|
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Derives a read length from the first sequence line of a FASTQ file.
#
# The file is scanned line by line; the first line that begins with a
# nucleotide character (G, A, T or C, in either case) is treated as the
# sequence line and scanning stops there.
#
# @param filename [String] path of the FASTQ file to inspect
# @return [Integer] the length of that sequence line (without its line
#   terminator) minus one
# @raise [ArgumentError] if no line in the file begins with G, A, T or C
def calculate_read_length(filename)
  read_length = nil
  File.open(filename) do |f|
    f.each_line do |line|
      line.chomp!
      next unless line =~ /^[GATCgatc]/

      read_length = line.size
      break
    end
  end

  # Without this guard an empty or non-FASTQ file ends in `nil - 1`, which
  # surfaces as a NoMethodError that says nothing about the offending file.
  if read_length.nil?
    raise ArgumentError, "Could not determine read length: no sequence line found in #{filename}"
  end

  # NOTE(review): the reason for subtracting one is not visible from this
  # code; the result is kept as-is so callers see the same value — confirm
  # before changing.
  read_length - 1
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastq-factory
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: trollop
|
@@ -165,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
165
165
|
version: '0'
|
166
166
|
segments:
|
167
167
|
- 0
|
168
|
-
hash: -
|
168
|
+
hash: -2858636493634267725
|
169
169
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
170
|
none: false
|
171
171
|
requirements:
|