fastq-factory 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/fastq-factory +5 -2
- data/fastq-factory.gemspec +2 -2
- data/lib/generate_quality_metrics.rb +7 -6
- data/lib/trim_and_correct.rb +24 -6
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/bin/fastq-factory
CHANGED
@@ -15,6 +15,9 @@ options = Trollop::options do
|
|
15
15
|
opt :quality_scale, "The Phred quality scale, 33 or 64", :type => :integer, :required => true
|
16
16
|
opt :fastq_quality_trimmer_dir, "The directory where fastq_quality_trimmer can be found", :short => "-F", :type => :string
|
17
17
|
opt :quake_dir, "The directory where the quake executable can be found", :short => "-Q", :type => :string
|
18
|
+
opt :trim_point_fraction, "The fraction of the read length below which a read will be discarded if it is shorter than teh value after trimming", :short => "-T", :type => :float, :default => 0.6
|
19
|
+
opt :trim_quality_cutoff, "The quality used as a cutoff with which to trim a read from the 3' end", :short => "-E", :type => :integer, :default => 20
|
20
|
+
opt :metrics_quality_cutoff, "The quality value which will be used to determine at which position the read falls below this valeu in a 5 position window", :short => "-W", :type => :integer, :default => 30
|
18
21
|
end
|
19
22
|
|
20
23
|
forward_reads_suffix, forward_reads_file_extension = options[:forward_reads_suffix].match(/(.+)\.(.+?)$/).captures
|
@@ -32,7 +35,7 @@ if options[:trim]
|
|
32
35
|
quake_path = find_executable("quake.py", options[:quake_dir])
|
33
36
|
abort ("Can not find quake.py. You can specifiy the directory where this can be found using the -Q option") unless quake_path
|
34
37
|
write_out_fastq_trim_script
|
35
|
-
trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path)
|
38
|
+
trim_and_correct_fastqs(sample_map, options[:directory], forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, options[:quality_scale], fastq_quality_trimmer_path, quake_path, options[:trim_point_fraction], options[:trim_quality_cutoff])
|
36
39
|
end
|
37
40
|
|
38
41
|
if options[:metrics]
|
@@ -40,5 +43,5 @@ if options[:metrics]
|
|
40
43
|
file_exists?("#{sample_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", options[:directory])
|
41
44
|
file_exists?("#{sample_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", options[:directory])
|
42
45
|
end
|
43
|
-
generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale])
|
46
|
+
generate_quality_metrics(sample_map, options[:directory], options[:forward_reads_suffix], options[:reverse_reads_suffix], options[:quality_scale], options[:metrics_quality_cutoff])
|
44
47
|
end
|
data/fastq-factory.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "fastq-factory"
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.1.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Anthony Underwood"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-24"
|
13
13
|
s.description = "This tool can process fastq files, using fastq_quality_trimmer and quake to correct fastq files and then provide a quality asssessment of the data"
|
14
14
|
s.email = "anthony.underwood@hpa.org.uk"
|
15
15
|
s.executables = ["fastq-factory"]
|
data/lib/generate_quality_metrics.rb
CHANGED
@@ -3,8 +3,9 @@ include FastqAssessment
|
|
3
3
|
require 'miseq_run_stats'
|
4
4
|
include MiseqRunStats
|
5
5
|
|
6
|
-
def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale)
|
6
|
+
def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff)
|
7
7
|
if File.exists?("#{directory}/ResequencingRunStatistics.xml")
|
8
|
+
puts "Assessing quality from Miseq run stats file"
|
8
9
|
resequencing_run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values)
|
9
10
|
else
|
10
11
|
resequencing_run_stats = ResequencingRunStats.new
|
@@ -20,13 +21,13 @@ def generate_quality_metrics(sample_map, directory, forward_reads_suffix, revers
|
|
20
21
|
|
21
22
|
|
22
23
|
sample_map.each do |read_file_prefix, sample_name|
|
23
|
-
puts sample_name
|
24
|
+
puts "Assesing quality for #{sample_name}"
|
24
25
|
resequencing_run_stats.sample_stats[sample_name].fastq_stats = Hash.new
|
25
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale)
|
26
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale)
|
27
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale)
|
26
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff)
|
27
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff)
|
28
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
|
28
29
|
resequencing_run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}")
|
29
|
-
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale)
|
30
|
+
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff)
|
30
31
|
resequencing_run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}")
|
31
32
|
end
|
32
33
|
# print out data
|
data/lib/trim_and_correct.rb
CHANGED
@@ -1,25 +1,29 @@
|
|
1
|
-
def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path)
|
1
|
+
def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff)
|
2
2
|
Dir.chdir(directory)
|
3
3
|
# trimming
|
4
4
|
sample_map.each do |sample_file_prefix, sample_name|
|
5
5
|
puts "Trimming files for #{sample_name}"
|
6
|
+
#determine read length
|
7
|
+
read_length = calculate_read_length("#{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}")
|
8
|
+
trim_point = (trim_point_fraction * read_length).to_i
|
6
9
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
+
`#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
|
11
|
+
`#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v`
|
12
|
+
`perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}`
|
10
13
|
end
|
11
14
|
|
12
15
|
# quake correction
|
13
16
|
# write file for quake
|
14
17
|
sample_map.each do |sample_file_prefix, sample_name|
|
18
|
+
puts "Error correcting files for #{sample_name}"
|
15
19
|
output_file = File.open("quake_file_list.txt","w")
|
16
20
|
output_file.puts "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}"
|
17
21
|
output_file.close
|
18
22
|
# run quake
|
19
|
-
|
23
|
+
`#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}`
|
20
24
|
end
|
21
25
|
sample_map.each do |sample_file_prefix, sample_name|
|
22
|
-
|
26
|
+
`perl /Volumes/NGS2_DataRAID/projects/MRSA/scripts/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}`
|
23
27
|
end
|
24
28
|
|
25
29
|
# cleanup and rename files
|
@@ -43,4 +47,18 @@ def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward
|
|
43
47
|
system("mv paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} #{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")
|
44
48
|
system("mv paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension} #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}")
|
45
49
|
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def calculate_read_length(filename)
|
53
|
+
read_length = nil
|
54
|
+
File.open(filename) do |f|
|
55
|
+
f.each do |line|
|
56
|
+
line.chomp!
|
57
|
+
if line =~ /^[GATCgatc]/
|
58
|
+
read_length = line.size
|
59
|
+
break
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
return read_length - 1
|
46
64
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastq-factory
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: trollop
|
@@ -165,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
165
165
|
version: '0'
|
166
166
|
segments:
|
167
167
|
- 0
|
168
|
-
hash: -
|
168
|
+
hash: -2858636493634267725
|
169
169
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
170
|
none: false
|
171
171
|
requirements:
|