nugen_barcode_splitter 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/base_adder +22 -0
- data/bin/nugen_barcode_splitter +156 -0
- data/lib/nugen_barcode_splitter/fastq.rb +26 -0
- data/lib/nugen_barcode_splitter/nugen_template.rb +42 -0
- data/lib/nugen_barcode_splitter/sample_sheet.rb +41 -0
- data/lib/nugen_barcode_splitter/statistics.rb +37 -0
- data/lib/nugen_barcode_splitter.rb +10 -0
- data/test/fixtures/Lane4.log +56 -0
- data/test/fixtures/barcode_4.txt +5 -0
- data/test/fixtures/barcode_5.txt +5 -0
- data/test/fixtures/barcode_6.txt +5 -0
- data/test/fixtures/barcode_7.txt +5 -0
- data/test/fixtures/barcode_8.txt +5 -0
- data/test/fixtures/invalid.fq +20 -0
- data/test/fixtures/sample_sheet.csv +21 -0
- data/test/fixtures/test.fq +20 -0
- data/test/test_mutations_caller_pipeline.rb +55 -0
- metadata +75 -0
data/bin/base_adder
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command line wrapper around Fastq#add: writes a copy of a fastq file in which
# every read is prefixed with a fixed number of "N" bases (and every quality
# string with the same number of "@" characters).
require "nugen_barcode_splitter"

usage =<<EOF
#{$0} file.fq number_of_bases out.fq

-_-_-_-_- #{$0} -_-_-_-_-

Adds N^P bases to the beginning of the
fastq file. The Quality is set on the
lowest value.
EOF

# All three positional arguments are mandatory and the base count has to be a
# non-negative integer; otherwise show the usage text instead of crashing.
if ARGV.length != 3 || ARGV[1] !~ /\A\d+\z/
  STDERR.puts usage
  exit 1
end

in_file         = ARGV[0]
number_of_bases = ARGV[1].to_i
out_file        = ARGV[2]

# Pass the path given on the command line (not a string literal) to the reader.
fastq = Fastq.new(in_file)
begin
  fastq.add(number_of_bases, out_file)
ensure
  # Release the input handle even when writing the output fails.
  fastq.close
end
|
21
|
+
|
22
|
+
|
@@ -0,0 +1,156 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Demultiplexes Nugen-barcoded Illumina reads.
#
# Steps:
#   1. Write one barcode file per lane from the sample sheet (SampleSheet).
#   2. For every forward fastq.gz found below the project directory, fill the
#      fastq_multx command template (NugenTemplate) and run it.
#   3. Run base_adder on every demultiplexed R1 file to put back the bases
#      that were trimmed together with the barcode.
require "rubygems"
require "yaml"
require "optparse"
require "fileutils"
require "nugen_barcode_splitter"

usage =<<EOF
#{$0} [options] -p project_dir -o out_dir -s sample_sheet_nugen
_____________________________________________________________________________

Example sample_sheet_nugen.csv :

FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject
C0ED3ACXX,4,S1,hg19,ACCC,RNA Seq,N,,,33333
C0ED3ACXX,4,S2,hg19,GAGT,RNA Seq,N,,,44444
C0ED3ACXX,4,S3,hg19,CGTA,RNA Seq,N,,,33333
C0ED3ACXX,4,S4,hg19,TTAG,RNA Seq,N,,,44444
C0ED3ACXX,5,S5,hg19,AGGG,RNA Seq,N,,,33333
C0ED3ACXX,5,S6,hg19,GTCA,RNA Seq,N,,,44444
C0ED3ACXX,6,S7,hg19,CCAT,RNA Seq,N,,,33333

Note: The sample names must be alphanumerical!
_____________________________________________________________________________

EOF

# Flag-like options hold the literal fastq_multx switch ("" when unset) so they
# can be joined straight into the command line. The same key is used for the
# default, the parser callback and the final lookup.
options = {
  :project_dir  => nil,
  :out_dir      => nil,
  :sample_sheet => nil,
  :eol_only     => "",
  :bol_only     => "",
  :mismatches   => "",
  :keep_barcode => "",
  :fastq_multx  => "fastq_multx",
  :debug        => false
}

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-p", "--project_dir", :REQUIRED, String,
          "Illumina project directory (../Unaligned/ProjectXXX/)") do |i|
    options[:project_dir] = i if i
  end

  opts.on("-o", "--out_dir", :REQUIRED, String,
          "The desired output directory") do |i|
    options[:out_dir] = i if i
  end

  opts.on("-s", "--sample_sheet", :REQUIRED, String,
          "Please provide your sample_sheet") do |i|
    options[:sample_sheet] = i if i
  end

  opts.on("-e", "--end_of_line",
          "Limit the search for the barcode to the end of the line DEFAULT:false") do
    options[:eol_only] = "-e"
  end

  opts.on("-b", "--begin_of_line",
          "Limit the search for the barcode to the start of the line DEFAULT:false") do
    options[:bol_only] = "-b"
  end

  opts.on("-k", "--keep_barcode",
          "Do not trim of the barcode DEFAULT:false") do
    options[:keep_barcode] = "-x"
  end

  opts.on("-m", "--mismatches NUM", Integer, "Number of mismatches (Default:1)") do |i|
    options[:mismatches] = "-m #{i}" if i
  end

  opts.on("-x", "--fastq_multx DIR", String) do |i|
    options[:fastq_multx] = i if i
  end

  opts.on("-d", "--debug", "Debug mode!") do
    options[:debug] = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  optparse.parse!
  mandatory = [:project_dir, :out_dir, :sample_sheet]
  missing = mandatory.select { |param| options[param].nil? }
  unless missing.empty?
    puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
    puts optparse
    exit 1
  end
rescue OptionParser::ParseError => e
  # ParseError also covers InvalidArgument (e.g. "-m abc"), not only
  # InvalidOption and MissingArgument.
  puts e.to_s
  puts optparse
  exit 1
end

STDERR.puts "CURRENT OPTIONS:"
STDERR.puts options.to_yaml

# The barcode files are written into out_dir, so it has to exist first.
FileUtils.mkdir_p(options[:out_dir])

# Read out sample_sheet
sample_sheet = SampleSheet.new(options[:sample_sheet])
sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")

# Prepare template
multx_opts = [:keep_barcode, :eol_only, :bol_only, :mismatches].
  map { |key| options[key] }.
  reject { |switch| switch.empty? }.
  join(" ")
nugen_temp = NugenTemplate.new(options[:fastq_multx], multx_opts)

# Forward read files look like Lane4_NoIndex_L004_R1_001.fastq.gz
forward_fastq = /Lane[1-8]_NoIndex_L[0-9]{3}_R1_[0-9]{3}\.fastq\.gz\z/
lane_dirs = []

# Globbing a bare directory only returns the directory itself, so list its
# children explicitly.
Dir.glob(File.join(options[:project_dir], "*")).sort.each do |sample_dir|
  next unless File.directory?(sample_dir)
  next unless sample_dir =~ /Sample_Lane/

  Dir.glob(File.join(sample_dir, "*")).sort.each do |fwd|
    next if File.directory?(fwd)
    next unless fwd =~ forward_fastq

    fwd_name = File.basename(fwd)
    # Only rewrite the file name; a "_R1_" inside a directory name must stay.
    rev = File.join(File.dirname(fwd), fwd_name.sub("_R1_", "_R2_"))

    fields = fwd_name.split("_")
    lane   = fields[0].gsub(/\D/, "")   # "Lane4"        -> "4"
    number = fields[4].gsub(/\D/, "")   # "001.fastq.gz" -> "001"

    barcodes = "#{options[:out_dir]}/barcode_#{lane}.txt"
    outdir   = "#{options[:out_dir]}/Lane#{lane}"

    begin
      FileUtils.mkdir_p outdir unless File.directory? outdir
    rescue StandardError => e
      # Best effort, as before: report and let the command below fail loudly.
      STDERR.puts e.message
    end

    cmd = nugen_temp.fill(lane, number, outdir, barcodes, fwd, rev)
    if options[:debug]
      puts cmd
    else
      # NOTE(review): the template uses bash process substitution "<(...)";
      # system(String) runs it through /bin/sh - confirm that sh supports it
      # on the target machines.
      status = system(cmd)
      raise "Calling the template did not succeed!" if !status
    end

    lane_dirs << outdir unless lane_dirs.include?(outdir)
  end
end

# Add the trimmed bases
lane_dirs.each do |lane_dir|
  Dir.glob(File.join(lane_dir, "*")).sort.each do |demultiplexed|
    next unless File.basename(demultiplexed) =~ /\AR1_[0-9]{3}\./
    # Do not pad the output of a previous run again.
    next if demultiplexed =~ /_added\z/

    # Build the command as an argument list: it is only executed (never via
    # backticks while assembling it) and needs no shell quoting.
    argv = ["base_adder", demultiplexed, "4", "#{demultiplexed}_added"]
    if options[:debug]
      puts argv.join(" ")
    else
      status = system(*argv)
      raise "Calling base_adder did not succeed!" if !status
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Reads a fastq file and writes padded copies of it.
class Fastq
  # Opens +filename+ for reading and checks that it looks like a fastq file.
  #
  # filename - path of the fastq file.
  #
  # Raises RuntimeError ("Invalid fastq file!") when the file is empty or its
  # first line does not start with "@".
  def initialize(filename)
    @filehandle = File.open(filename, "r")
    first_line = @filehandle.gets
    # A fastq header line starts with "@"; an "@" somewhere else in the line
    # (or no line at all) does not make the file valid.
    unless first_line && first_line.start_with?("@")
      @filehandle.close
      raise RuntimeError, "Invalid fastq file!"
    end
    @filehandle.rewind
  end

  # Writes a copy of the file to +outdir+ in which every sequence line is
  # prefixed with +num+ "N" bases and every quality line with +num+ "@".
  #
  # num    - number of bases to prepend.
  # outdir - path of the output FILE (the name is historical).
  #
  # Records are read as groups of four lines; a truncated last record raises
  # EOFError. The method can be called repeatedly on the same object.
  def add(num, outdir)
    bases = "N" * num
    qualities = "@" * num

    # Start from the top so that a second call does not produce an empty file.
    @filehandle.rewind

    # Block form closes the output file even if reading fails half-way.
    File.open(outdir, "w") do |outfile|
      until @filehandle.eof?
        outfile.write(@filehandle.readline)             # header
        outfile.write(bases + @filehandle.readline)     # sequence
        outfile.write(@filehandle.readline)             # separator ("+")
        outfile.write(qualities + @filehandle.readline) # qualities
      end
    end
  end

  # Closes the input file; calling it more than once is harmless.
  def close()
    @filehandle.close() unless @filehandle.closed?
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "erb"

# Builds the shell script that runs fastq_multx for one pair of fastq files.
#
# The script text is an ERB template. The fastq_multx executable and its
# options are fixed when the object is created; lane, file names and output
# directory are filled in per call by #fill. Rendering uses ERB from the
# standard library, so the gem needs no extra runtime dependency.
class NugenTemplate

  # fastq_multx - name or path of the fastq_multx executable.
  # options     - String with the command line switches passed to it.
  def initialize(fastq_multx, options)
    # "\$" keeps Ruby from reading "#$" as a global variable interpolation and
    # "\\" produces the single backslash that continues the shell line.
    @template =<<EOF

#!/bin/bash
#\$ -pe DJ 4
#\$ -l h_vmem=6G
#\$ -j y
#\$ -N fq.Lane<%= @lane %>.<%= @number %>
#\$ -o <%= @lane_dir %>/nugen_demultiplexing.log

#{fastq_multx} #{options} -B <%= @barcodes %> \\
<(gunzip -c <%= @fwd %>) <(gunzip -c <%= @rev %>) \\
-o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq
EOF
  end

  # Renders the template for one pair of input files.
  #
  # lane     - lane identifier used in the job name.
  # number   - chunk number of the fastq file; also used to name the outputs
  #            ("R1_<number>" and "R2_<number>").
  # lane_dir - directory that receives the log and the demultiplexed files.
  # barcodes - path of the barcode file.
  # fwd, rev - paths of the gzipped forward and reverse fastq files.
  #
  # Returns the filled-in script as a String.
  def fill(lane, number, lane_dir, barcodes, fwd, rev)
    context = {
      :lane     => lane,
      :number   => number,
      :lane_dir => lane_dir,
      :barcodes => barcodes,
      :fwd      => fwd,
      :rev      => rev,
      :r1       => "R1_#{number}",
      :r2       => "R2_#{number}"
    }

    # The template refers to its values as instance variables, so expose the
    # hash entries as instance variables of a throw-away object and render
    # the template in that object's binding.
    scope = Object.new
    context.each { |name, value| scope.instance_variable_set("@#{name}", value) }
    ERB.new(@template).result(scope.instance_eval { binding })
  end

  # Returns the unfilled template without its trailing newline.
  def to_s
    @template.chomp
  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
# Sample sheets are supposed to look like this:
|
4
|
+
=begin
|
5
|
+
FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
|
6
|
+
C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
7
|
+
C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
8
|
+
=end
|
9
|
+
|
10
|
+
# In-memory view of the lane, sample id and barcode columns of a sample sheet.
class SampleSheet

  # Parallel arrays: entry i of each array belongs to data row i of the sheet.
  attr_accessor :lanes, :sample_id, :barcodes

  # Reads the CSV file +samplesheet+ (its first row is the header) and collects
  # the "Lane", "SampleID" and "Index" columns.
  #
  # Every non-word character of a sample id is replaced by "X" ("R-1" becomes
  # "RX1"). Rows without a lane, such as a trailing blank line, are skipped.
  def initialize(samplesheet)
    @lanes = []
    @sample_id = []
    @barcodes = []

    # Options are passed as keywords: a positional options hash is rejected by
    # the CSV library of current Ruby versions.
    CSV.foreach(samplesheet, :headers => :first_row) do |row|
      next if row["Lane"].nil?
      @lanes << row["Lane"]
      @sample_id << row["SampleID"].to_s.gsub(/\W/, "X")
      @barcodes << row["Index"]
    end
  end

  # Writes one barcode file per lane, named "<prefix>_<lane>.txt".
  #
  # Each file starts with a comment header followed by one
  # "<sample id> <barcode>" line per sample of that lane, in sheet order.
  # Rows of one lane do not have to be adjacent in the sheet. Every file is
  # closed (and therefore flushed) before the method returns, so the files
  # can be handed to an external program right away.
  #
  # Returns the list of written file names.
  def create_barcode_txt(prefix)
    rows_per_lane = @lanes.each_index.group_by { |i| @lanes[i] }

    rows_per_lane.map do |lane, rows|
      outfile = "#{prefix}_#{lane}.txt"
      File.open(outfile, 'w') do |handler|
        handler.write("# SampleName Barcode \n")
        rows.each do |i|
          handler.write("#{@sample_id[i]} #{@barcodes[i]} \n")
        end
      end
      outfile
    end
  end

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Sums up the read counts of a demultiplexing log.
#
# The log consists of tab separated lines "<id>\t<count>\t<files>". It may
# contain several reports one after another, each starting with an
# "Id ..." header line; the counts of all reports are added up.
class Statistics

  attr_accessor :num_reads, :num_unmatched, :total, :barcodes

  # lane_log - path of the log file.
  # barcodes - Array of ids to count; num_reads[i] belongs to barcodes[i].
  #            Ids of the log that are not in this list are ignored.
  def initialize(lane_log, barcodes)
    @num_reads = Array.new(barcodes.length(), 0)
    @num_unmatched = 0
    @total = 0
    @barcodes = barcodes

    # File.foreach closes the file when the block is done.
    File.foreach(lane_log) do |line|
      id, count = line.chomp.split("\t")
      # Skip blank lines and header lines. Only the first field decides what
      # a header is, so ids or file names that merely contain "Id" still count.
      next if id.nil? || id == "Id"

      case id
      when "unmatched"
        @num_unmatched += count.to_i
      when "total"
        @total += count.to_i
      else
        if i = @barcodes.index(id)
          @num_reads[i] += count.to_i
        end
      end
    end
  end

  # Returns a multi-line report: reads per barcode, the number and percentage
  # (two decimals) of unmatched reads and the total number of reads.
  def to_s
    str = "Statistics: \nBarcode\t# of reads\n"
    @barcodes.each_with_index do |code, i|
      str += "#{code}:\t#{@num_reads[i]} \n"
    end
    # Without any reads the percentage would be NaN, which cannot be rounded.
    percent = @total.zero? ? 0.0 : (100 / @total.to_f) * @num_unmatched.to_f
    percent = (percent * 100).round / 100.to_f
    str += "Unmatched:\t#{@num_unmatched}\t(#{percent.to_f}%)\n"
    str += "Total:\t#{@total}"
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
Id Count File(s)
|
2
|
+
R1 1063349 nugen/R1_001.R1.fq nugen/R2_001.R1.fq
|
3
|
+
R2 1002895 nugen/R1_001.R2.fq nugen/R2_001.R2.fq
|
4
|
+
R12 809924 nugen/R1_001.R12.fq nugen/R2_001.R12.fq
|
5
|
+
R22 933773 nugen/R1_001.R22.fq nugen/R2_001.R22.fq
|
6
|
+
unmatched 180059 nugen/R1_001.unmatched.fq nugen/R2_001.unmatched.fq
|
7
|
+
total 3990000
|
8
|
+
Id Count File(s)
|
9
|
+
R1 1046860 nugen/R1_002.R1.fq nugen/R2_002.R1.fq
|
10
|
+
R2 996545 nugen/R1_002.R2.fq nugen/R2_002.R2.fq
|
11
|
+
R12 807472 nugen/R1_002.R12.fq nugen/R2_002.R12.fq
|
12
|
+
R22 831454 nugen/R1_002.R22.fq nugen/R2_002.R22.fq
|
13
|
+
unmatched 307669 nugen/R1_002.unmatched.fq nugen/R2_002.unmatched.fq
|
14
|
+
total 3990000
|
15
|
+
Id Count File(s)
|
16
|
+
R1 1086329 nugen/R1_003.R1.fq nugen/R2_003.R1.fq
|
17
|
+
R2 951836 nugen/R1_003.R2.fq nugen/R2_003.R2.fq
|
18
|
+
R12 836954 nugen/R1_003.R12.fq nugen/R2_003.R12.fq
|
19
|
+
R22 895243 nugen/R1_003.R22.fq nugen/R2_003.R22.fq
|
20
|
+
unmatched 219638 nugen/R1_003.unmatched.fq nugen/R2_003.unmatched.fq
|
21
|
+
total 3990000
|
22
|
+
Id Count File(s)
|
23
|
+
R1 1083328 nugen/R1_004.R1.fq nugen/R2_004.R1.fq
|
24
|
+
R2 976776 nugen/R1_004.R2.fq nugen/R2_004.R2.fq
|
25
|
+
R12 827858 nugen/R1_004.R12.fq nugen/R2_004.R12.fq
|
26
|
+
R22 974454 nugen/R1_004.R22.fq nugen/R2_004.R22.fq
|
27
|
+
unmatched 127584 nugen/R1_004.unmatched.fq nugen/R2_004.unmatched.fq
|
28
|
+
total 3990000
|
29
|
+
Id Count File(s)
|
30
|
+
R1 1096656 nugen/R1_005.R1.fq nugen/R2_005.R1.fq
|
31
|
+
R2 867273 nugen/R1_005.R2.fq nugen/R2_005.R2.fq
|
32
|
+
R12 849238 nugen/R1_005.R12.fq nugen/R2_005.R12.fq
|
33
|
+
R22 858169 nugen/R1_005.R22.fq nugen/R2_005.R22.fq
|
34
|
+
unmatched 318664 nugen/R1_005.unmatched.fq nugen/R2_005.unmatched.fq
|
35
|
+
total 3990000
|
36
|
+
Id Count File(s)
|
37
|
+
R1 963137 nugen/R1_006.R1.fq nugen/R2_006.R1.fq
|
38
|
+
R2 738468 nugen/R1_006.R2.fq nugen/R2_006.R2.fq
|
39
|
+
R12 751315 nugen/R1_006.R12.fq nugen/R2_006.R12.fq
|
40
|
+
R22 817436 nugen/R1_006.R22.fq nugen/R2_006.R22.fq
|
41
|
+
unmatched 719644 nugen/R1_006.unmatched.fq nugen/R2_006.unmatched.fq
|
42
|
+
total 3990000
|
43
|
+
Id Count File(s)
|
44
|
+
R1 1037730 nugen/R1_007.R1.fq nugen/R2_007.R1.fq
|
45
|
+
R2 774785 nugen/R1_007.R2.fq nugen/R2_007.R2.fq
|
46
|
+
R12 800517 nugen/R1_007.R12.fq nugen/R2_007.R12.fq
|
47
|
+
R22 793770 nugen/R1_007.R22.fq nugen/R2_007.R22.fq
|
48
|
+
unmatched 583198 nugen/R1_007.unmatched.fq nugen/R2_007.unmatched.fq
|
49
|
+
total 3990000
|
50
|
+
Id Count File(s)
|
51
|
+
R1 1156538 nugen/R1_008.R1.fq nugen/R2_008.R1.fq
|
52
|
+
R2 919772 nugen/R1_008.R2.fq nugen/R2_008.R2.fq
|
53
|
+
R12 875997 nugen/R1_008.R12.fq nugen/R2_008.R12.fq
|
54
|
+
R22 879468 nugen/R1_008.R22.fq nugen/R2_008.R22.fq
|
55
|
+
unmatched 158225 nugen/R1_008.unmatched.fq nugen/R2_008.unmatched.fq
|
56
|
+
total 3990000
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
|
2
|
+
NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
|
3
|
+
+
|
4
|
+
####################################################################################################
|
5
|
+
>HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
|
6
|
+
TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
|
7
|
+
+
|
8
|
+
CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
|
9
|
+
@HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
|
10
|
+
AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
|
11
|
+
+
|
12
|
+
BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
|
13
|
+
@HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
|
14
|
+
TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
|
15
|
+
+
|
16
|
+
CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
|
17
|
+
@HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
|
18
|
+
TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
|
19
|
+
+
|
20
|
+
@C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
|
2
|
+
C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
3
|
+
C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
4
|
+
C0ED3ACXX,4,R-1-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
|
5
|
+
C0ED3ACXX,4,R-2-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
|
6
|
+
C0ED3ACXX,5,R-3,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
|
7
|
+
C0ED3ACXX,5,R-4,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
|
8
|
+
C0ED3ACXX,5,R-3-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
|
9
|
+
C0ED3ACXX,5,R-4-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
|
10
|
+
C0ED3ACXX,6,R-5,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
11
|
+
C0ED3ACXX,6,R-6,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
12
|
+
C0ED3ACXX,6,R-5-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
|
13
|
+
C0ED3ACXX,6,R-6-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
|
14
|
+
C0ED3ACXX,7,R-7,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
|
15
|
+
C0ED3ACXX,7,R-8,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
|
16
|
+
C0ED3ACXX,7,R-7-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
|
17
|
+
C0ED3ACXX,7,R-8-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
|
18
|
+
C0ED3ACXX,8,R-9,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
19
|
+
C0ED3ACXX,8,R-10,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
20
|
+
C0ED3ACXX,8,R-9-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
|
21
|
+
C0ED3ACXX,8,R-10-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
|
@@ -0,0 +1,20 @@
|
|
1
|
+
@HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
|
2
|
+
NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
|
3
|
+
+
|
4
|
+
####################################################################################################
|
5
|
+
@HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
|
6
|
+
TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
|
7
|
+
+
|
8
|
+
CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
|
9
|
+
@HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
|
10
|
+
AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
|
11
|
+
+
|
12
|
+
BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
|
13
|
+
@HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
|
14
|
+
TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
|
15
|
+
+
|
16
|
+
CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
|
17
|
+
@HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
|
18
|
+
TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
|
19
|
+
+
|
20
|
+
@C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'test/unit'
require 'nugen_barcode_splitter'

# Unit tests for the library classes. The fixture paths are relative to the
# gem root, so the tests have to be started from there.
class NugenBarcodeSplitterTest < Test::Unit::TestCase

  # The unfilled template keeps its placeholders; the filled one contains the
  # values passed to #fill.
  def test_nugen_template
    template = NugenTemplate.new("fastq-multx", "")
    assert template.to_s.include?("fastq-multx")
    assert template.to_s.include?("<%= @fwd %>")

    script = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", "rev").to_s
    # Check the filled-in pieces instead of the whole text, so the test does
    # not depend on the exact whitespace of the template.
    assert script.include?("#!/bin/bash")
    assert script.include?("-N fq.Lane_3.33")
    assert script.include?("-o ~/Lane3//nugen_demultiplexing.log")
    assert script.include?("-B bc")
    assert script.include?("<(gunzip -c fwd) <(gunzip -c rev)")
    assert script.include?("-o ~/Lane3//R1_33.%.fq ~/Lane3//R2_33.%.fq")
    assert !script.include?("<%=")
  end

  # A file that does not start with "@" is rejected; a valid file can be
  # padded with N bases and "@" qualities.
  def test_fastq
    assert_raise RuntimeError do
      Fastq.new("test/fixtures/invalid.fq")
    end

    fastq_file = "test/fixtures/test.fq"
    assert_nothing_raised do
      Fastq.new(fastq_file).close
    end

    out_file = "test/fixtures/added.fq"
    fastq = Fastq.new(fastq_file)
    begin
      fastq.add(4, out_file)
      assert(!File.zero?(out_file), "#{out_file} is empty!")
      # File.readlines closes the file again; no handle is left open.
      lines = File.readlines(out_file)
      assert(lines[1].start_with?("NNNN"), "Reads do not start with NNNN")
      assert(lines[3].start_with?("@@@@"), "Reads do not start with @@@@")
    ensure
      fastq.close
      File.delete(out_file) if File.exist?(out_file)
    end
  end

  # Columns are read in sheet order and sample ids are sanitized.
  def test_sample_sheet
    samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
    assert_equal("TTAG", samplesheet.barcodes[3])
    assert_equal("RX3", samplesheet.sample_id[4])

    samplesheet.create_barcode_txt("test/fixtures/barcode")
    assert(File.exist?("test/fixtures/barcode_4.txt"), "barcode_4.txt was not written")
  end

  # Counts of all reports in the log are summed up.
  def test_statistics
    log_file = "test/fixtures/Lane4.log"
    stats = Statistics.new(log_file, ["R1", "R2", "R12", "R22"])
    assert_equal(31920000, stats.total)
    assert_equal(8533927, stats.num_reads[0])
    assert_equal(2614681, stats.num_unmatched)
  end
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nugen_barcode_splitter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Katharina Hayer
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-15 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! "This gem is designed to demultiplex reads\n produced
|
15
|
+
by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
|
16
|
+
email:
|
17
|
+
- katharinaehayer@gmail.com
|
18
|
+
executables:
|
19
|
+
- base_adder
|
20
|
+
- nugen_barcode_splitter
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- bin/base_adder
|
25
|
+
- bin/nugen_barcode_splitter
|
26
|
+
- lib/nugen_barcode_splitter.rb
|
27
|
+
- lib/nugen_barcode_splitter/fastq.rb
|
28
|
+
- lib/nugen_barcode_splitter/nugen_template.rb
|
29
|
+
- lib/nugen_barcode_splitter/sample_sheet.rb
|
30
|
+
- lib/nugen_barcode_splitter/statistics.rb
|
31
|
+
- test/fixtures/Lane4.log
|
32
|
+
- test/fixtures/barcode_4.txt
|
33
|
+
- test/fixtures/barcode_5.txt
|
34
|
+
- test/fixtures/barcode_6.txt
|
35
|
+
- test/fixtures/barcode_7.txt
|
36
|
+
- test/fixtures/barcode_8.txt
|
37
|
+
- test/fixtures/invalid.fq
|
38
|
+
- test/fixtures/sample_sheet.csv
|
39
|
+
- test/fixtures/test.fq
|
40
|
+
- test/test_mutations_caller_pipeline.rb
|
41
|
+
homepage: https://github.com/khayer/nugen_barcode_splitter
|
42
|
+
licenses: []
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project: nugen_barcode_splitter
|
61
|
+
rubygems_version: 1.8.10
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Nugen Barcode Splitter for Illumina
|
65
|
+
test_files:
|
66
|
+
- test/fixtures/Lane4.log
|
67
|
+
- test/fixtures/barcode_4.txt
|
68
|
+
- test/fixtures/barcode_5.txt
|
69
|
+
- test/fixtures/barcode_6.txt
|
70
|
+
- test/fixtures/barcode_7.txt
|
71
|
+
- test/fixtures/barcode_8.txt
|
72
|
+
- test/fixtures/invalid.fq
|
73
|
+
- test/fixtures/sample_sheet.csv
|
74
|
+
- test/fixtures/test.fq
|
75
|
+
- test/test_mutations_caller_pipeline.rb
|