nugen_barcode_splitter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/base_adder +22 -0
- data/bin/nugen_barcode_splitter +156 -0
- data/lib/nugen_barcode_splitter/fastq.rb +26 -0
- data/lib/nugen_barcode_splitter/nugen_template.rb +42 -0
- data/lib/nugen_barcode_splitter/sample_sheet.rb +41 -0
- data/lib/nugen_barcode_splitter/statistics.rb +37 -0
- data/lib/nugen_barcode_splitter.rb +10 -0
- data/test/fixtures/Lane4.log +56 -0
- data/test/fixtures/barcode_4.txt +5 -0
- data/test/fixtures/barcode_5.txt +5 -0
- data/test/fixtures/barcode_6.txt +5 -0
- data/test/fixtures/barcode_7.txt +5 -0
- data/test/fixtures/barcode_8.txt +5 -0
- data/test/fixtures/invalid.fq +20 -0
- data/test/fixtures/sample_sheet.csv +21 -0
- data/test/fixtures/test.fq +20 -0
- data/test/test_mutations_caller_pipeline.rb +55 -0
- metadata +75 -0
data/bin/base_adder
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line wrapper around Fastq#add: writes a copy of a fastq file in
# which every read is prefixed with the requested number of "N" bases (and
# the same number of padding quality characters).
require "nugen_barcode_splitter"

usage =<<EOF
#{$0} file.fq number_of_bases out.fq

-_-_-_-_- #{$0} -_-_-_-_-

Adds N^P bases to the beginning of the
fastq file. The Quality is set on the
lowest value.
EOF

# All three positional arguments are needed; print the help text instead of
# failing later with an unrelated error.
if ARGV.length < 3
  STDERR.puts usage
  exit 1
end

file_dir = ARGV[0]
number_of_bases = ARGV[1].to_i
out_dir = ARGV[2]

# Open the path given on the command line (not a literal file name).
fastq = Fastq.new(file_dir)
begin
  fastq.add(number_of_bases, out_dir)
ensure
  # Release the input handle even when writing the output fails.
  fastq.close
end
|
21
|
+
|
22
|
+
|
@@ -0,0 +1,156 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Demultiplexes the paired-end "NoIndex" fastq files of an Illumina project by
# Nugen barcode. For every Sample_Lane* directory the script
#   1. uses the per-lane barcode files written from the sample sheet,
#   2. fills the NugenTemplate job script for each R1/R2 pair and runs it,
#   3. runs base_adder on every demultiplexed R1 file of the lane.
# With --debug the commands are printed instead of executed.
require "rubygems"
require "yaml"
require "optparse"
require "fileutils"
require "nugen_barcode_splitter"

usage =<<EOF
#{$0} [options] -p project_dir -o out_dir -s sample_sheet_nugen.csv
_____________________________________________________________________________

Example sample_sheet_nugen.csv :

FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject
C0ED3ACXX,4,S1,hg19,ACCC,RNA Seq,N,,,33333
C0ED3ACXX,4,S2,hg19,GAGT,RNA Seq,N,,,44444
C0ED3ACXX,4,S3,hg19,CGTA,RNA Seq,N,,,33333
C0ED3ACXX,4,S4,hg19,TTAG,RNA Seq,N,,,44444
C0ED3ACXX,5,S5,hg19,AGGG,RNA Seq,N,,,33333
C0ED3ACXX,5,S6,hg19,GTCA,RNA Seq,N,,,44444
C0ED3ACXX,6,S7,hg19,CCAT,RNA Seq,N,,,33333

Note: The sample names must be alphanumerical!
_____________________________________________________________________________

EOF

# Flag options hold the literal fastq_multx switch ("" when not requested) so
# they can be joined straight into the command line.
options = {
  :project_dir => nil,
  :out_dir => nil,
  :sample_sheet => nil,
  :eol_only? => "",
  :bol_only? => "",
  :mismatches => "",
  :keep_barcode => "",
  :fastq_multx => "fastq_multx",
  :debug => false
}

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-p", "--project_dir", :REQUIRED, String,
    "Illumina project directory (../Unaligned/ProjectXXX/)") do |i|
    options[:project_dir] = i if i
  end

  opts.on("-o", "--out_dir", :REQUIRED, String,
    "The desired output directory") do |i|
    options[:out_dir] = i if i
  end

  opts.on("-s","--sample_sheet", :REQUIRED, String,
    "Please provide your sample_sheet") do |i|
    options[:sample_sheet] = i if i
  end

  opts.on("-e","--end_of_line",
    "Limit the search for the barcode to the end of the line DEFAULT:false") do
    options[:eol_only?] = "-e"
  end

  opts.on("-b","--begin_of_line",
    "Limit the search for the barcode to the start of the line DEFAULT:false") do
    options[:bol_only?] = "-b"
  end

  opts.on("-k","--keep_barcode",
    "Do not trim of the barcode DEFAULT:false") do
    options[:keep_barcode] = "-x"
  end

  opts.on("-m","--mismatches NUM", Integer, "Number of mismatches (Default:1)") do |i|
    options[:mismatches] = "-m #{i}" if i
  end

  opts.on("-x", "--fastq_multx DIR", String) do |i|
    options[:fastq_multx] = i if i
  end

  opts.on("-d", "--debug", "Debug mode!") do
    options[:debug] = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  optparse.parse!
  mandatory = [:project_dir, :out_dir, :sample_sheet]
  missing = mandatory.select{ |param| options[param].nil? }
  unless missing.empty?
    puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
    puts optparse
    exit 1
  end
rescue OptionParser::ParseError => e
  # ParseError is the common parent of InvalidOption, MissingArgument and
  # InvalidArgument (e.g. a non-numeric value for -m).
  puts e.message
  puts optparse
  exit 1
end

STDERR.puts "CURRENT OPTIONS:"
STDERR.puts options.to_yaml

# The barcode files are written below the output directory, so it has to exist.
FileUtils.mkdir_p(options[:out_dir])

# Read out sample_sheet
sample_sheet = SampleSheet.new(options[:sample_sheet])
sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")

# Prepare template
multx_flags = [options[:keep_barcode], options[:eol_only?],
               options[:bol_only?], options[:mismatches]]
multx_opts = multx_flags.reject { |flag| flag.empty? }.join(" ")
nugen_temp = NugenTemplate.new(options[:fastq_multx], multx_opts)

# Forward read files look like Lane4_NoIndex_L004_R1_001.fastq.gz;
# capture 1 is the lane number, capture 2 the three-digit chunk number.
forward_fastq = /\ALane([1-8])_NoIndex_L[0-9]{3}_R1_([0-9]{3})\.fastq\.gz\z/

# -p may name the project directory (its entries are scanned) or a glob
# pattern that already expands to the sample directories.
project = options[:project_dir]
sample_glob = File.directory?(project) ? File.join(project, "*") : project

Dir.glob(sample_glob).sort.each do |sample_dir|
  next unless File.directory? sample_dir
  next unless sample_dir =~ /Sample_Lane/
  outdir = ""

  Dir.glob(File.join(sample_dir, "*")).sort.each do |fwd|
    next if File.directory? fwd
    match = forward_fastq.match(File.basename(fwd))
    next unless match
    lane = match[1]
    number = match[2]
    # Only rewrite the file name, never a directory that happens to contain "_R1_".
    rev = File.join(File.dirname(fwd), File.basename(fwd).sub("_R1_", "_R2_"))
    barcodes = "#{options[:out_dir]}/barcode_#{lane}.txt"
    outdir = "#{options[:out_dir]}/Lane#{lane}"
    begin
      FileUtils.mkdir_p outdir unless File.directory? outdir
    rescue StandardError => e
      STDERR.puts e.message
    end
    cmd = nugen_temp.fill(lane, number, outdir, barcodes, fwd, rev)
    if options[:debug]
      puts cmd
    else
      # The template relies on bash process substitution "<(...)", which a
      # plain /bin/sh is not guaranteed to support.
      status = system("bash", "-c", cmd)
      raise "Calling the template did not succeed!" unless status
    end
  end

  # Nothing was demultiplexed for this directory.
  next if outdir.empty?

  # Add the trimmed bases
  Dir.glob(File.join(outdir, "*")).sort.each do |demultiplexed|
    next unless File.basename(demultiplexed) =~ /R1_[0-9]{3}./
    # Skip results of an earlier run so they are not padded a second time.
    next if demultiplexed.end_with?("_added")
    added = "#{demultiplexed}_added"
    if options[:debug]
      puts "base_adder #{demultiplexed} 4 #{added}"
    else
      # Argument-list form: no shell is involved, so unusual characters in
      # the path cannot change the command.
      status = system("base_adder", demultiplexed, "4", added)
      raise "Calling base_adder did not succeed!" unless status
    end
  end

end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Reads a fastq file and writes copies of it in which every read is padded
# at its start.
class Fastq
  # Opens +filename+ for reading and checks its first line.
  #
  # filename - path of the fastq file.
  #
  # Raises RuntimeError ("Invalid fastq file!") when the first line does not
  # start with "@". An empty file raises EOFError (from readline). The handle
  # is closed again before either error leaves the constructor.
  def initialize(filename)
    @filehandle = File.open(filename, "r")
    begin
      line = @filehandle.readline()
      # A fastq header has the "@" as its first character; merely containing
      # one (e.g. ">read@1") is not enough.
      raise RuntimeError, "Invalid fastq file!" unless line.start_with?("@")
      @filehandle.pos = 0
    rescue StandardError
      @filehandle.close
      raise
    end
  end

  # Writes the file to +outdir+ with +num+ "N" bases prepended to every
  # sequence line and +num+ "@" characters prepended to every quality line.
  # Header and separator lines are copied unchanged.
  #
  # num    - number of padding characters per read.
  # outdir - path of the output file (overwritten if it exists).
  #
  # Raises EOFError when the number of lines is not a multiple of four.
  # NOTE(review): "@" is the lowest score only for Phred+64 encoded
  # qualities - confirm the encoding of the input files.
  def add(num, outdir)
    bases = "N" * num
    qualities = "@" * num
    # Start from the first record so the method can be called repeatedly.
    @filehandle.rewind

    # Block form closes the output even when a read fails half-way.
    File.open(outdir, 'w') do |outfile|
      until @filehandle.eof?
        outfile.write(@filehandle.readline)
        outfile.write(bases + @filehandle.readline)
        outfile.write(@filehandle.readline)
        outfile.write(qualities + @filehandle.readline)
      end
    end
  end

  # Closes the input file.
  def close()
    @filehandle.close()
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "erubis"
|
2
|
+
|
3
|
+
# Builds the bash job script that runs the demultiplexer on one pair of
# gzipped fastq files. The script text is an Erubis template; the
# "<%= @name %>" placeholders are filled in by #fill.
class NugenTemplate

  # fastq_multx - command (or path) of the demultiplexer executable.
  # options     - String of extra command-line flags placed directly after
  #               the executable.
  def initialize(fastq_multx, options)
    @template =<<EOF

#!/bin/bash
#\$ -pe DJ 4
#\$ -l h_vmem=6G
#\$ -j y
#\$ -N fq.Lane<%= @lane %>.<%= @number %>
#\$ -o <%= @lane_dir %>/nugen_demultiplexing.log

#{fastq_multx} #{options} -B <%= @barcodes %> \\
<(gunzip -c <%= @fwd %>) <(gunzip -c <%= @rev %>) \\
-o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq
EOF
  end

  # Returns the job script for one forward/reverse file pair.
  #
  # lane     - lane number, used in the job name.
  # number   - chunk number of the file pair, used in the job name and in
  #            the output prefixes "R1_<number>" / "R2_<number>".
  # lane_dir - directory receiving the log and the demultiplexed files.
  # barcodes - path of the barcode file passed with -B.
  # fwd, rev - paths of the gzipped forward and reverse fastq files.
  def fill(lane, number, lane_dir, barcodes, fwd, rev)

    # Each key is referenced by the template as the instance variable of
    # the same name.
    context = {
      :lane => lane,
      :number => number,
      :lane_dir => lane_dir,
      :barcodes => barcodes,
      :fwd => fwd,
      :rev => rev,
      :r1 => "R1_#{number}",
      :r2 => "R2_#{number}"
    }

    # The template text never changes after construction, so compile it once
    # and reuse it for every file pair.
    @eruby ||= Erubis::Eruby.new(@template)
    @eruby.evaluate(context)
  end

  # Returns the unfilled template without its trailing newline.
  def to_s
    @template.chomp
  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
# Samplesheets are supposed to look like this
|
4
|
+
=begin
|
5
|
+
FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
|
6
|
+
C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
7
|
+
C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
8
|
+
=end
|
9
|
+
|
10
|
+
# Parses an Illumina sample sheet (CSV with a header row) and writes one
# barcode file per lane.
class SampleSheet

  # Parallel arrays: entry i of each array belongs to the same sheet row.
  attr_accessor :lanes, :sample_id, :barcodes

  # Reads the "Lane", "SampleID" and "Index" columns of +samplesheet+.
  # Every non-word character of a sample id is replaced by "X"
  # (e.g. "R-1" becomes "RX1").
  #
  # samplesheet - path of the CSV file; its first row holds the column names.
  def initialize(samplesheet)
    @lanes = []
    @sample_id = []
    @barcodes = []

    # Options are passed as keywords: a positional hash is rejected by
    # CSV.foreach on Ruby 3.
    CSV.foreach(samplesheet, :headers => :first_row) do |row|
      # Rows without a lane or sample id (e.g. trailing empty lines) carry
      # nothing to demultiplex.
      next if row["Lane"].nil? || row["SampleID"].nil?
      @lanes << row["Lane"]
      @sample_id << row["SampleID"].gsub(/\W/,"X")
      @barcodes << row["Index"]
    end
  end

  # Writes "<prefix>_<lane>.txt" for every lane of the sheet. Each file has
  # a comment header followed by one "sample barcode" line per sample of
  # that lane, in sheet order.
  #
  # prefix - path prefix of the files to write; the path +prefix+ itself is
  #          not touched.
  #
  # Returns the Array of written file names.
  def create_barcode_txt(prefix)
    # Collect the row indices per lane first so that a lane whose rows are
    # not adjacent in the sheet still ends up complete in a single file.
    rows_by_lane = {}
    @lanes.each_with_index do |lane, i|
      (rows_by_lane[lane] ||= []) << i
    end

    rows_by_lane.map do |lane, rows|
      outfile = "#{prefix}_#{lane}.txt"
      # Block form flushes and closes the file before it is handed to the
      # demultiplexer.
      File.open(outfile, 'w') do |handler|
        handler.write("# SampleName Barcode \n")
        rows.each do |i|
          handler.write("#{@sample_id[i]} #{@barcodes[i]} \n")
        end
      end
      outfile
    end
  end

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Sums the read counts of a demultiplexing log. The log consists of
# tab-separated rows "<id>\t<count>[\t<files>]" and may contain several
# reports one after another; counts with the same id are added up.
class Statistics

  attr_accessor :num_reads, :num_unmatched, :total, :barcodes

  # lane_log - path of the log file.
  # barcodes - Array of ids to count; num_reads[i] belongs to barcodes[i].
  #            Rows with any other id are ignored.
  def initialize(lane_log, barcodes)
    @num_reads = Array.new(barcodes.length(),0)
    @num_unmatched = 0
    @total = 0
    @barcodes = barcodes
    # File.foreach closes the log once the last line has been read.
    File.foreach(lane_log) do |line|
      fields = line.chomp.split("\t")
      id = fields[0]
      # Blank line.
      next if id.nil? || id.empty?
      # Column header row; only the first field is compared so that ids
      # which merely contain "Id" are still counted.
      next if id == "Id"
      count = fields[1].to_i
      case id
      when "unmatched"
        @num_unmatched += count
      when "total"
        @total += count
      else
        i = @barcodes.index(id)
        @num_reads[i] += count if i
      end
    end
  end

  # Returns a multi-line report: one line per barcode, then the unmatched
  # count with its share of the total (rounded to two decimals), then the
  # total.
  def to_s
    str = "Statistics: \nBarcode\t# of reads\n"
    @barcodes.each_with_index do |code, i|
      str << "#{code}:\t#{@num_reads[i]} \n"
    end
    # An empty log has a total of zero; report 0.0% instead of dividing by it.
    percent = @total.zero? ? 0.0 : (100 / @total.to_f) * @num_unmatched.to_f
    percent = (percent.to_f * 100).round / 100.to_f
    str << "Unmatched:\t#{@num_unmatched}\t(#{percent.to_f}%)\n"
    str << "Total:\t#{@total}"
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
Id Count File(s)
|
2
|
+
R1 1063349 nugen/R1_001.R1.fq nugen/R2_001.R1.fq
|
3
|
+
R2 1002895 nugen/R1_001.R2.fq nugen/R2_001.R2.fq
|
4
|
+
R12 809924 nugen/R1_001.R12.fq nugen/R2_001.R12.fq
|
5
|
+
R22 933773 nugen/R1_001.R22.fq nugen/R2_001.R22.fq
|
6
|
+
unmatched 180059 nugen/R1_001.unmatched.fq nugen/R2_001.unmatched.fq
|
7
|
+
total 3990000
|
8
|
+
Id Count File(s)
|
9
|
+
R1 1046860 nugen/R1_002.R1.fq nugen/R2_002.R1.fq
|
10
|
+
R2 996545 nugen/R1_002.R2.fq nugen/R2_002.R2.fq
|
11
|
+
R12 807472 nugen/R1_002.R12.fq nugen/R2_002.R12.fq
|
12
|
+
R22 831454 nugen/R1_002.R22.fq nugen/R2_002.R22.fq
|
13
|
+
unmatched 307669 nugen/R1_002.unmatched.fq nugen/R2_002.unmatched.fq
|
14
|
+
total 3990000
|
15
|
+
Id Count File(s)
|
16
|
+
R1 1086329 nugen/R1_003.R1.fq nugen/R2_003.R1.fq
|
17
|
+
R2 951836 nugen/R1_003.R2.fq nugen/R2_003.R2.fq
|
18
|
+
R12 836954 nugen/R1_003.R12.fq nugen/R2_003.R12.fq
|
19
|
+
R22 895243 nugen/R1_003.R22.fq nugen/R2_003.R22.fq
|
20
|
+
unmatched 219638 nugen/R1_003.unmatched.fq nugen/R2_003.unmatched.fq
|
21
|
+
total 3990000
|
22
|
+
Id Count File(s)
|
23
|
+
R1 1083328 nugen/R1_004.R1.fq nugen/R2_004.R1.fq
|
24
|
+
R2 976776 nugen/R1_004.R2.fq nugen/R2_004.R2.fq
|
25
|
+
R12 827858 nugen/R1_004.R12.fq nugen/R2_004.R12.fq
|
26
|
+
R22 974454 nugen/R1_004.R22.fq nugen/R2_004.R22.fq
|
27
|
+
unmatched 127584 nugen/R1_004.unmatched.fq nugen/R2_004.unmatched.fq
|
28
|
+
total 3990000
|
29
|
+
Id Count File(s)
|
30
|
+
R1 1096656 nugen/R1_005.R1.fq nugen/R2_005.R1.fq
|
31
|
+
R2 867273 nugen/R1_005.R2.fq nugen/R2_005.R2.fq
|
32
|
+
R12 849238 nugen/R1_005.R12.fq nugen/R2_005.R12.fq
|
33
|
+
R22 858169 nugen/R1_005.R22.fq nugen/R2_005.R22.fq
|
34
|
+
unmatched 318664 nugen/R1_005.unmatched.fq nugen/R2_005.unmatched.fq
|
35
|
+
total 3990000
|
36
|
+
Id Count File(s)
|
37
|
+
R1 963137 nugen/R1_006.R1.fq nugen/R2_006.R1.fq
|
38
|
+
R2 738468 nugen/R1_006.R2.fq nugen/R2_006.R2.fq
|
39
|
+
R12 751315 nugen/R1_006.R12.fq nugen/R2_006.R12.fq
|
40
|
+
R22 817436 nugen/R1_006.R22.fq nugen/R2_006.R22.fq
|
41
|
+
unmatched 719644 nugen/R1_006.unmatched.fq nugen/R2_006.unmatched.fq
|
42
|
+
total 3990000
|
43
|
+
Id Count File(s)
|
44
|
+
R1 1037730 nugen/R1_007.R1.fq nugen/R2_007.R1.fq
|
45
|
+
R2 774785 nugen/R1_007.R2.fq nugen/R2_007.R2.fq
|
46
|
+
R12 800517 nugen/R1_007.R12.fq nugen/R2_007.R12.fq
|
47
|
+
R22 793770 nugen/R1_007.R22.fq nugen/R2_007.R22.fq
|
48
|
+
unmatched 583198 nugen/R1_007.unmatched.fq nugen/R2_007.unmatched.fq
|
49
|
+
total 3990000
|
50
|
+
Id Count File(s)
|
51
|
+
R1 1156538 nugen/R1_008.R1.fq nugen/R2_008.R1.fq
|
52
|
+
R2 919772 nugen/R1_008.R2.fq nugen/R2_008.R2.fq
|
53
|
+
R12 875997 nugen/R1_008.R12.fq nugen/R2_008.R12.fq
|
54
|
+
R22 879468 nugen/R1_008.R22.fq nugen/R2_008.R22.fq
|
55
|
+
unmatched 158225 nugen/R1_008.unmatched.fq nugen/R2_008.unmatched.fq
|
56
|
+
total 3990000
|
@@ -0,0 +1,20 @@
|
|
1
|
+
>HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
|
2
|
+
NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
|
3
|
+
+
|
4
|
+
####################################################################################################
|
5
|
+
>HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
|
6
|
+
TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
|
7
|
+
+
|
8
|
+
CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
|
9
|
+
@HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
|
10
|
+
AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
|
11
|
+
+
|
12
|
+
BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
|
13
|
+
@HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
|
14
|
+
TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
|
15
|
+
+
|
16
|
+
CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
|
17
|
+
@HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
|
18
|
+
TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
|
19
|
+
+
|
20
|
+
@C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
|
2
|
+
C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
3
|
+
C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
4
|
+
C0ED3ACXX,4,R-1-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
|
5
|
+
C0ED3ACXX,4,R-2-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
|
6
|
+
C0ED3ACXX,5,R-3,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
|
7
|
+
C0ED3ACXX,5,R-4,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
|
8
|
+
C0ED3ACXX,5,R-3-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
|
9
|
+
C0ED3ACXX,5,R-4-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
|
10
|
+
C0ED3ACXX,6,R-5,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
11
|
+
C0ED3ACXX,6,R-6,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
12
|
+
C0ED3ACXX,6,R-5-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
|
13
|
+
C0ED3ACXX,6,R-6-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
|
14
|
+
C0ED3ACXX,7,R-7,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
|
15
|
+
C0ED3ACXX,7,R-8,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
|
16
|
+
C0ED3ACXX,7,R-7-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
|
17
|
+
C0ED3ACXX,7,R-8-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
|
18
|
+
C0ED3ACXX,8,R-9,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
|
19
|
+
C0ED3ACXX,8,R-10,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
|
20
|
+
C0ED3ACXX,8,R-9-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
|
21
|
+
C0ED3ACXX,8,R-10-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
|
@@ -0,0 +1,20 @@
|
|
1
|
+
@HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
|
2
|
+
NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
|
3
|
+
+
|
4
|
+
####################################################################################################
|
5
|
+
@HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
|
6
|
+
TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
|
7
|
+
+
|
8
|
+
CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
|
9
|
+
@HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
|
10
|
+
AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
|
11
|
+
+
|
12
|
+
BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
|
13
|
+
@HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
|
14
|
+
TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
|
15
|
+
+
|
16
|
+
CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
|
17
|
+
@HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
|
18
|
+
TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
|
19
|
+
+
|
20
|
+
@C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'nugen_barcode_splitter'
|
3
|
+
|
4
|
+
# Unit tests for NugenTemplate, Fastq, SampleSheet and Statistics.
# The fixture paths are relative ("test/fixtures/..."), so the suite has to
# be started from the gem's root directory.
class NugenBarcodeSplitterTest < Test::Unit::TestCase

  # The filled template is checked fragment by fragment so that the
  # assertions do not depend on how the template text is indented.
  def test_nugen_template
    template = NugenTemplate.new("fastq-multx", "")
    assert template.to_s.include?("fastq-multx")
    assert template.to_s.include?("<%= @fwd %>")
    temp = template.fill("3", "33", "~/Lane3/", "bc", "fwd", "rev").to_s
    assert temp.include?("#!/bin/bash")
    assert temp.include?("-N fq.Lane3.33")
    assert temp.include?("-o ~/Lane3//nugen_demultiplexing.log")
    assert temp.include?("-B bc")
    assert temp.include?("<(gunzip -c fwd) <(gunzip -c rev)")
    assert temp.include?("-o ~/Lane3//R1_33.%.fq ~/Lane3//R2_33.%.fq")
  end

  # invalid.fq starts with ">" and must be rejected; test.fq must be padded
  # with four bases and four quality characters per read.
  def test_fastq
    assert_raise RuntimeError do
      Fastq.new("test/fixtures/invalid.fq")
    end

    fastq = nil
    assert_nothing_raised do
      fastq = Fastq.new("test/fixtures/test.fq")
    end

    out_file = "test/fixtures/added.fq"
    begin
      fastq.add(4, out_file)
      assert(!File.zero?(out_file), "#{out_file} is empty!")
      # File.readlines opens and closes the file itself.
      lines = File.readlines(out_file)
      assert(lines[1].start_with?("NNNN"), "Reads do not start with NNNN")
      assert(lines[3].start_with?("@@@@"), "Reads do not start with @@@@")
    ensure
      # Clean up even when an assertion fails.
      fastq.close
      File.delete(out_file) if File.exist?(out_file)
    end
  end

  # Writes test/fixtures/barcode_<lane>.txt for the lanes 4-8 of the fixture.
  def test_sample_sheet
    samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
    assert_equal("TTAG", samplesheet.barcodes[3])
    assert_equal("RX3", samplesheet.sample_id[4])

    samplesheet.create_barcode_txt("test/fixtures/barcode")
    (4..8).each do |lane|
      barcode_file = "test/fixtures/barcode_#{lane}.txt"
      assert(File.exist?(barcode_file), "#{barcode_file} was not written!")
    end
  end

  # Lane4.log holds eight reports; the expected values are their sums.
  def test_statistics
    log_file = "test/fixtures/Lane4.log"
    stats = Statistics.new(log_file, ["R1", "R2", "R12", "R22"])
    assert_equal(31920000, stats.total)
    assert_equal(8533927, stats.num_reads[0])
    assert_equal(2614681, stats.num_unmatched)
  end
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nugen_barcode_splitter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Katharina Hayer
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-15 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! "This gem is designed to demultiplex reads\n produced
|
15
|
+
by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
|
16
|
+
email:
|
17
|
+
- katharinaehayer@gmail.com
|
18
|
+
executables:
|
19
|
+
- base_adder
|
20
|
+
- nugen_barcode_splitter
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- bin/base_adder
|
25
|
+
- bin/nugen_barcode_splitter
|
26
|
+
- lib/nugen_barcode_splitter.rb
|
27
|
+
- lib/nugen_barcode_splitter/fastq.rb
|
28
|
+
- lib/nugen_barcode_splitter/nugen_template.rb
|
29
|
+
- lib/nugen_barcode_splitter/sample_sheet.rb
|
30
|
+
- lib/nugen_barcode_splitter/statistics.rb
|
31
|
+
- test/fixtures/Lane4.log
|
32
|
+
- test/fixtures/barcode_4.txt
|
33
|
+
- test/fixtures/barcode_5.txt
|
34
|
+
- test/fixtures/barcode_6.txt
|
35
|
+
- test/fixtures/barcode_7.txt
|
36
|
+
- test/fixtures/barcode_8.txt
|
37
|
+
- test/fixtures/invalid.fq
|
38
|
+
- test/fixtures/sample_sheet.csv
|
39
|
+
- test/fixtures/test.fq
|
40
|
+
- test/test_mutations_caller_pipeline.rb
|
41
|
+
homepage: https://github.com/khayer/nugen_barcode_splitter
|
42
|
+
licenses: []
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project: nugen_barcode_splitter
|
61
|
+
rubygems_version: 1.8.10
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Nugen Barcode Splitter for Illumina
|
65
|
+
test_files:
|
66
|
+
- test/fixtures/Lane4.log
|
67
|
+
- test/fixtures/barcode_4.txt
|
68
|
+
- test/fixtures/barcode_5.txt
|
69
|
+
- test/fixtures/barcode_6.txt
|
70
|
+
- test/fixtures/barcode_7.txt
|
71
|
+
- test/fixtures/barcode_8.txt
|
72
|
+
- test/fixtures/invalid.fq
|
73
|
+
- test/fixtures/sample_sheet.csv
|
74
|
+
- test/fixtures/test.fq
|
75
|
+
- test/test_mutations_caller_pipeline.rb
|