nugen_barcode_splitter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/base_adder ADDED
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+ require "nugen_barcode_splitter"
3
+
4
# Command line front end for Fastq#add: pads every read of a fastq file with
# a fixed number of "N" bases (and matching quality characters) and writes the
# result to a new file.
usage =<<EOF
#{$0} file.fq number_of_bases out.fq

-_-_-_-_- #{$0} -_-_-_-_-

Adds N^P bases to the beginning of the
fastq file. The Quality is set on the
lowest value.
EOF

# All three positional arguments are needed; show the usage text instead of
# failing later with an obscure error on a nil argument.
if ARGV.length < 3
  STDERR.puts usage
  exit 1
end

file_dir = ARGV[0]
number_of_bases = ARGV[1].to_i
out_dir = ARGV[2]

# Pass the variable itself; the quoted form "file_dir" tried to open a file
# literally named "file_dir".
fastq = Fastq.new(file_dir)
begin
  fastq.add(number_of_bases, out_dir)
ensure
  # Release the input handle even when writing the output fails.
  fastq.close
end
21
+
22
+
@@ -0,0 +1,156 @@
1
+ #!/usr/bin/env ruby
2
+ require "rubygems"
3
+ require "yaml"
4
+ require "optparse"
5
+
6
# FileUtils (mkdir_p) and the gem's own classes (SampleSheet, NugenTemplate)
# are used below, so they are loaded here.
require "fileutils"
require "nugen_barcode_splitter"

usage =<<EOF
#{$0} [options] -p project_dir -o out_dir -s sample_sheet_nugen
_____________________________________________________________________________

Example sample_sheet_nugen.csv :

FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject
C0ED3ACXX,4,S1,hg19,ACCC,RNA Seq,N,,,33333
C0ED3ACXX,4,S2,hg19,GAGT,RNA Seq,N,,,44444
C0ED3ACXX,4,S3,hg19,CGTA,RNA Seq,N,,,33333
C0ED3ACXX,4,S4,hg19,TTAG,RNA Seq,N,,,44444
C0ED3ACXX,5,S5,hg19,AGGG,RNA Seq,N,,,33333
C0ED3ACXX,5,S6,hg19,GTCA,RNA Seq,N,,,44444
C0ED3ACXX,6,S7,hg19,CCAT,RNA Seq,N,,,33333

Note: The sample names must be alphanumerical!
_____________________________________________________________________________

EOF

# Prints +display+ in debug mode; otherwise runs +argv+ through Kernel#system
# and raises +failure+ unless the command exits successfully.
def run_command(argv, display, failure, debug)
  if debug
    puts display
  else
    raise failure unless system(*argv)
  end
end

# String-valued flags are passed through to fastq-multx verbatim; an empty
# string means "flag not given". The same key is used when setting and when
# reading each flag, otherwise the flag is silently dropped.
options = {
  :project_dir  => nil,
  :out_dir      => nil,
  :sample_sheet => nil,
  :eol_only     => "",
  :bol_only     => "",
  :mismatches   => "",
  :keep_barcode => "",
  :fastq_multx  => "fastq_multx",
  :debug        => false
}

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-p", "--project_dir DIR", String,
          "Illumina project directory (../Unaligned/ProjectXXX/)") do |dir|
    options[:project_dir] = dir if dir
  end

  opts.on("-o", "--out_dir DIR", String,
          "The desired output directory") do |dir|
    options[:out_dir] = dir if dir
  end

  opts.on("-s", "--sample_sheet FILE", String,
          "Please provide your sample_sheet") do |file|
    options[:sample_sheet] = file if file
  end

  opts.on("-e", "--end_of_line",
          "Limit the search for the barcode to the end of the line DEFAULT:false") do
    options[:eol_only] = "-e"
  end

  opts.on("-b", "--begin_of_line",
          "Limit the search for the barcode to the start of the line DEFAULT:false") do
    options[:bol_only] = "-b"
  end

  opts.on("-k", "--keep_barcode",
          "Do not trim of the barcode DEFAULT:false") do
    options[:keep_barcode] = "-x"
  end

  opts.on("-m", "--mismatches NUM", Integer, "Number of mismatches (Default:1)") do |num|
    options[:mismatches] = "-m #{num}" if num
  end

  opts.on("-x", "--fastq_multx DIR", String) do |path|
    options[:fastq_multx] = path if path
  end

  opts.on("-d", "--debug", "Debug mode!") do
    options[:debug] = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # ParseError covers invalid options, missing arguments and arguments of the
  # wrong type (e.g. "-m abc").
  puts e.message
  puts optparse
  exit 1
end

missing = [:project_dir, :out_dir, :sample_sheet].select { |param| options[param].nil? }
unless missing.empty?
  puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
  puts optparse
  exit 1
end

STDERR.puts "CURRENT OPTIONS:"
STDERR.puts options.to_yaml

# Read out sample_sheet and write one barcode table per lane into out_dir.
FileUtils.mkdir_p(options[:out_dir])
barcode_prefix = File.join(options[:out_dir], "barcode")
sample_sheet = SampleSheet.new(options[:sample_sheet])
sample_sheet.create_barcode_txt(barcode_prefix)

# Prepare template
multx_flags = [options[:keep_barcode], options[:eol_only],
               options[:bol_only], options[:mismatches]]
multx_opts = multx_flags.reject { |flag| flag.empty? }.join(" ")
nugen_temp = NugenTemplate.new(options[:fastq_multx], multx_opts)

# Dir.glob on a bare directory path yields only that path, so the children are
# requested explicitly with "*".
Dir.glob(File.join(options[:project_dir], "*")).each do |sample_dir|
  next unless File.directory?(sample_dir)
  next unless sample_dir =~ /Sample_Lane/
  outdir = nil

  Dir.glob(File.join(sample_dir, "*")).each do |fwd|
    next if File.directory?(fwd)
    next unless fwd =~ /Lane[1-8]_NoIndex_L[0-9]{3}_R1_[0-9]{3}\.fastq\.gz/
    rev = fwd.gsub(/_R1_/, "_R2_")

    # File names look like Lane4_NoIndex_L004_R1_001.fastq.gz: the first part
    # carries the lane, the fifth the chunk number.
    name_parts = File.basename(fwd).split("_")
    lane = name_parts[0].gsub(/\D/, "")
    number = name_parts[4].gsub(/\D/, "")
    barcodes = "#{barcode_prefix}_#{lane}.txt"
    outdir = File.join(options[:out_dir], "Lane#{lane}")

    begin
      FileUtils.mkdir_p(outdir)
    rescue SystemCallError => e
      # Best effort, as before: report the problem and let the command fail.
      STDERR.puts e.message
    end

    cmd = nugen_temp.fill(lane, number, outdir, barcodes, fwd, rev)
    # The template uses bash process substitution ("<(gunzip -c ...)"), which
    # a plain POSIX sh does not understand, so bash is invoked explicitly.
    run_command(["bash", "-c", cmd], cmd,
                "Calling the template did not succeed!", options[:debug])
  end

  # Nothing was demultiplexed for this sample directory.
  next if outdir.nil?

  # Add the trimmed bases
  Dir.glob(File.join(outdir, "*")).each do |demultiplexed|
    next unless demultiplexed =~ /R1_[0-9]{3}./
    # Do not pad the output of a previous run a second time.
    next if demultiplexed =~ /_added\z/

    # Built as an argument list (no shell involved) and only executed outside
    # of debug mode; backticks would run the command immediately.
    adder = ["base_adder", demultiplexed, "4", "#{demultiplexed}_added"]
    run_command(adder, adder.join(" "),
                "Calling base_adder did not succeed!", options[:debug])
  end
end
@@ -0,0 +1,26 @@
1
# Reader for a fastq file that can write a copy in which every read is
# prefixed with a fixed number of "N" bases and "@" quality characters.
class Fastq
  # Opens +filename+ for reading and checks that it looks like a fastq file.
  #
  # filename - path of the fastq file.
  #
  # Raises RuntimeError ("Invalid fastq file!") when the file is empty or its
  # first line does not start with "@". The handle is closed before raising.
  def initialize(filename)
    @filehandle = File.open(filename, "r")
    first_line = @filehandle.gets
    # A fastq header starts with "@"; an "@" anywhere else in the line (for
    # example inside a fasta description) does not make the file a fastq file.
    unless first_line && first_line.start_with?("@")
      @filehandle.close
      raise RuntimeError, "Invalid fastq file!"
    end
    @filehandle.rewind
  end

  # Writes a copy of the input to +outdir+ with +num+ "N" bases prepended to
  # every sequence line and +num+ "@" characters prepended to every quality
  # line.
  #
  # num    - number of bases to add (Integer, >= 0).
  # outdir - path of the output file (despite the name, a file, not a
  #          directory); it is overwritten.
  #
  # Raises EOFError when the last record is incomplete. The output file is
  # closed in every case.
  def add(num, outdir)
    bases = "N" * num
    qualities = "@" * num

    # Start from the first record so the method can be called more than once.
    @filehandle.rewind
    File.open(outdir, "w") do |outfile|
      until @filehandle.eof?
        header = @filehandle.readline
        # Tolerate blank lines between records and at the end of the file.
        next if header.strip.empty?

        outfile.write(header)
        outfile.write(bases + @filehandle.readline)
        outfile.write(@filehandle.readline)
        outfile.write(qualities + @filehandle.readline)
      end
    end
  end

  # Closes the input file. Calling it again is harmless.
  def close
    @filehandle.close unless @filehandle.closed?
  end
end
@@ -0,0 +1,42 @@
1
+ require "erubis"
2
+
3
# Builds the bash job script that runs fastq-multx on one pair of gzipped
# fastq files. The script is kept as an eRuby template: the fastq-multx
# command and its options are fixed at construction time, everything inside
# "<%= %>" is filled in per call by #fill.
class NugenTemplate

  # fastq_multx - command (or path) used to call fastq-multx.
  # options     - String of extra command line options for fastq-multx.
  def initialize(fastq_multx, options)
    @template =<<EOF

#!/bin/bash
#\$ -pe DJ 4
#\$ -l h_vmem=6G
#\$ -j y
#\$ -N fq.Lane<%= @lane %>.<%= @number %>
#\$ -o <%= @lane_dir %>/nugen_demultiplexing.log

#{fastq_multx} #{options} -B <%= @barcodes %> \\
<(gunzip -c <%= @fwd %>) <(gunzip -c <%= @rev %>) \\
-o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq
EOF
  end

  # Renders the template for one pair of fastq files.
  #
  # lane     - lane identifier, used in the job name.
  # number   - chunk number, used in the job name and the output file names.
  # lane_dir - directory receiving the log and the demultiplexed files.
  # barcodes - path of the barcode table handed to fastq-multx via -B.
  # fwd, rev - paths of the gzipped forward and reverse fastq files.
  #
  # Returns the rendered script as produced by Erubis.
  def fill(lane, number, lane_dir, barcodes, fwd, rev)
    # Erubis exposes each key of this hash to the template as an instance
    # variable of the same name (:lane becomes @lane, ...).
    context = {
      :lane     => lane,
      :number   => number,
      :lane_dir => lane_dir,
      :barcodes => barcodes,
      :fwd      => fwd,
      :rev      => rev,
      :r1       => "R1_#{number}",
      :r2       => "R2_#{number}"
    }

    Erubis::Eruby.new(@template).evaluate(context)
  end

  # Returns the unrendered template without its trailing newline.
  def to_s
    @template.chomp
  end

end
@@ -0,0 +1,41 @@
1
+ require "csv"
2
+
3
+ # Samplesheets are supposed to look like this
4
+ =begin
5
+ FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
6
+ C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
7
+ C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
8
+ =end
9
+
10
# In-memory view of an Illumina sample sheet (CSV with a header row) holding,
# per data row, the lane, the sanitized sample name and the barcode.
class SampleSheet

  # Parallel arrays: entry i of each array belongs to data row i of the sheet.
  attr_accessor :lanes, :sample_id, :barcodes

  # Reads the "Lane", "SampleID" and "Index" columns of +samplesheet+.
  #
  # samplesheet - path of the CSV file; its first row must be the header.
  #
  # Every non-word character of a sample name is replaced by "X". Blank lines
  # in the sheet are ignored.
  def initialize(samplesheet)
    @lanes = []
    @sample_id = []
    @barcodes = []

    # The options are passed without braces: a positional Hash is no longer
    # accepted as the options argument by CSV.foreach on Ruby 3.
    CSV.foreach(samplesheet, :headers => :first_row, :skip_blanks => true) do |row|
      @lanes << row["Lane"]
      @sample_id << row["SampleID"].gsub(/\W/, "X")
      @barcodes << row["Index"]
    end
  end

  # Writes one barcode table per lane, named "<prefix>_<lane>.txt", each
  # starting with a comment header followed by one "sample barcode" line per
  # sample of that lane.
  #
  # prefix - path prefix of the files to create; the directory must exist.
  #
  # Returns the Array of written file names.
  def create_barcode_txt(prefix)
    # Collect the rows per lane first, so that a lane whose rows are not
    # adjacent in the sheet still ends up complete in a single file.
    rows_by_lane = Hash.new { |rows, lane| rows[lane] = [] }
    @lanes.each_with_index do |lane, i|
      rows_by_lane[lane] << "#{@sample_id[i]} #{@barcodes[i]} \n"
    end

    rows_by_lane.map do |lane, rows|
      outfile = "#{prefix}_#{lane}.txt"
      # The block form closes (and thereby flushes) the file, so programs
      # started afterwards see its full content.
      File.open(outfile, "w") do |handler|
        handler.write("# SampleName Barcode \n")
        rows.each { |row| handler.write(row) }
      end
      outfile
    end
  end

end
@@ -0,0 +1,37 @@
1
# Sums up the read counts of a tab separated fastq-multx log, which may
# contain several consecutive result tables (one per processed chunk).
class Statistics

  attr_accessor :num_reads, :num_unmatched, :total, :barcodes

  # Parses +lane_log+ and accumulates the counts.
  #
  # lane_log - path of the log file; each row is "name<TAB>count[<TAB>files]".
  # barcodes - Array of sample names as they appear in the first column;
  #            num_reads uses the same order.
  #
  # Rows named "unmatched" and "total" feed num_unmatched and total, rows
  # whose name is not listed in +barcodes+ are ignored.
  def initialize(lane_log, barcodes)
    @num_reads = Array.new(barcodes.length, 0)
    @num_unmatched = 0
    @total = 0
    @barcodes = barcodes

    # Block form, so the log file is closed again after reading.
    File.open(lane_log) do |log|
      log.each_line do |line|
        fields = line.chomp.split("\t")
        # Skip blank lines and the "Id Count File(s)" table header. Only the
        # first column is checked: "Id" may well occur in a file name.
        next if fields.empty? || fields[0] == "Id"

        count = fields[1].to_i
        case fields[0]
        when "unmatched"
          @num_unmatched += count
        when "total"
          @total += count
        else
          i = @barcodes.index(fields[0])
          @num_reads[i] += count if i
        end
      end
    end
  end

  # Returns a multi-line, human readable summary: one line per barcode, the
  # number of unmatched reads with its share of the total (in percent,
  # rounded to two decimals) and the total.
  def to_s
    per_barcode = @barcodes.each_with_index.map do |code, i|
      "#{code}:\t#{@num_reads[i]} \n"
    end

    # With no reads at all the share is reported as 0.0 instead of dividing
    # by zero (which yields NaN and makes rounding raise).
    percent = 0.0
    unless @total.to_f.zero?
      percent = (100 / @total.to_f) * @num_unmatched.to_f
      percent = (percent * 100).round / 100.to_f
    end

    "Statistics: \nBarcode\t# of reads\n" +
      per_barcode.join +
      "Unmatched:\t#{@num_unmatched}\t(#{percent}%)\n" +
      "Total:\t#{@total}"
  end
end
@@ -0,0 +1,10 @@
1
+ require "nugen_barcode_splitter/nugen_template"
2
+ require "nugen_barcode_splitter/fastq"
3
+ require "nugen_barcode_splitter/sample_sheet"
4
+ require "nugen_barcode_splitter/statistics"
5
+
6
# Empty class carrying the gem's name; it defines no methods or constants here.
+ class NugenBarcodeSplitter
7
+
8
+ end
9
+
10
+
@@ -0,0 +1,56 @@
1
+ Id Count File(s)
2
+ R1 1063349 nugen/R1_001.R1.fq nugen/R2_001.R1.fq
3
+ R2 1002895 nugen/R1_001.R2.fq nugen/R2_001.R2.fq
4
+ R12 809924 nugen/R1_001.R12.fq nugen/R2_001.R12.fq
5
+ R22 933773 nugen/R1_001.R22.fq nugen/R2_001.R22.fq
6
+ unmatched 180059 nugen/R1_001.unmatched.fq nugen/R2_001.unmatched.fq
7
+ total 3990000
8
+ Id Count File(s)
9
+ R1 1046860 nugen/R1_002.R1.fq nugen/R2_002.R1.fq
10
+ R2 996545 nugen/R1_002.R2.fq nugen/R2_002.R2.fq
11
+ R12 807472 nugen/R1_002.R12.fq nugen/R2_002.R12.fq
12
+ R22 831454 nugen/R1_002.R22.fq nugen/R2_002.R22.fq
13
+ unmatched 307669 nugen/R1_002.unmatched.fq nugen/R2_002.unmatched.fq
14
+ total 3990000
15
+ Id Count File(s)
16
+ R1 1086329 nugen/R1_003.R1.fq nugen/R2_003.R1.fq
17
+ R2 951836 nugen/R1_003.R2.fq nugen/R2_003.R2.fq
18
+ R12 836954 nugen/R1_003.R12.fq nugen/R2_003.R12.fq
19
+ R22 895243 nugen/R1_003.R22.fq nugen/R2_003.R22.fq
20
+ unmatched 219638 nugen/R1_003.unmatched.fq nugen/R2_003.unmatched.fq
21
+ total 3990000
22
+ Id Count File(s)
23
+ R1 1083328 nugen/R1_004.R1.fq nugen/R2_004.R1.fq
24
+ R2 976776 nugen/R1_004.R2.fq nugen/R2_004.R2.fq
25
+ R12 827858 nugen/R1_004.R12.fq nugen/R2_004.R12.fq
26
+ R22 974454 nugen/R1_004.R22.fq nugen/R2_004.R22.fq
27
+ unmatched 127584 nugen/R1_004.unmatched.fq nugen/R2_004.unmatched.fq
28
+ total 3990000
29
+ Id Count File(s)
30
+ R1 1096656 nugen/R1_005.R1.fq nugen/R2_005.R1.fq
31
+ R2 867273 nugen/R1_005.R2.fq nugen/R2_005.R2.fq
32
+ R12 849238 nugen/R1_005.R12.fq nugen/R2_005.R12.fq
33
+ R22 858169 nugen/R1_005.R22.fq nugen/R2_005.R22.fq
34
+ unmatched 318664 nugen/R1_005.unmatched.fq nugen/R2_005.unmatched.fq
35
+ total 3990000
36
+ Id Count File(s)
37
+ R1 963137 nugen/R1_006.R1.fq nugen/R2_006.R1.fq
38
+ R2 738468 nugen/R1_006.R2.fq nugen/R2_006.R2.fq
39
+ R12 751315 nugen/R1_006.R12.fq nugen/R2_006.R12.fq
40
+ R22 817436 nugen/R1_006.R22.fq nugen/R2_006.R22.fq
41
+ unmatched 719644 nugen/R1_006.unmatched.fq nugen/R2_006.unmatched.fq
42
+ total 3990000
43
+ Id Count File(s)
44
+ R1 1037730 nugen/R1_007.R1.fq nugen/R2_007.R1.fq
45
+ R2 774785 nugen/R1_007.R2.fq nugen/R2_007.R2.fq
46
+ R12 800517 nugen/R1_007.R12.fq nugen/R2_007.R12.fq
47
+ R22 793770 nugen/R1_007.R22.fq nugen/R2_007.R22.fq
48
+ unmatched 583198 nugen/R1_007.unmatched.fq nugen/R2_007.unmatched.fq
49
+ total 3990000
50
+ Id Count File(s)
51
+ R1 1156538 nugen/R1_008.R1.fq nugen/R2_008.R1.fq
52
+ R2 919772 nugen/R1_008.R2.fq nugen/R2_008.R2.fq
53
+ R12 875997 nugen/R1_008.R12.fq nugen/R2_008.R12.fq
54
+ R22 879468 nugen/R1_008.R22.fq nugen/R2_008.R22.fq
55
+ unmatched 158225 nugen/R1_008.unmatched.fq nugen/R2_008.unmatched.fq
56
+ total 3990000
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX1 ACCC
3
+ RX2 GAGT
4
+ RX1X2 CGTA
5
+ RX2X2 TTAG
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX3 AGGG
3
+ RX4 GTCA
4
+ RX3X2 CCAT
5
+ RX4X2 TATC
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX5 ACCC
3
+ RX6 GAGT
4
+ RX5X2 CGTA
5
+ RX6X2 TTAG
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX7 AGGG
3
+ RX8 GTCA
4
+ RX7X2 CCAT
5
+ RX8X2 TATC
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX9 ACCC
3
+ RX10 GAGT
4
+ RX9X2 CGTA
5
+ RX10X2 TTAG
@@ -0,0 +1,20 @@
1
+ >HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
2
+ NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
3
+ +
4
+ ####################################################################################################
5
+ >HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
6
+ TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
7
+ +
8
+ CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
9
+ @HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
10
+ AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
11
+ +
12
+ BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
13
+ @HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
14
+ TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
15
+ +
16
+ CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
17
+ @HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
18
+ TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
19
+ +
20
+ @C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
@@ -0,0 +1,21 @@
1
+ FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
2
+ C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
3
+ C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
4
+ C0ED3ACXX,4,R-1-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
5
+ C0ED3ACXX,4,R-2-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
6
+ C0ED3ACXX,5,R-3,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
7
+ C0ED3ACXX,5,R-4,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
8
+ C0ED3ACXX,5,R-3-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
9
+ C0ED3ACXX,5,R-4-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
10
+ C0ED3ACXX,6,R-5,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
11
+ C0ED3ACXX,6,R-6,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
12
+ C0ED3ACXX,6,R-5-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
13
+ C0ED3ACXX,6,R-6-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
14
+ C0ED3ACXX,7,R-7,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
15
+ C0ED3ACXX,7,R-8,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
16
+ C0ED3ACXX,7,R-7-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
17
+ C0ED3ACXX,7,R-8-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
18
+ C0ED3ACXX,8,R-9,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
19
+ C0ED3ACXX,8,R-10,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
20
+ C0ED3ACXX,8,R-9-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
21
+ C0ED3ACXX,8,R-10-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
@@ -0,0 +1,20 @@
1
+ @HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
2
+ NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
3
+ +
4
+ ####################################################################################################
5
+ @HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
6
+ TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
7
+ +
8
+ CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
9
+ @HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
10
+ AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
11
+ +
12
+ BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
13
+ @HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
14
+ TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
15
+ +
16
+ CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
17
+ @HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
18
+ TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
19
+ +
20
+ @C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
@@ -0,0 +1,55 @@
1
+ require 'test/unit'
2
+ require 'nugen_barcode_splitter'
3
+
4
# Unit tests for the classes of the gem. All paths are relative to the gem's
# root directory, from where the tests are expected to be run.
class NugenBarcodeSplitterTest < Test::Unit::TestCase

  # The unrendered template keeps its placeholders; the rendered script
  # contains the values handed to #fill.
  def test_nugen_template
    template = NugenTemplate.new("fastq-multx", "")
    assert template.to_s.include?("fastq-multx")
    assert template.to_s.include?("<%= @fwd %>")

    # The lane is passed the way the command line tool passes it: digits only.
    script = template.fill("3", "33", "~/Lane3/", "bc", "fwd", "rev").to_s

    # Checked piece by piece instead of against one literal, so the test does
    # not depend on the template's indentation.
    assert(script.include?("#!/bin/bash"), "shebang is missing")
    assert(script.include?("-N fq.Lane3.33"), "job name is wrong")
    assert(script.include?("-o ~/Lane3//nugen_demultiplexing.log"), "log file is wrong")
    assert(script.include?("fastq-multx"), "fastq-multx call is missing")
    assert(script.include?("-B bc"), "barcode file is wrong")
    assert(script.include?("<(gunzip -c fwd) <(gunzip -c rev)"), "input files are wrong")
    assert(script.include?("-o ~/Lane3//R1_33.%.fq ~/Lane3//R2_33.%.fq"), "output files are wrong")
    assert(!script.include?("<%"), "template was not rendered completely")
  end

  # An invalid file is rejected; a valid one is padded with N bases and "@"
  # qualities.
  def test_fastq
    assert_raise RuntimeError do
      Fastq.new("test/fixtures/invalid.fq")
    end

    fastq_file = "test/fixtures/test.fq"
    out_file = "test/fixtures/added.fq"
    fastq = nil
    assert_nothing_raised do
      fastq = Fastq.new(fastq_file)
    end

    begin
      fastq.add(4, out_file)
      assert(!File.zero?(out_file), "#{out_file} is empty!")
      lines = File.readlines(out_file)
      assert(lines[1].start_with?("NNNN"), "Reads do not start with NNNN")
      assert(lines[3].start_with?("@@@@"), "Reads do not start with @@@@")
    ensure
      # Leave neither an open handle nor the generated file behind.
      fastq.close if fastq
      File.delete(out_file) if File.exist?(out_file)
    end
  end

  # Sample names are sanitized and one barcode table per lane is written.
  def test_sample_sheet
    samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
    assert_equal("TTAG", samplesheet.barcodes[3])
    assert_equal("RX3", samplesheet.sample_id[4])

    samplesheet.create_barcode_txt("test/fixtures/barcode")

    lane4 = File.readlines("test/fixtures/barcode_4.txt")
    assert(lane4[0].start_with?("#"), "barcode table has no header line")
    assert_equal(["RX1", "ACCC"], lane4[1].split)
    assert_equal(5, lane4.length)
    assert(!File.exist?("test/fixtures/barcode"), "stray file left behind")
  end

  # The counts of all tables of the log are summed up.
  def test_statistics
    log_file = "test/fixtures/Lane4.log"
    stats = Statistics.new(log_file, ["R1", "R2", "R12", "R22"])
    assert_equal(31920000, stats.total)
    assert_equal(8533927, stats.num_reads[0])
    assert_equal(2614681, stats.num_unmatched)
  end
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nugen_barcode_splitter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Katharina Hayer
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-15 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: ! "This gem is designed to demultiplex reads\n produced
15
+ by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
16
+ email:
17
+ - katharinaehayer@gmail.com
18
+ executables:
19
+ - base_adder
20
+ - nugen_barcode_splitter
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - bin/base_adder
25
+ - bin/nugen_barcode_splitter
26
+ - lib/nugen_barcode_splitter.rb
27
+ - lib/nugen_barcode_splitter/fastq.rb
28
+ - lib/nugen_barcode_splitter/nugen_template.rb
29
+ - lib/nugen_barcode_splitter/sample_sheet.rb
30
+ - lib/nugen_barcode_splitter/statistics.rb
31
+ - test/fixtures/Lane4.log
32
+ - test/fixtures/barcode_4.txt
33
+ - test/fixtures/barcode_5.txt
34
+ - test/fixtures/barcode_6.txt
35
+ - test/fixtures/barcode_7.txt
36
+ - test/fixtures/barcode_8.txt
37
+ - test/fixtures/invalid.fq
38
+ - test/fixtures/sample_sheet.csv
39
+ - test/fixtures/test.fq
40
+ - test/test_mutations_caller_pipeline.rb
41
+ homepage: https://github.com/khayer/nugen_barcode_splitter
42
+ licenses: []
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project: nugen_barcode_splitter
61
+ rubygems_version: 1.8.10
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: Nugen Barcode Splitter for Illumina
65
+ test_files:
66
+ - test/fixtures/Lane4.log
67
+ - test/fixtures/barcode_4.txt
68
+ - test/fixtures/barcode_5.txt
69
+ - test/fixtures/barcode_6.txt
70
+ - test/fixtures/barcode_7.txt
71
+ - test/fixtures/barcode_8.txt
72
+ - test/fixtures/invalid.fq
73
+ - test/fixtures/sample_sheet.csv
74
+ - test/fixtures/test.fq
75
+ - test/test_mutations_caller_pipeline.rb