nugen_barcode_splitter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/base_adder ADDED
@@ -0,0 +1,22 @@
1
#!/usr/bin/env ruby
# Command-line wrapper around Fastq#add: copies a FASTQ file to a new file
# while prepending a fixed number of "N" bases (and matching quality
# characters) to every read.
require "nugen_barcode_splitter"

usage = <<EOF
#{$0} file.fq number_of_bases out.fq

-_-_-_-_- #{$0} -_-_-_-_-

Adds N^P bases to the beginning of the
fastq file. The Quality is set on the
lowest value.
EOF

# All three positional arguments are needed; without them there is nothing
# sensible to do, so show the usage text instead of failing inside Fastq.
if ARGV.length < 3
  STDERR.puts usage
  exit 1
end

file_dir = ARGV[0]
number_of_bases = ARGV[1].to_i
out_dir = ARGV[2]

# Open the file named on the command line (the original passed the literal
# string "file_dir" here, so the argument was never used).
fastq = Fastq.new(file_dir)
begin
  fastq.add(number_of_bases, out_dir)
ensure
  # Release the input handle even when writing the output fails.
  fastq.close
end
21
+
22
+
@@ -0,0 +1,156 @@
1
#!/usr/bin/env ruby
# Demultiplexes Nugen-barcoded Illumina reads:
#   1. writes one barcode file per lane from the sample sheet,
#   2. fills the NugenTemplate for every R1/R2 file pair found in the
#      Sample_Lane* directories of the project directory and runs it,
#   3. calls base_adder on the demultiplexed R1 files of each lane.
# With --debug the generated commands are printed instead of executed.
require "rubygems"
require "yaml"
require "optparse"
require "fileutils"
require "nugen_barcode_splitter"

usage = <<EOF
#{$0} [options] -p project_dir -o out_dir -s sample_sheet_nugen
_____________________________________________________________________________

Example sample_sheet_nugen.csv :

FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject
C0ED3ACXX,4,S1,hg19,ACCC,RNA Seq,N,,,33333
C0ED3ACXX,4,S2,hg19,GAGT,RNA Seq,N,,,44444
C0ED3ACXX,4,S3,hg19,CGTA,RNA Seq,N,,,33333
C0ED3ACXX,4,S4,hg19,TTAG,RNA Seq,N,,,44444
C0ED3ACXX,5,S5,hg19,AGGG,RNA Seq,N,,,33333
C0ED3ACXX,5,S6,hg19,GTCA,RNA Seq,N,,,44444
C0ED3ACXX,6,S7,hg19,CCAT,RNA Seq,N,,,33333

Note: The sample names must be alphanumerical!
_____________________________________________________________________________

EOF

# The flag-style entries hold the literal fastq-multx switch (or "" when the
# switch is off) so they can be interpolated straight into the command line.
options = {
  :project_dir => nil,
  :out_dir => nil,
  :sample_sheet => nil,
  :eol_only? => "",
  :bol_only? => "",
  :mismatches => "",
  :keep_barcode => "",
  :fastq_multx => "fastq_multx",
  :debug => false
}

optparse = OptionParser.new do |opts|
  opts.banner = usage

  opts.on("-p", "--project_dir DIR", String,
          "Illumina project directory (../Unaligned/ProjectXXX/)") do |dir|
    options[:project_dir] = dir
  end

  opts.on("-o", "--out_dir DIR", String,
          "The desired output directory") do |dir|
    options[:out_dir] = dir
  end

  opts.on("-s", "--sample_sheet FILE", String,
          "Please provide your sample_sheet") do |file|
    options[:sample_sheet] = file
  end

  opts.on("-e", "--end_of_line",
          "Limit the search for the barcode to the end of the line DEFAULT:false") do
    options[:eol_only?] = "-e"
  end

  opts.on("-b", "--begin_of_line",
          "Limit the search for the barcode to the start of the line DEFAULT:false") do
    options[:bol_only?] = "-b"
  end

  opts.on("-k", "--keep_barcode",
          "Do not trim of the barcode DEFAULT:false") do
    options[:keep_barcode] = "-x"
  end

  opts.on("-m", "--mismatches NUM", Integer, "Number of mismatches (Default:1)") do |num|
    options[:mismatches] = "-m #{num}"
  end

  opts.on("-x", "--fastq_multx DIR", String,
          "Path to the fastq-multx executable") do |path|
    options[:fastq_multx] = path
  end

  opts.on("-d", "--debug", "Debug mode!") do
    options[:debug] = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  # ParseError also covers invalid argument values such as "-m abc".
  STDERR.puts e.message
  STDERR.puts optparse
  exit 1
end

missing = [:project_dir, :out_dir, :sample_sheet].select { |param| options[param].nil? }
unless missing.empty?
  puts "\nMissing options given or missing in config_file: \n\t#{missing.join(",\n\t")}"
  puts optparse
  exit 1
end

STDERR.puts "CURRENT OPTIONS:"
STDERR.puts options.to_yaml

# The barcode files are written below out_dir, so it has to exist first.
FileUtils.mkdir_p(options[:out_dir])

# Read out sample_sheet; writes <out_dir>/barcode_<lane>.txt for every lane.
sample_sheet = SampleSheet.new(options[:sample_sheet])
sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")

# Prepare template
multx_opts = "#{options[:keep_barcode]} #{options[:eol_only?]} #{options[:bol_only?]} #{options[:mismatches]}"
nugen_temp = NugenTemplate.new(options[:fastq_multx], multx_opts)

# Runs +argv+ unless in debug mode, where it is only printed.
# Raises with +failure_message+ when the command does not succeed.
run = lambda do |argv, printable, failure_message|
  if options[:debug]
    puts printable
  else
    raise failure_message unless system(*argv)
  end
end

# Dir.glob on a bare directory path only yields the directory itself, so the
# entries have to be requested explicitly with a "*" pattern.
Dir.glob(File.join(options[:project_dir], "*")).sort.each do |sample_dir|
  next unless File.directory?(sample_dir)
  next unless File.basename(sample_dir) =~ /Sample_Lane/
  outdir = ""

  Dir.glob(File.join(sample_dir, "*")).sort.each do |fwd|
    next if File.directory?(fwd)
    name = File.basename(fwd)
    next unless name =~ /Lane[1-8]_NoIndex_L[0-9]{3}_R1_[0-9]{3}\.fastq\.gz/

    # Only rewrite the file name; a directory containing "_R1_" must not change.
    rev = File.join(File.dirname(fwd), name.sub("_R1_", "_R2_"))
    # e.g. Lane4_NoIndex_L004_R1_001.fastq.gz -> lane "4", number "001"
    parts = name.split("_")
    lane = parts[0].gsub(/\D/, "")
    number = parts[4].gsub(/\D/, "")
    barcodes = "#{options[:out_dir]}/barcode_#{lane}.txt"
    outdir = options[:out_dir] + "/Lane#{lane}"
    begin
      FileUtils.mkdir_p(outdir)
    rescue StandardError => e
      # Best effort, as before: report and let the command itself fail.
      STDERR.puts e.message
    end

    cmd = nugen_temp.fill(lane, number, outdir, barcodes, fwd, rev)
    # The template uses bash process substitution "<(...)", which a plain
    # /bin/sh (what Kernel#system uses for a string) does not always support.
    run.call(["bash", "-c", cmd], cmd, "Calling the template did not succeed!")
  end

  # No matching read files in this directory: nothing was demultiplexed.
  next if outdir.empty?

  # Add the trimmed bases
  Dir.glob(File.join(outdir, "*")).sort.each do |fwd|
    name = File.basename(fwd)
    next unless name =~ /\AR1_[0-9]{3}\./
    # Skip results of an earlier run so they are not processed again.
    next if name.end_with?("_added")

    # NOTE(review): 4 presumably is the barcode length (the example sheet
    # uses 4-base barcodes) - confirm before using other barcode sets.
    argv = ["base_adder", fwd, "4", "#{fwd}_added"]
    run.call(argv, argv.join(" "), "Calling base_adder did not succeed!")
  end
end
@@ -0,0 +1,26 @@
1
# Thin wrapper around an open FASTQ file that can write a copy of the file
# with extra leading bases on every read.
class Fastq
  # Opens +filename+ and checks that it looks like a FASTQ file.
  #
  # @param filename [String] path of the FASTQ file to read
  # @raise [RuntimeError] when the file is empty or its first line does not
  #   start with "@" (e.g. a FASTA file, whose headers start with ">")
  def initialize(filename)
    @filehandle = File.open(filename, "r")
    first_line = @filehandle.gets
    unless first_line && first_line.start_with?("@")
      # Do not leak the handle when the file is rejected.
      @filehandle.close
      raise RuntimeError, "Invalid fastq file!"
    end
    @filehandle.rewind
  end

  # Writes a copy of the file to +outdir+ in which every sequence line is
  # prefixed with +num+ "N" bases and every quality line with +num+ "@"
  # characters. Records are read as groups of four lines.
  #
  # @param num [Integer] number of bases to prepend to each read
  # @param outdir [String] path of the output file (overwritten if present)
  # @return [nil]
  # @raise [EOFError] when the input ends in the middle of a record
  def add(num, outdir)
    bases = "N" * num
    qualities = "@" * num

    # Start from the top so that repeated calls produce complete output.
    @filehandle.rewind
    # Block form closes the output file even if reading raises.
    File.open(outdir, "w") do |outfile|
      until @filehandle.eof?
        outfile.write(@filehandle.readline)             # header
        outfile.write(bases + @filehandle.readline)     # sequence
        outfile.write(@filehandle.readline)             # separator
        outfile.write(qualities + @filehandle.readline) # qualities
      end
    end
  end

  # Closes the underlying input file; safe to call more than once.
  def close
    @filehandle.close unless @filehandle.closed?
  end
end
@@ -0,0 +1,42 @@
1
# ERB ships with Ruby; the gem declares no runtime dependencies, so the
# template is rendered with the standard library instead of erubis.
require "erb"

# Holds the job-script template for one fastq-multx demultiplexing run and
# fills in the per-lane values.
class NugenTemplate

  # Builds the template text. The executable and its options are inserted
  # right away; the <%= ... %> placeholders are resolved later by #fill.
  #
  # @param fastq_multx [String] name or path of the fastq-multx executable
  # @param options [String] extra command-line options for fastq-multx
  def initialize(fastq_multx, options)
    @template = <<EOF

#!/bin/bash
#\$ -pe DJ 4
#\$ -l h_vmem=6G
#\$ -j y
#\$ -N fq.Lane<%= @lane %>.<%= @number %>
#\$ -o <%= @lane_dir %>/nugen_demultiplexing.log

#{fastq_multx} #{options} -B <%= @barcodes %> \\
<(gunzip -c <%= @fwd %>) <(gunzip -c <%= @rev %>) \\
-o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq
EOF
  end

  # Renders the template for one pair of read files.
  #
  # @param lane [String] lane identifier, used in the job name
  # @param number [String] file number, used in the job and output names
  # @param lane_dir [String] directory for the log and the output files
  # @param barcodes [String] path of the barcode file passed via -B
  # @param fwd [String] path of the gzipped forward (R1) reads
  # @param rev [String] path of the gzipped reverse (R2) reads
  # @return [String] the filled-in script text
  def fill(lane, number, lane_dir, barcodes, fwd, rev)
    values = {
      :lane => lane,
      :number => number,
      :lane_dir => lane_dir,
      :barcodes => barcodes,
      :fwd => fwd,
      :rev => rev,
      :r1 => "R1_#{number}",
      :r2 => "R2_#{number}"
    }

    # The template refers to instance variables, so expose each value as an
    # instance variable of a throw-away object and render in its binding.
    context = Object.new
    values.each do |name, value|
      context.instance_variable_set("@#{name}", value)
    end
    ERB.new(@template).result(context.instance_eval { binding })
  end

  # @return [String] the unfilled template without its trailing newline
  def to_s
    @template.chomp
  end

end
@@ -0,0 +1,41 @@
1
+ require "csv"
2
+
3
+ # Sample sheets are supposed to look like this:
4
+ =begin
5
+ FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
6
+ C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
7
+ C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
8
+ =end
9
+
10
# Reads the Lane, SampleID and Index columns of a sample sheet (CSV with a
# header row) and writes one barcode file per lane from them.
class SampleSheet

  attr_accessor :lanes, :sample_id, :barcodes

  # Parses +samplesheet+ into three parallel arrays (one entry per data row).
  # Every non-word character of a sample id is replaced by "X".
  #
  # @param samplesheet [String] path of the CSV sample sheet
  def initialize(samplesheet)
    @lanes = []
    @sample_id = []
    @barcodes = []

    # Options are passed as keywords (not as a positional hash) so the call
    # works on current Ruby/CSV versions as well; blank lines are ignored.
    CSV.foreach(samplesheet, :headers => :first_row, :skip_blanks => true) do |row|
      @lanes << row["Lane"]
      @sample_id << row["SampleID"].gsub(/\W/, "X")
      @barcodes << row["Index"]
    end
  end

  # Writes "<prefix>_<lane>.txt" for every lane in the sheet. Each file
  # starts with a comment header followed by one "sample barcode" line per
  # row of that lane, in sheet order.
  #
  # Rows are grouped by lane first, so a lane whose rows are not adjacent in
  # the sheet still ends up complete in a single file, and every file is
  # closed (and therefore flushed) before the method returns.
  #
  # @param prefix [String] path prefix of the files to create
  # @return [Array<String>] paths of the files that were written
  def create_barcode_txt(prefix)
    rows_by_lane = Hash.new { |hash, lane| hash[lane] = [] }
    @lanes.each_with_index do |lane, i|
      rows_by_lane[lane] << "#{@sample_id[i]} #{@barcodes[i]} \n"
    end

    rows_by_lane.map do |lane, rows|
      outfile = "#{prefix}_#{lane}.txt"
      File.open(outfile, "w") do |handler|
        handler.write("# SampleName Barcode \n")
        rows.each { |row| handler.write(row) }
      end
      outfile
    end
  end

end
@@ -0,0 +1,37 @@
1
# Sums up the per-barcode read counts of a demultiplexing log. The log may
# contain several consecutive tables (each with its own "Id" header row);
# counts with the same id are added up across all of them.
class Statistics

  attr_accessor :num_reads, :num_unmatched, :total, :barcodes

  # Parses +lane_log+, a tab-separated file whose first column is an id and
  # whose second column is a read count.
  #
  # @param lane_log [String] path of the log file
  # @param barcodes [Array<String>] ids to collect counts for; ids found in
  #   the log that are not listed here are ignored
  def initialize(lane_log, barcodes)
    @num_reads = Array.new(barcodes.length, 0)
    @num_unmatched = 0
    @total = 0
    @barcodes = barcodes

    # File.foreach closes the file when done.
    File.foreach(lane_log) do |line|
      fields = line.chomp.split("\t")
      # Skip blank lines and header rows. Only the first column is checked,
      # so data rows that merely contain "Id" (e.g. in a path) still count.
      next if fields.empty? || fields[0] == "Id"

      count = fields[1].to_i
      case fields[0]
      when "unmatched"
        @num_unmatched += count
      when "total"
        @total += count
      else
        i = @barcodes.index(fields[0])
        @num_reads[i] += count if i
      end
    end
  end

  # @return [String] a small report with the reads per barcode, the number
  #   and percentage (two decimals) of unmatched reads, and the total
  def to_s
    str = "Statistics: \nBarcode\t# of reads\n"
    @barcodes.each_with_index do |code, i|
      str << "#{code}:\t#{@num_reads[i]} \n"
    end
    # Without any reads the ratio is undefined (NaN cannot be rounded), so
    # report 0.0 % instead of raising.
    percent = @total.zero? ? 0.0 : (100 / @total.to_f) * @num_unmatched.to_f
    percent = (percent * 100).round / 100.0
    str << "Unmatched:\t#{@num_unmatched}\t(#{percent}%)\n"
    str << "Total:\t#{@total}"
  end
end
@@ -0,0 +1,10 @@
1
+ require "nugen_barcode_splitter/nugen_template"
2
+ require "nugen_barcode_splitter/fastq"
3
+ require "nugen_barcode_splitter/sample_sheet"
4
+ require "nugen_barcode_splitter/statistics"
5
+
6
# Top-level class named after the gem. It defines no behaviour of its own;
# the working classes are loaded by the require lines of this file.
class NugenBarcodeSplitter; end
9
+
10
+
@@ -0,0 +1,56 @@
1
+ Id Count File(s)
2
+ R1 1063349 nugen/R1_001.R1.fq nugen/R2_001.R1.fq
3
+ R2 1002895 nugen/R1_001.R2.fq nugen/R2_001.R2.fq
4
+ R12 809924 nugen/R1_001.R12.fq nugen/R2_001.R12.fq
5
+ R22 933773 nugen/R1_001.R22.fq nugen/R2_001.R22.fq
6
+ unmatched 180059 nugen/R1_001.unmatched.fq nugen/R2_001.unmatched.fq
7
+ total 3990000
8
+ Id Count File(s)
9
+ R1 1046860 nugen/R1_002.R1.fq nugen/R2_002.R1.fq
10
+ R2 996545 nugen/R1_002.R2.fq nugen/R2_002.R2.fq
11
+ R12 807472 nugen/R1_002.R12.fq nugen/R2_002.R12.fq
12
+ R22 831454 nugen/R1_002.R22.fq nugen/R2_002.R22.fq
13
+ unmatched 307669 nugen/R1_002.unmatched.fq nugen/R2_002.unmatched.fq
14
+ total 3990000
15
+ Id Count File(s)
16
+ R1 1086329 nugen/R1_003.R1.fq nugen/R2_003.R1.fq
17
+ R2 951836 nugen/R1_003.R2.fq nugen/R2_003.R2.fq
18
+ R12 836954 nugen/R1_003.R12.fq nugen/R2_003.R12.fq
19
+ R22 895243 nugen/R1_003.R22.fq nugen/R2_003.R22.fq
20
+ unmatched 219638 nugen/R1_003.unmatched.fq nugen/R2_003.unmatched.fq
21
+ total 3990000
22
+ Id Count File(s)
23
+ R1 1083328 nugen/R1_004.R1.fq nugen/R2_004.R1.fq
24
+ R2 976776 nugen/R1_004.R2.fq nugen/R2_004.R2.fq
25
+ R12 827858 nugen/R1_004.R12.fq nugen/R2_004.R12.fq
26
+ R22 974454 nugen/R1_004.R22.fq nugen/R2_004.R22.fq
27
+ unmatched 127584 nugen/R1_004.unmatched.fq nugen/R2_004.unmatched.fq
28
+ total 3990000
29
+ Id Count File(s)
30
+ R1 1096656 nugen/R1_005.R1.fq nugen/R2_005.R1.fq
31
+ R2 867273 nugen/R1_005.R2.fq nugen/R2_005.R2.fq
32
+ R12 849238 nugen/R1_005.R12.fq nugen/R2_005.R12.fq
33
+ R22 858169 nugen/R1_005.R22.fq nugen/R2_005.R22.fq
34
+ unmatched 318664 nugen/R1_005.unmatched.fq nugen/R2_005.unmatched.fq
35
+ total 3990000
36
+ Id Count File(s)
37
+ R1 963137 nugen/R1_006.R1.fq nugen/R2_006.R1.fq
38
+ R2 738468 nugen/R1_006.R2.fq nugen/R2_006.R2.fq
39
+ R12 751315 nugen/R1_006.R12.fq nugen/R2_006.R12.fq
40
+ R22 817436 nugen/R1_006.R22.fq nugen/R2_006.R22.fq
41
+ unmatched 719644 nugen/R1_006.unmatched.fq nugen/R2_006.unmatched.fq
42
+ total 3990000
43
+ Id Count File(s)
44
+ R1 1037730 nugen/R1_007.R1.fq nugen/R2_007.R1.fq
45
+ R2 774785 nugen/R1_007.R2.fq nugen/R2_007.R2.fq
46
+ R12 800517 nugen/R1_007.R12.fq nugen/R2_007.R12.fq
47
+ R22 793770 nugen/R1_007.R22.fq nugen/R2_007.R22.fq
48
+ unmatched 583198 nugen/R1_007.unmatched.fq nugen/R2_007.unmatched.fq
49
+ total 3990000
50
+ Id Count File(s)
51
+ R1 1156538 nugen/R1_008.R1.fq nugen/R2_008.R1.fq
52
+ R2 919772 nugen/R1_008.R2.fq nugen/R2_008.R2.fq
53
+ R12 875997 nugen/R1_008.R12.fq nugen/R2_008.R12.fq
54
+ R22 879468 nugen/R1_008.R22.fq nugen/R2_008.R22.fq
55
+ unmatched 158225 nugen/R1_008.unmatched.fq nugen/R2_008.unmatched.fq
56
+ total 3990000
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX1 ACCC
3
+ RX2 GAGT
4
+ RX1X2 CGTA
5
+ RX2X2 TTAG
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX3 AGGG
3
+ RX4 GTCA
4
+ RX3X2 CCAT
5
+ RX4X2 TATC
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX5 ACCC
3
+ RX6 GAGT
4
+ RX5X2 CGTA
5
+ RX6X2 TTAG
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX7 AGGG
3
+ RX8 GTCA
4
+ RX7X2 CCAT
5
+ RX8X2 TATC
@@ -0,0 +1,5 @@
1
+ # SampleName Barcode
2
+ RX9 ACCC
3
+ RX10 GAGT
4
+ RX9X2 CGTA
5
+ RX10X2 TTAG
@@ -0,0 +1,20 @@
1
+ >HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
2
+ NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
3
+ +
4
+ ####################################################################################################
5
+ >HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
6
+ TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
7
+ +
8
+ CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
9
+ @HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
10
+ AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
11
+ +
12
+ BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
13
+ @HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
14
+ TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
15
+ +
16
+ CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
17
+ @HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
18
+ TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
19
+ +
20
+ @C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
@@ -0,0 +1,21 @@
1
+ FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject,
2
+ C0ED3ACXX,4,R-1,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
3
+ C0ED3ACXX,4,R-2,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
4
+ C0ED3ACXX,4,R-1-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
5
+ C0ED3ACXX,4,R-2-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
6
+ C0ED3ACXX,5,R-3,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
7
+ C0ED3ACXX,5,R-4,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
8
+ C0ED3ACXX,5,R-3-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
9
+ C0ED3ACXX,5,R-4-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
10
+ C0ED3ACXX,6,R-5,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
11
+ C0ED3ACXX,6,R-6,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
12
+ C0ED3ACXX,6,R-5-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
13
+ C0ED3ACXX,6,R-6-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
14
+ C0ED3ACXX,7,R-7,hg19,AGGG,RNA Seq,N,,,47644,Nugen barcodes
15
+ C0ED3ACXX,7,R-8,hg19,GTCA,RNA Seq,N,,,47644,Nugen barcodes
16
+ C0ED3ACXX,7,R-7-2,hg19,CCAT,RNA Seq,N,,,47644,Nugen barcodes
17
+ C0ED3ACXX,7,R-8-2,hg19,TATC,RNA Seq,N,,,47644,Nugen barcodes
18
+ C0ED3ACXX,8,R-9,hg19,ACCC,RNA Seq,N,,,47644,Nugen barcodes
19
+ C0ED3ACXX,8,R-10,hg19,GAGT,RNA Seq,N,,,47644,Nugen barcodes
20
+ C0ED3ACXX,8,R-9-2,hg19,CGTA,RNA Seq,N,,,47644,Nugen barcodes
21
+ C0ED3ACXX,8,R-10-2,hg19,TTAG,RNA Seq,N,,,47644,Nugen barcodes
@@ -0,0 +1,20 @@
1
+ @HWI-ST628:191:c0299abxx:1:1101:1010:2080 1:Y:0:CGATGT
2
+ NGTANNCAAATCAAACACCTGACTCCNNNAANGAGCCNGNATTNTNTGANANNTNNNNNNANNNCTTNTGTNTTTAGAATAAGACAGCCTGTGGGAGTGT
3
+ +
4
+ ####################################################################################################
5
+ @HWI-ST628:191:c0299abxx:1:1101:1030:2115 1:N:0:CGATGT
6
+ TGCACAGACACACACACAAACACATGACGTGCATACACACACACTTGTATACAGACATAAACATACACACACATTTATACATGCATAAACACACACAAAC
7
+ +
8
+ CCCFFFFFHHHHHJJJJJJJJJJJJIJJJHIJJJIIJJJJIJIJJJJHIJJJJJJJJJJIJHHHHHFFFFDDCDDDEFEDDDDDDDDDCDDDDDDDDDDD
9
+ @HWI-ST628:191:c0299abxx:1:1101:1141:2140 1:N:0:CGATGT
10
+ AGGTTTAAGTGAATATAAGAGCAACAATGACATGAACCAGCCAGGCTTTTGCTTGATGCTGTAATTCAGTGTTTGACCCTGCTGACATTTGTGTTGTCAG
11
+ +
12
+ BBCDDFFFHHHHHJIJJJJIJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJJJJJJJJJJJJIJJJJJHHGIJIIHHHHHFFFFFEEEEEEECCDDDDC
13
+ @HWI-ST628:191:c0299abxx:1:1101:1225:2144 1:N:0:CGATGT
14
+ TACAATTATATTAGTATAAAAATGAGCAATTTTTAAAATGAGGCGTGGCATTAAACGTGGCTAATTCATACAAATTCCATATGATAATACTGGTATAAAA
15
+ +
16
+ CCCFFFFFHHHHHJHIIJJJJJJJJJJJIJJJJJJIJJJJJJJJJIJJJJJJJJJJJHJJJJJJHHHHHHHFFFFFFEEEEEEDEDDDEDDED@CDEEEC
17
+ @HWI-ST628:191:c0299abxx:1:1101:1036:2155 1:N:0:CGATGT
18
+ TGTTCAGGCTTTTGTTTTAATCAGAGCCTTTTATTTTTAGGACTAATAACTCTGTCACCCCATTGTTTGGGTTTTTGATGGATGAATATTTTTATTTGGC
19
+ +
20
+ @C@FDFFFHHFBHEHHIIIIIIDH>GGEHIGIIIIIIGBBG>BG@GIIIIIIIIGIHDGIGIHIIHHIIBE?EHHBBBCCDCDEBADDEDCDC<CCDC@>
@@ -0,0 +1,55 @@
1
+ require 'test/unit'
2
+ require 'nugen_barcode_splitter'
3
+
4
# Unit tests for the library classes. Paths are relative to the gem root,
# so the suite has to be started from there.
class NugenBarcodeSplitterTest < Test::Unit::TestCase

  # The unfilled template keeps its placeholders; #fill replaces them.
  # Fragments are checked instead of the whole text so the test does not
  # depend on the indentation of the template.
  def test_nugen_template
    template = NugenTemplate.new("fastq-multx", "")
    assert template.to_s.include?("fastq-multx")
    assert template.to_s.include?("<%= @fwd %>")

    filled = template.fill("3", "33", "~/Lane3", "bc", "fwd", "rev").to_s
    assert filled.include?("#!/bin/bash")
    assert filled.include?("-N fq.Lane3.33")
    assert filled.include?("-o ~/Lane3/nugen_demultiplexing.log")
    assert filled.include?("-B bc")
    assert filled.include?("<(gunzip -c fwd) <(gunzip -c rev)")
    assert filled.include?("-o ~/Lane3/R1_33.%.fq ~/Lane3/R2_33.%.fq")
    assert !filled.include?("<%="), "Template still contains placeholders"
  end

  # A file whose first line does not start with "@" is rejected; a valid
  # file gets N bases and "@" qualities prepended to every read.
  def test_fastq
    assert_raise RuntimeError do
      Fastq.new("test/fixtures/invalid.fq")
    end

    fastq_file = "test/fixtures/test.fq"
    out_file = "test/fixtures/added.fq"
    fastq = nil
    assert_nothing_raised do
      fastq = Fastq.new(fastq_file)
    end

    begin
      fastq.add(4, out_file)
      assert(!File.zero?(out_file), "#{out_file} is empty!")
      lines = File.readlines(out_file)
      assert(lines[1].start_with?("NNNN"), "Reads do not start with NNNN")
      assert(lines[3].start_with?("@@@@"), "Reads do not start with @@@@")
    ensure
      fastq.close
      File.delete(out_file) if File.exist?(out_file)
    end
  end

  # Sample ids are sanitised while reading, and one barcode file per lane
  # is created. A separate prefix keeps the shipped fixtures untouched.
  def test_sample_sheet
    samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
    assert_equal("TTAG", samplesheet.barcodes[3])
    assert_equal("RX3", samplesheet.sample_id[4])

    prefix = "test/fixtures/generated_barcode"
    generated = %w[4 5 6 7 8].map { |lane| "#{prefix}_#{lane}.txt" }
    begin
      samplesheet.create_barcode_txt(prefix)
      generated.each do |file|
        assert(File.exist?(file), "#{file} was not created")
      end
    ensure
      generated.each { |file| File.delete(file) if File.exist?(file) }
    end
  end

  # Counts are summed over all eight tables of the log.
  def test_statistics
    log_file = "test/fixtures/Lane4.log"
    stats = Statistics.new(log_file, ["R1", "R2", "R12", "R22"])
    assert_equal(31920000, stats.total)
    assert_equal(8533927, stats.num_reads[0])
    assert_equal(2614681, stats.num_unmatched)
  end
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nugen_barcode_splitter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Katharina Hayer
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-15 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: ! "This gem is designed to demultiplex reads\n produced
15
+ by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
16
+ email:
17
+ - katharinaehayer@gmail.com
18
+ executables:
19
+ - base_adder
20
+ - nugen_barcode_splitter
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - bin/base_adder
25
+ - bin/nugen_barcode_splitter
26
+ - lib/nugen_barcode_splitter.rb
27
+ - lib/nugen_barcode_splitter/fastq.rb
28
+ - lib/nugen_barcode_splitter/nugen_template.rb
29
+ - lib/nugen_barcode_splitter/sample_sheet.rb
30
+ - lib/nugen_barcode_splitter/statistics.rb
31
+ - test/fixtures/Lane4.log
32
+ - test/fixtures/barcode_4.txt
33
+ - test/fixtures/barcode_5.txt
34
+ - test/fixtures/barcode_6.txt
35
+ - test/fixtures/barcode_7.txt
36
+ - test/fixtures/barcode_8.txt
37
+ - test/fixtures/invalid.fq
38
+ - test/fixtures/sample_sheet.csv
39
+ - test/fixtures/test.fq
40
+ - test/test_mutations_caller_pipeline.rb
41
+ homepage: https://github.com/khayer/nugen_barcode_splitter
42
+ licenses: []
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project: nugen_barcode_splitter
61
+ rubygems_version: 1.8.10
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: Nugen Barcode Splitter for Illumina
65
+ test_files:
66
+ - test/fixtures/Lane4.log
67
+ - test/fixtures/barcode_4.txt
68
+ - test/fixtures/barcode_5.txt
69
+ - test/fixtures/barcode_6.txt
70
+ - test/fixtures/barcode_7.txt
71
+ - test/fixtures/barcode_8.txt
72
+ - test/fixtures/invalid.fq
73
+ - test/fixtures/sample_sheet.csv
74
+ - test/fixtures/test.fq
75
+ - test/test_mutations_caller_pipeline.rb