nugen_barcode_splitter 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
data/bin/nugen_barcode_splitter
CHANGED
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
|
|
5
5
|
require "fileutils"
|
6
6
|
|
7
7
|
usage =<<EOF
|
8
|
-
V E R S I O N - 0 . 0 .
|
8
|
+
V E R S I O N - 0 . 0 . 12 - BETA
|
9
9
|
#{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
|
10
10
|
_____________________________________________________________________________
|
11
11
|
|
@@ -114,6 +114,8 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
|
|
114
114
|
multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
|
115
115
|
nugen_temp = NugenTemplate.new(options[:fastq_multx],multx_opts)
|
116
116
|
|
117
|
+
statistics = Statistics.new()
|
118
|
+
|
117
119
|
Dir.glob(options[:project_dir]+"/*").each do |p|
|
118
120
|
next unless File.directory? p
|
119
121
|
next unless p =~ /Sample_Lane/
|
@@ -151,30 +153,39 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
151
153
|
end
|
152
154
|
# Merging
|
153
155
|
merger = Merger.new(fwd,rev,outdir,number,barcodes)
|
154
|
-
merger.merge()
|
156
|
+
stats = merger.merge()
|
157
|
+
stats_file = File.open("#{outdir}/nugen_demultiplexing.log", 'a')
|
158
|
+
stats_file.puts(stats)
|
159
|
+
stats_file.close()
|
155
160
|
end
|
156
161
|
|
157
162
|
# Add the trimmed bases
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
163
|
+
Dir.glob(outdir+"/*").each do |f|
|
164
|
+
if f =~ /nugen_demultiplexing.log/
|
165
|
+
sample_sheet.lanes
|
166
|
+
statistics = Statistics.new(f)
|
167
|
+
stats_out = outdir + "/statistics"
|
168
|
+
stats_handler = File.new(stats_out,'w')
|
169
|
+
stats_handler.write(statistics.to_s)
|
170
|
+
stats_handler.close()
|
171
|
+
end
|
172
|
+
next unless f =~ /_updated/
|
173
|
+
name = f.split("/")[-1]
|
174
|
+
id = name.split(".")[1]
|
175
|
+
sample_dir = outdir + "/Sample_#{id}"
|
176
|
+
begin
|
177
|
+
FileUtils.mkdir_p sample_dir unless File.directory? sample_dir
|
178
|
+
rescue Exception => e
|
179
|
+
STDERR.puts e.message
|
180
|
+
end
|
181
|
+
name = name.gsub(/_updated/, "")
|
182
|
+
cmd = `mv #{f} #{sample_dir}/name`
|
183
|
+
if options[:debug]
|
184
|
+
STDERR.puts cmd if options[:debug]
|
185
|
+
else
|
186
|
+
status = system(cmd)
|
187
|
+
raise "Was not able to rename and move file #{f}!" if status!=true
|
188
|
+
end
|
189
|
+
end
|
179
190
|
|
180
191
|
end
|
@@ -49,22 +49,26 @@ class Merger
|
|
49
49
|
#end
|
50
50
|
|
51
51
|
def merge()
|
52
|
+
statistics = Array.new(@sample_ids.length()+2,0)
|
53
|
+
|
52
54
|
fwd_file = Zlib::GzipReader.open(@fwd)
|
53
55
|
rev_file = Zlib::GzipReader.open(@rev)
|
54
56
|
fwd_splitted_files = []
|
55
57
|
rev_splitted_files = []
|
56
58
|
fwd_out_files = []
|
57
59
|
rev_out_files = []
|
58
|
-
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.
|
59
|
-
rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.
|
60
|
+
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
|
61
|
+
rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched.updated.fq",'w')
|
60
62
|
@sample_ids.each_with_index do |sample_id, i|
|
61
63
|
fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
62
64
|
rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
63
65
|
#OUTFILES????
|
64
|
-
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}
|
65
|
-
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}
|
66
|
+
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
|
67
|
+
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
|
66
68
|
end
|
69
|
+
|
67
70
|
fwd_file.each do |fwd_line|
|
71
|
+
statistics[-1] += 1
|
68
72
|
rev_line = rev_file.readline()
|
69
73
|
rev_name = rev_line.split(" ")
|
70
74
|
fwd_name = fwd_line.split(" ")
|
@@ -72,9 +76,11 @@ class Merger
|
|
72
76
|
|
73
77
|
@sample_ids.each_with_index do |sample_id, i|
|
74
78
|
if !fwd_splitted_files[i].eof?
|
75
|
-
compare_line_fwd = fwd_splitted_files[i].readline()
|
76
|
-
|
79
|
+
compare_line_fwd = fwd_splitted_files[i].readline()
|
80
|
+
name_compare_fwd = compare_line_fwd.split(" ")
|
81
|
+
if fwd_line[0] == name_compare_fwd[0] && marker
|
77
82
|
marker = false
|
83
|
+
statistics[i] += 1
|
78
84
|
fwd_out_files[i].write(fwd_line)
|
79
85
|
rev_out_files[i].write(rev_line)
|
80
86
|
for k in 1..3
|
@@ -87,21 +93,31 @@ class Merger
|
|
87
93
|
end
|
88
94
|
fwd_file.lineno = fwd_file.lineno - 1
|
89
95
|
rev_file.lineno = rev_file.lineno - 1
|
96
|
+
else
|
97
|
+
#puts compare_line_fwd
|
98
|
+
fwd_splitted_files[i].pos = fwd_splitted_files[i].pos - compare_line_fwd.length()
|
90
99
|
end
|
91
100
|
end
|
101
|
+
|
92
102
|
if !marker && !rev_splitted_files[i].eof?
|
93
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
94
|
-
|
103
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
104
|
+
name_compare_rev = compare_line_rev.split(" ")
|
105
|
+
if name_compare_rev[0] == name_compare_fwd[0]
|
95
106
|
for k in 1..3
|
96
|
-
rev_splitted_files[i].readline()
|
107
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
97
108
|
end
|
109
|
+
else
|
110
|
+
#puts compare_line_rev
|
111
|
+
rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
98
112
|
end
|
99
113
|
end
|
100
114
|
break if !marker
|
101
115
|
if !rev_splitted_files[i].eof?
|
102
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
103
|
-
|
116
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
117
|
+
name_compare_rev = compare_line_rev.split(" ")
|
118
|
+
if rev_name[0] == name_compare_rev[0] && marker
|
104
119
|
marker = false
|
120
|
+
statistics[i] += 1
|
105
121
|
fwd_out_files[i].write(fwd_line)
|
106
122
|
rev_out_files[i].write(rev_line)
|
107
123
|
for k in 1..3
|
@@ -114,12 +130,15 @@ class Merger
|
|
114
130
|
end
|
115
131
|
fwd_file.lineno = fwd_file.lineno - 1
|
116
132
|
rev_file.lineno = rev_file.lineno - 1
|
133
|
+
else
|
134
|
+
rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
117
135
|
end
|
118
136
|
end
|
119
137
|
break if !marker
|
120
138
|
end
|
121
139
|
|
122
140
|
if marker
|
141
|
+
statistics[-2] += 1
|
123
142
|
fwd_out_unmatched.write(fwd_line)
|
124
143
|
rev_out_unmatched.write(rev_line)
|
125
144
|
for k in 1..3
|
@@ -127,9 +146,12 @@ class Merger
|
|
127
146
|
fwd_out_unmatched.write(fwd_file.readline())
|
128
147
|
end
|
129
148
|
end
|
130
|
-
|
131
149
|
end
|
132
|
-
|
133
|
-
|
150
|
+
stats = ""
|
151
|
+
@sample_ids.each_with_index do |id,i|
|
152
|
+
stats += id +"/t" + statistics[i].to_s + "/n"
|
153
|
+
end
|
154
|
+
stats += "unmatched/t" + statistics[-2].to_s + "/n"
|
155
|
+
stats += "total/t" + statistics[-1].to_s + "/n"
|
134
156
|
end
|
135
157
|
end
|
@@ -3,27 +3,25 @@ require "erubis"
|
|
3
3
|
class NugenTemplate
|
4
4
|
|
5
5
|
def initialize(fastq_multx, options)
|
6
|
-
@template =<<EOF
|
7
|
-
gunzip -c <%= @read %> | #{fastq_multx} \\
|
8
|
-
--bcfile <%= @barcodes %> #{options} <%= @options %> \\
|
9
|
-
--prefix <%= @lane_dir %>/<%= @direction %> \\
|
10
|
-
--suffix ".fq"
|
11
|
-
EOF
|
12
6
|
# @template =<<EOF
|
13
|
-
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
7
|
+
#gunzip -c <%= @read %> | #{fastq_multx} \\
|
8
|
+
# --bcfile <%= @barcodes %> #{options} <%= @options %> \\
|
9
|
+
# --prefix <%= @lane_dir %>/<%= @direction %> \\
|
10
|
+
# --suffix ".fq"
|
17
11
|
#EOF
|
12
|
+
@template =<<EOF
|
13
|
+
#{fastq_multx} #{options} <%= @barcodes %> \\
|
14
|
+
<(gunzip -c <%= @read %>) \\
|
15
|
+
-o <%= @lane_dir %>/<%= @direction %>.%.fq \\
|
16
|
+
>> <%= @lane_dir %>/nugen_demultiplexing.log
|
17
|
+
EOF
|
18
18
|
end
|
19
19
|
|
20
20
|
def fill(lane, number, lane_dir, barcodes, read, is_fwd)
|
21
21
|
if is_fwd
|
22
22
|
direction = "R1_#{number}_"
|
23
|
-
options = "--bol"
|
24
23
|
else
|
25
24
|
direction = "R2_#{number}_"
|
26
|
-
options = "--eol"
|
27
25
|
end
|
28
26
|
context = {
|
29
27
|
:lane => lane,
|
@@ -32,7 +30,7 @@ EOF
|
|
32
30
|
:barcodes => barcodes,
|
33
31
|
:read => read,
|
34
32
|
:direction => direction,
|
35
|
-
|
33
|
+
#:options => options
|
36
34
|
}
|
37
35
|
|
38
36
|
eruby = Erubis::Eruby.new(@template)
|
@@ -54,11 +54,12 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
54
54
|
end
|
55
55
|
|
56
56
|
def test_merger
|
57
|
-
merger = Merger.new("test/fixtures/Sample_Lane5/
|
58
|
-
"test/fixtures/Sample_Lane5/
|
57
|
+
merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
|
58
|
+
"test/fixtures/Sample_Lane5/l.gz",
|
59
59
|
"test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
|
60
60
|
assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
61
|
-
|
61
|
+
stats = merger.merge()
|
62
|
+
assert_equal([28, 18, 26, 21, 7],stats)
|
62
63
|
#assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
|
63
64
|
end
|
64
65
|
end
|