nugen_barcode_splitter 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/nugen_barcode_splitter
CHANGED
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
|
|
5
5
|
require "fileutils"
|
6
6
|
|
7
7
|
usage =<<EOF
|
8
|
-
V E R S I O N - 0 . 0 .
|
8
|
+
V E R S I O N - 0 . 0 . 12 - BETA
|
9
9
|
#{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
|
10
10
|
_____________________________________________________________________________
|
11
11
|
|
@@ -114,6 +114,8 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
|
|
114
114
|
multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
|
115
115
|
nugen_temp = NugenTemplate.new(options[:fastq_multx],multx_opts)
|
116
116
|
|
117
|
+
statistics = Statistics.new()
|
118
|
+
|
117
119
|
Dir.glob(options[:project_dir]+"/*").each do |p|
|
118
120
|
next unless File.directory? p
|
119
121
|
next unless p =~ /Sample_Lane/
|
@@ -151,30 +153,39 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
151
153
|
end
|
152
154
|
# Merging
|
153
155
|
merger = Merger.new(fwd,rev,outdir,number,barcodes)
|
154
|
-
merger.merge()
|
156
|
+
stats = merger.merge()
|
157
|
+
stats_file = File.open("#{outdir}/nugen_demultiplexing.log", 'a')
|
158
|
+
stats_file.puts(stats)
|
159
|
+
stats_file.close()
|
155
160
|
end
|
156
161
|
|
157
162
|
# Add the trimmed bases
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
163
|
+
Dir.glob(outdir+"/*").each do |f|
|
164
|
+
if f =~ /nugen_demultiplexing.log/
|
165
|
+
sample_sheet.lanes
|
166
|
+
statistics = Statistics.new(f)
|
167
|
+
stats_out = outdir + "/statistics"
|
168
|
+
stats_handler = File.new(stats_out,'w')
|
169
|
+
stats_handler.write(statistics.to_s)
|
170
|
+
stats_handler.close()
|
171
|
+
end
|
172
|
+
next unless f =~ /_updated/
|
173
|
+
name = f.split("/")[-1]
|
174
|
+
id = name.split(".")[1]
|
175
|
+
sample_dir = outdir + "/Sample_#{id}"
|
176
|
+
begin
|
177
|
+
FileUtils.mkdir_p sample_dir unless File.directory? sample_dir
|
178
|
+
rescue Exception => e
|
179
|
+
STDERR.puts e.message
|
180
|
+
end
|
181
|
+
name = name.gsub(/_updated/, "")
|
182
|
+
cmd = `mv #{f} #{sample_dir}/name`
|
183
|
+
if options[:debug]
|
184
|
+
STDERR.puts cmd if options[:debug]
|
185
|
+
else
|
186
|
+
status = system(cmd)
|
187
|
+
raise "Was not able to rename and move file #{f}!" if status!=true
|
188
|
+
end
|
189
|
+
end
|
179
190
|
|
180
191
|
end
|
@@ -49,22 +49,26 @@ class Merger
|
|
49
49
|
#end
|
50
50
|
|
51
51
|
def merge()
|
52
|
+
statistics = Array.new(@sample_ids.length()+2,0)
|
53
|
+
|
52
54
|
fwd_file = Zlib::GzipReader.open(@fwd)
|
53
55
|
rev_file = Zlib::GzipReader.open(@rev)
|
54
56
|
fwd_splitted_files = []
|
55
57
|
rev_splitted_files = []
|
56
58
|
fwd_out_files = []
|
57
59
|
rev_out_files = []
|
58
|
-
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.
|
59
|
-
rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.
|
60
|
+
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
|
61
|
+
rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched.updated.fq",'w')
|
60
62
|
@sample_ids.each_with_index do |sample_id, i|
|
61
63
|
fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
62
64
|
rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
63
65
|
#OUTFILES????
|
64
|
-
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}
|
65
|
-
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}
|
66
|
+
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
|
67
|
+
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
|
66
68
|
end
|
69
|
+
|
67
70
|
fwd_file.each do |fwd_line|
|
71
|
+
statistics[-1] += 1
|
68
72
|
rev_line = rev_file.readline()
|
69
73
|
rev_name = rev_line.split(" ")
|
70
74
|
fwd_name = fwd_line.split(" ")
|
@@ -72,9 +76,11 @@ class Merger
|
|
72
76
|
|
73
77
|
@sample_ids.each_with_index do |sample_id, i|
|
74
78
|
if !fwd_splitted_files[i].eof?
|
75
|
-
compare_line_fwd = fwd_splitted_files[i].readline()
|
76
|
-
|
79
|
+
compare_line_fwd = fwd_splitted_files[i].readline()
|
80
|
+
name_compare_fwd = compare_line_fwd.split(" ")
|
81
|
+
if fwd_line[0] == name_compare_fwd[0] && marker
|
77
82
|
marker = false
|
83
|
+
statistics[i] += 1
|
78
84
|
fwd_out_files[i].write(fwd_line)
|
79
85
|
rev_out_files[i].write(rev_line)
|
80
86
|
for k in 1..3
|
@@ -87,21 +93,31 @@ class Merger
|
|
87
93
|
end
|
88
94
|
fwd_file.lineno = fwd_file.lineno - 1
|
89
95
|
rev_file.lineno = rev_file.lineno - 1
|
96
|
+
else
|
97
|
+
#puts compare_line_fwd
|
98
|
+
fwd_splitted_files[i].pos = fwd_splitted_files[i].pos - compare_line_fwd.length()
|
90
99
|
end
|
91
100
|
end
|
101
|
+
|
92
102
|
if !marker && !rev_splitted_files[i].eof?
|
93
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
94
|
-
|
103
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
104
|
+
name_compare_rev = compare_line_rev.split(" ")
|
105
|
+
if name_compare_rev[0] == name_compare_fwd[0]
|
95
106
|
for k in 1..3
|
96
|
-
rev_splitted_files[i].readline()
|
107
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
97
108
|
end
|
109
|
+
else
|
110
|
+
#puts compare_line_rev
|
111
|
+
rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
98
112
|
end
|
99
113
|
end
|
100
114
|
break if !marker
|
101
115
|
if !rev_splitted_files[i].eof?
|
102
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
103
|
-
|
116
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
117
|
+
name_compare_rev = compare_line_rev.split(" ")
|
118
|
+
if rev_name[0] == name_compare_rev[0] && marker
|
104
119
|
marker = false
|
120
|
+
statistics[i] += 1
|
105
121
|
fwd_out_files[i].write(fwd_line)
|
106
122
|
rev_out_files[i].write(rev_line)
|
107
123
|
for k in 1..3
|
@@ -114,12 +130,15 @@ class Merger
|
|
114
130
|
end
|
115
131
|
fwd_file.lineno = fwd_file.lineno - 1
|
116
132
|
rev_file.lineno = rev_file.lineno - 1
|
133
|
+
else
|
134
|
+
rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
117
135
|
end
|
118
136
|
end
|
119
137
|
break if !marker
|
120
138
|
end
|
121
139
|
|
122
140
|
if marker
|
141
|
+
statistics[-2] += 1
|
123
142
|
fwd_out_unmatched.write(fwd_line)
|
124
143
|
rev_out_unmatched.write(rev_line)
|
125
144
|
for k in 1..3
|
@@ -127,9 +146,12 @@ class Merger
|
|
127
146
|
fwd_out_unmatched.write(fwd_file.readline())
|
128
147
|
end
|
129
148
|
end
|
130
|
-
|
131
149
|
end
|
132
|
-
|
133
|
-
|
150
|
+
stats = ""
|
151
|
+
@sample_ids.each_with_index do |id,i|
|
152
|
+
stats += id +"/t" + statistics[i].to_s + "/n"
|
153
|
+
end
|
154
|
+
stats += "unmatched/t" + statistics[-2].to_s + "/n"
|
155
|
+
stats += "total/t" + statistics[-1].to_s + "/n"
|
134
156
|
end
|
135
157
|
end
|
@@ -3,27 +3,25 @@ require "erubis"
|
|
3
3
|
class NugenTemplate
|
4
4
|
|
5
5
|
def initialize(fastq_multx, options)
|
6
|
-
@template =<<EOF
|
7
|
-
gunzip -c <%= @read %> | #{fastq_multx} \\
|
8
|
-
--bcfile <%= @barcodes %> #{options} <%= @options %> \\
|
9
|
-
--prefix <%= @lane_dir %>/<%= @direction %> \\
|
10
|
-
--suffix ".fq"
|
11
|
-
EOF
|
12
6
|
# @template =<<EOF
|
13
|
-
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
7
|
+
#gunzip -c <%= @read %> | #{fastq_multx} \\
|
8
|
+
# --bcfile <%= @barcodes %> #{options} <%= @options %> \\
|
9
|
+
# --prefix <%= @lane_dir %>/<%= @direction %> \\
|
10
|
+
# --suffix ".fq"
|
17
11
|
#EOF
|
12
|
+
@template =<<EOF
|
13
|
+
#{fastq_multx} #{options} <%= @barcodes %> \\
|
14
|
+
<(gunzip -c <%= @read %>) \\
|
15
|
+
-o <%= @lane_dir %>/<%= @direction %>.%.fq \\
|
16
|
+
>> <%= @lane_dir %>/nugen_demultiplexing.log
|
17
|
+
EOF
|
18
18
|
end
|
19
19
|
|
20
20
|
def fill(lane, number, lane_dir, barcodes, read, is_fwd)
|
21
21
|
if is_fwd
|
22
22
|
direction = "R1_#{number}_"
|
23
|
-
options = "--bol"
|
24
23
|
else
|
25
24
|
direction = "R2_#{number}_"
|
26
|
-
options = "--eol"
|
27
25
|
end
|
28
26
|
context = {
|
29
27
|
:lane => lane,
|
@@ -32,7 +30,7 @@ EOF
|
|
32
30
|
:barcodes => barcodes,
|
33
31
|
:read => read,
|
34
32
|
:direction => direction,
|
35
|
-
|
33
|
+
#:options => options
|
36
34
|
}
|
37
35
|
|
38
36
|
eruby = Erubis::Eruby.new(@template)
|
@@ -54,11 +54,12 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
54
54
|
end
|
55
55
|
|
56
56
|
def test_merger
|
57
|
-
merger = Merger.new("test/fixtures/Sample_Lane5/
|
58
|
-
"test/fixtures/Sample_Lane5/
|
57
|
+
merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
|
58
|
+
"test/fixtures/Sample_Lane5/l.gz",
|
59
59
|
"test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
|
60
60
|
assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
61
|
-
|
61
|
+
stats = merger.merge()
|
62
|
+
assert_equal([28, 18, 26, 21, 7],stats)
|
62
63
|
#assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
|
63
64
|
end
|
64
65
|
end
|