nugen_barcode_splitter 0.0.17 → 0.0.18
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/bin/nugen_barcode_splitter
CHANGED
|
@@ -147,13 +147,13 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
|
147
147
|
status = system('bash', '-c', cmd)
|
|
148
148
|
raise "Calling the template for fwd did not succeed!" if !status
|
|
149
149
|
end
|
|
150
|
-
cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
|
|
151
|
-
if options[:debug]
|
|
152
|
-
STDERR.puts cmd if options[:debug]
|
|
153
|
-
else
|
|
154
|
-
status = system('bash', '-c', cmd)
|
|
155
|
-
raise "Calling the template for rev did not succeed!" if !status
|
|
156
|
-
end
|
|
150
|
+
#cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
|
|
151
|
+
#if options[:debug]
|
|
152
|
+
# STDERR.puts cmd if options[:debug]
|
|
153
|
+
#else
|
|
154
|
+
# status = system('bash', '-c', cmd)
|
|
155
|
+
# raise "Calling the template for rev did not succeed!" if !status
|
|
156
|
+
#end
|
|
157
157
|
# Merging
|
|
158
158
|
merger = Merger.new(fwd,rev,outdir,number,barcodes)
|
|
159
159
|
stats = merger.merge()
|
|
@@ -3,8 +3,6 @@ require "zlib"
|
|
|
3
3
|
class Merger
|
|
4
4
|
def initialize(fwd,rev,outdir,number,barcodes)
|
|
5
5
|
# get sampleID
|
|
6
|
-
@values_fwd = []
|
|
7
|
-
@values_rev = []
|
|
8
6
|
@sample_ids = []
|
|
9
7
|
i = 0
|
|
10
8
|
File.open(barcodes).each do |line|
|
|
@@ -19,7 +17,7 @@ class Merger
|
|
|
19
17
|
@number = number
|
|
20
18
|
end
|
|
21
19
|
|
|
22
|
-
attr_accessor :sample_ids
|
|
20
|
+
attr_accessor :sample_ids
|
|
23
21
|
|
|
24
22
|
#def prepare_hash()
|
|
25
23
|
# @sample_ids.each_with_index do |sample_id, i|
|
|
@@ -61,7 +59,7 @@ class Merger
|
|
|
61
59
|
|
|
62
60
|
@sample_ids.each_with_index do |sample_id, i|
|
|
63
61
|
fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
|
64
|
-
rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
|
62
|
+
#rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
|
65
63
|
#OUTFILES????
|
|
66
64
|
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
|
|
67
65
|
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
|
|
@@ -74,11 +72,12 @@ class Merger
|
|
|
74
72
|
fwd_name = fwd_line.split(" ")
|
|
75
73
|
marker = true
|
|
76
74
|
|
|
75
|
+
|
|
77
76
|
@sample_ids.each_with_index do |sample_id, i|
|
|
78
|
-
if !fwd_splitted_files[i].eof?
|
|
77
|
+
if !fwd_splitted_files[i].eof? && marker
|
|
79
78
|
compare_line_fwd = fwd_splitted_files[i].readline()
|
|
80
79
|
name_compare_fwd = compare_line_fwd.split(" ")
|
|
81
|
-
if fwd_name[0] == name_compare_fwd[0]
|
|
80
|
+
if fwd_name[0] == name_compare_fwd[0]
|
|
82
81
|
marker = false
|
|
83
82
|
statistics[i] += 1
|
|
84
83
|
fwd_out_files[i].write(fwd_line)
|
|
@@ -94,49 +93,52 @@ class Merger
|
|
|
94
93
|
fwd_file.lineno = fwd_file.lineno - 1
|
|
95
94
|
rev_file.lineno = rev_file.lineno - 1
|
|
96
95
|
else
|
|
97
|
-
#puts compare_line_fwd
|
|
98
96
|
fwd_splitted_files[i].pos = fwd_splitted_files[i].pos - compare_line_fwd.length()
|
|
99
97
|
end
|
|
100
98
|
end
|
|
101
|
-
|
|
102
|
-
if !marker && !rev_splitted_files[i].eof?
|
|
103
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
|
104
|
-
name_compare_rev = compare_line_rev.split(" ")
|
|
105
|
-
if name_compare_rev[0] == name_compare_fwd[0]
|
|
106
|
-
for k in 1..3
|
|
107
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
|
108
|
-
end
|
|
109
|
-
else
|
|
110
|
-
#puts compare_line_rev
|
|
111
|
-
rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
break if !marker
|
|
115
|
-
if !rev_splitted_files[i].eof?
|
|
116
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
|
117
|
-
name_compare_rev = compare_line_rev.split(" ")
|
|
118
|
-
if rev_name[0] == name_compare_rev[0] && marker
|
|
119
|
-
marker = false
|
|
120
|
-
statistics[i] += 1
|
|
121
|
-
fwd_out_files[i].write(fwd_line)
|
|
122
|
-
rev_out_files[i].write(rev_line)
|
|
123
|
-
for k in 1..3
|
|
124
|
-
rev_file.readline()
|
|
125
|
-
compare_line_rev = rev_splitted_files[i].readline()
|
|
126
|
-
rev_out_files[i].write(compare_line_rev.gsub(/[A-Z]{4}$/,"NNNN")) if k == 1
|
|
127
|
-
rev_out_files[i].write(compare_line_rev) if k == 2
|
|
128
|
-
rev_out_files[i].write(compare_line_rev.gsub(/[\S]{4}$/,"@@@@")) if k == 3
|
|
129
|
-
fwd_out_files[i].write(fwd_file.readline())
|
|
130
|
-
end
|
|
131
|
-
fwd_file.lineno = fwd_file.lineno - 1
|
|
132
|
-
rev_file.lineno = rev_file.lineno - 1
|
|
133
|
-
else
|
|
134
|
-
rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
|
135
|
-
end
|
|
136
|
-
end
|
|
137
|
-
break if !marker
|
|
138
99
|
end
|
|
139
100
|
|
|
101
|
+
# if !marker && !rev_splitted_files[i].eof?
|
|
102
|
+
# compare_line_rev = rev_splitted_files[i].readline()
|
|
103
|
+
# name_compare_rev = compare_line_rev.split(" ")
|
|
104
|
+
# if name_compare_rev[0] == name_compare_fwd[0]
|
|
105
|
+
# for k in 1..3
|
|
106
|
+
# compare_line_rev = rev_splitted_files[i].readline()
|
|
107
|
+
# end
|
|
108
|
+
# else
|
|
109
|
+
# #puts compare_line_rev
|
|
110
|
+
# rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
|
111
|
+
# end
|
|
112
|
+
# end
|
|
113
|
+
# break if !marker
|
|
114
|
+
# if !rev_splitted_files[i].eof? && marker
|
|
115
|
+
# compare_line_rev = rev_splitted_files[i].readline()
|
|
116
|
+
# name_compare_rev = compare_line_rev.split(" ")
|
|
117
|
+
# puts "REV: " + compare_line_rev if i == 2
|
|
118
|
+
# #puts name_compare_rev[0]
|
|
119
|
+
# if rev_name[0] == name_compare_rev[0]
|
|
120
|
+
# marker = false
|
|
121
|
+
# statistics[i] += 1
|
|
122
|
+
# fwd_out_files[i].write(fwd_line)
|
|
123
|
+
# rev_out_files[i].write(rev_line)
|
|
124
|
+
# for k in 1..3
|
|
125
|
+
# rev_file.readline()
|
|
126
|
+
# compare_line_rev = rev_splitted_files[i].readline()
|
|
127
|
+
# rev_out_files[i].write(compare_line_rev.gsub(/[A-Z]{4}$/,"NNNN")) if k == 1
|
|
128
|
+
# rev_out_files[i].write(compare_line_rev) if k == 2
|
|
129
|
+
# rev_out_files[i].write(compare_line_rev.gsub(/[\S]{4}$/,"@@@@")) if k == 3
|
|
130
|
+
# fwd_out_files[i].write(fwd_file.readline())
|
|
131
|
+
# end
|
|
132
|
+
# fwd_file.lineno = fwd_file.lineno - 1
|
|
133
|
+
# rev_file.lineno = rev_file.lineno - 1
|
|
134
|
+
# else
|
|
135
|
+
# rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
|
136
|
+
# end
|
|
137
|
+
# end
|
|
138
|
+
# break if !marker
|
|
139
|
+
#end
|
|
140
|
+
|
|
141
|
+
|
|
140
142
|
if marker
|
|
141
143
|
statistics[-2] += 1
|
|
142
144
|
fwd_out_unmatched.write(fwd_line)
|
|
@@ -11,7 +11,7 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
|
11
11
|
assert template.to_s.include?("fastq-multx")
|
|
12
12
|
assert template.to_s.include?("<%= @read %>")
|
|
13
13
|
temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", true)
|
|
14
|
-
assert_equal(temp.to_s, "fastq-multx
|
|
14
|
+
assert_equal(temp.to_s, "gunzip -c fwd | fastq-multx \\\n --bcfile bc --bol \\\n --prefix ~/Lane3//R1_33. \\\n --suffix \".fq\"\n")
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def test_fastq
|
|
@@ -54,12 +54,17 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
|
54
54
|
end
|
|
55
55
|
|
|
56
56
|
def test_merger
|
|
57
|
-
merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
|
57
|
+
#merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
|
|
58
|
+
# "test/fixtures/Sample_Lane5/l.gz",
|
|
59
|
+
# "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
|
|
60
|
+
#assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
|
61
|
+
#stats = merger.merge()
|
|
62
|
+
#assert_equal("RX3\t18\nRX4\t7\nRX3X2\t16\nRX4X2\t8\nunmatched\t1\ntotal\t50\n",stats)
|
|
63
|
+
merger = Merger.new("test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R1_019.fastq.gz",
|
|
64
|
+
"test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R2_019.fastq.gz",
|
|
65
|
+
"test/fixtures/Sample_Lane8", "019", "test/fixtures/barcode_8.txt")
|
|
61
66
|
stats = merger.merge()
|
|
62
|
-
assert_equal([28, 18, 26, 21, 7],stats)
|
|
63
67
|
#assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
|
|
68
|
+
assert_equal("RX9\t22464\nRX10\t28699\nRX9X2\t26434\nRX10X2\t22994\nunmatched\t15445\ntotal\t116036\n",stats)
|
|
64
69
|
end
|
|
65
70
|
end
|