nugen_barcode_splitter 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/nugen_barcode_splitter
CHANGED
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
|
|
5
5
|
require "fileutils"
|
6
6
|
|
7
7
|
usage =<<EOF
|
8
|
-
V E R S I O N - 0 . 0 . 9
|
8
|
+
V E R S I O N - 0 . 0 . 10
|
9
9
|
#{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
|
10
10
|
_____________________________________________________________________________
|
11
11
|
|
@@ -134,32 +134,47 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
134
134
|
rescue Exception => e
|
135
135
|
STDERR.puts e.message
|
136
136
|
end
|
137
|
-
|
137
|
+
# Fwd_read
|
138
|
+
cmd = nugen_temp.fill(lane,number,outdir, barcodes, fwd, true)
|
138
139
|
if options[:debug]
|
139
140
|
STDERR.puts cmd if options[:debug]
|
140
141
|
else
|
141
142
|
status = system('bash', '-c', cmd)
|
142
|
-
raise "Calling the template did not succeed!" if !status
|
143
|
+
raise "Calling the template for fwd did not succeed!" if !status
|
143
144
|
end
|
144
|
-
|
145
|
-
|
146
|
-
# Add the trimmed bases
|
147
|
-
Dir.glob(outdir+"/*").each do |fwd|
|
148
|
-
if fwd =~ /nugen_demultiplexing.log/
|
149
|
-
sample_sheet.lanes
|
150
|
-
statistics = Statistics.new(fwd)
|
151
|
-
end
|
152
|
-
next unless fwd =~ /R1_[0-9]{3}./
|
153
|
-
cmd = "base_adder #{fwd} 4 #{fwd}_added"
|
145
|
+
cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
|
154
146
|
if options[:debug]
|
155
147
|
STDERR.puts cmd if options[:debug]
|
156
148
|
else
|
157
|
-
status = system(cmd)
|
158
|
-
raise "Calling base_adder did not succeed!" if !status
|
159
|
-
cmd = "mv #{fwd}_added #{fwd}"
|
160
|
-
status = system(cmd)
|
161
|
-
raise "Was not able to rename file #{fwd}!" if status!=true
|
149
|
+
status = system('bash', '-c', cmd)
|
150
|
+
raise "Calling the template for rev did not succeed!" if !status
|
162
151
|
end
|
152
|
+
# Merging
|
153
|
+
merger = Merger.new(fwd,rev,outdir,number,barcodes)
|
154
|
+
merger.merge()
|
163
155
|
end
|
164
156
|
|
157
|
+
# Add the trimmed bases
|
158
|
+
#Dir.glob(outdir+"/*").each do |fwd|
|
159
|
+
# if fwd =~ /nugen_demultiplexing.log/
|
160
|
+
# sample_sheet.lanes
|
161
|
+
# statistics = Statistics.new(fwd)
|
162
|
+
# stats_out = outdir + "/statistics"
|
163
|
+
# stats_handler = File.new(stats_out,'w')
|
164
|
+
# stats_handler.write(statistics.to_s)
|
165
|
+
# stats_handler.close()
|
166
|
+
# end
|
167
|
+
# next unless fwd =~ /R1_[0-9]{3}./
|
168
|
+
# cmd = "base_adder #{fwd} 4 #{fwd}_added"
|
169
|
+
# if options[:debug]
|
170
|
+
# STDERR.puts cmd if options[:debug]
|
171
|
+
# else
|
172
|
+
# status = system(cmd)
|
173
|
+
# raise "Calling base_adder did not succeed!" if !status
|
174
|
+
# cmd = "mv #{fwd}_added #{fwd}"
|
175
|
+
# status = system(cmd)
|
176
|
+
# raise "Was not able to rename file #{fwd}!" if status!=true
|
177
|
+
# end
|
178
|
+
#end
|
179
|
+
|
165
180
|
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require "zlib"
|
2
|
+
|
3
|
+
class Merger
|
4
|
+
def initialize(fwd,rev,outdir,number,barcodes)
|
5
|
+
# get sampleID
|
6
|
+
@values_fwd = []
|
7
|
+
@values_rev = []
|
8
|
+
@sample_ids = []
|
9
|
+
i = 0
|
10
|
+
File.open(barcodes).each do |line|
|
11
|
+
next if line.include?("#")
|
12
|
+
line = line.split(" ")
|
13
|
+
@sample_ids[i] = line[0]
|
14
|
+
i += 1
|
15
|
+
end
|
16
|
+
@fwd = fwd
|
17
|
+
@rev = rev
|
18
|
+
@outdir = outdir
|
19
|
+
@number = number
|
20
|
+
end
|
21
|
+
|
22
|
+
attr_accessor :sample_ids, :values_fwd, :values_rev
|
23
|
+
|
24
|
+
#def prepare_hash()
|
25
|
+
# @sample_ids.each_with_index do |sample_id, i|
|
26
|
+
# a = Thread.new {
|
27
|
+
# filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
28
|
+
# filehandler.each do |line|
|
29
|
+
# next unless line.include?("@HWI-")
|
30
|
+
# line = line.split(" ")
|
31
|
+
# name = line[0].split(":")[4..-1].join(":")
|
32
|
+
# @values_fwd[i].store(name,filehandler.pos)
|
33
|
+
# end
|
34
|
+
# filehandler.close()
|
35
|
+
# }
|
36
|
+
# b = Thread.new {
|
37
|
+
# filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
38
|
+
# filehandler.each do |line|
|
39
|
+
# next unless line.include?("@HWI-")
|
40
|
+
# line = line.split(" ")
|
41
|
+
# name = line[0].split(":")[4..-1].join(":")
|
42
|
+
# @values_rev[i].store(name,filehandler.pos)
|
43
|
+
# end
|
44
|
+
# filehandler.close()
|
45
|
+
# }
|
46
|
+
# a.join
|
47
|
+
# b.join
|
48
|
+
# end
|
49
|
+
#end
|
50
|
+
|
51
|
+
def merge()
|
52
|
+
fwd_file = Zlib::GzipReader.open(@fwd)
|
53
|
+
rev_file = Zlib::GzipReader.open(@rev)
|
54
|
+
fwd_splitted_files = []
|
55
|
+
rev_splitted_files = []
|
56
|
+
fwd_out_files = []
|
57
|
+
rev_out_files = []
|
58
|
+
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched_updated.fq",'w')
|
59
|
+
rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched_updated.fq",'w')
|
60
|
+
@sample_ids.each_with_index do |sample_id, i|
|
61
|
+
fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
62
|
+
rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
63
|
+
#OUTFILES????
|
64
|
+
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}_updated.fq",'w')
|
65
|
+
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}_updated.fq",'w')
|
66
|
+
end
|
67
|
+
fwd_file.each do |fwd_line|
|
68
|
+
rev_line = rev_file.readline()
|
69
|
+
rev_name = rev_line.split(" ")
|
70
|
+
fwd_name = fwd_line.split(" ")
|
71
|
+
marker = true
|
72
|
+
|
73
|
+
@sample_ids.each_with_index do |sample_id, i|
|
74
|
+
if !fwd_splitted_files[i].eof?
|
75
|
+
compare_line_fwd = fwd_splitted_files[i].readline().split(" ")
|
76
|
+
if fwd_line[0] == compare_line_fwd[0] && marker
|
77
|
+
marker = false
|
78
|
+
fwd_out_files[i].write(fwd_line)
|
79
|
+
rev_out_files[i].write(rev_line)
|
80
|
+
for k in 1..3
|
81
|
+
fwd_file.readline()
|
82
|
+
rev_splitted_files[i].readline if marker2
|
83
|
+
compare_line_fwd = fwd_splitted_files[i].readline()
|
84
|
+
fwd_out_files[i].write("NNNN"+compare_line_fwd) if k == 1
|
85
|
+
fwd_out_files[i].write(compare_line_fwd) if k == 2
|
86
|
+
fwd_out_files[i].write("@@@@"+compare_line_fwd) if k == 3
|
87
|
+
rev_out_files[i].write(rev_file.readline())
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
if !marker && !rev_splitted_files[i].eof?
|
92
|
+
compare_line_rev = rev_splitted_files[i].readline().split(" ")
|
93
|
+
if compare_line_rev[0] == compare_line_fwd[0]
|
94
|
+
for k in 1..3
|
95
|
+
rev_splitted_files[i].readline()
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
break if !marker
|
100
|
+
if !rev_splitted_files[i].eof?
|
101
|
+
compare_line_rev = rev_splitted_files[i].readline().split(" ")
|
102
|
+
if rev_name[0] == compare_line_rev[0] && marker
|
103
|
+
marker = false
|
104
|
+
fwd_out_files[i].write(fwd_line)
|
105
|
+
rev_out_files[i].write(rev_line)
|
106
|
+
for k in 1..3
|
107
|
+
rev_file.readline()
|
108
|
+
compare_line_rev = rev_splitted_files[i].readline()
|
109
|
+
rev_out_files[i].write("NNNN"+compare_line_rev) if k == 1
|
110
|
+
rev_out_files[i].write(compare_line_rev) if k == 2
|
111
|
+
rev_out_files[i].write("@@@@"+compare_line_rev) if k == 3
|
112
|
+
fwd_out_files[i].write(fwd_file.readline())
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
break if !marker
|
117
|
+
end
|
118
|
+
|
119
|
+
if marker
|
120
|
+
fwd_out_unmatched.write(fwd_line)
|
121
|
+
rev_out_unmatched.write(rev_line)
|
122
|
+
for k in 1..3
|
123
|
+
rev_out_unmatched.write(rev_file.readline())
|
124
|
+
fwd_out_unmatched.write(fwd_file.readline())
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
end
|
132
|
+
end
|
@@ -4,25 +4,26 @@ class NugenTemplate
|
|
4
4
|
|
5
5
|
def initialize(fastq_multx, options)
|
6
6
|
@template =<<EOF
|
7
|
-
#{fastq_multx} #{options}
|
8
|
-
<(gunzip -c <%= @
|
9
|
-
|
10
|
-
-o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq \\
|
7
|
+
#{fastq_multx} #{options} <%= @barcodes %> \\
|
8
|
+
<(gunzip -c <%= @read %>) \\
|
9
|
+
-o <%= @lane_dir %>/<%= @direction %>.%.fq \\
|
11
10
|
>> <%= @lane_dir %>/nugen_demultiplexing.log
|
12
11
|
EOF
|
13
12
|
end
|
14
13
|
|
15
|
-
def fill(lane, number, lane_dir, barcodes,
|
16
|
-
|
14
|
+
def fill(lane, number, lane_dir, barcodes, read, is_fwd)
|
15
|
+
if is_fwd
|
16
|
+
direction = "R1_#{number}"
|
17
|
+
else
|
18
|
+
direction = "R2_#{number}"
|
19
|
+
end
|
17
20
|
context = {
|
18
21
|
:lane => lane,
|
19
22
|
:number => number,
|
20
23
|
:lane_dir => lane_dir,
|
21
24
|
:barcodes => barcodes,
|
22
|
-
:
|
23
|
-
:
|
24
|
-
:r1 => "R1_#{number}",
|
25
|
-
:r2 => "R2_#{number}"
|
25
|
+
:read => read,
|
26
|
+
:direction => direction
|
26
27
|
}
|
27
28
|
|
28
29
|
eruby = Erubis::Eruby.new(@template)
|
@@ -9,9 +9,9 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
9
9
|
def test_nugen_template
|
10
10
|
template = NugenTemplate.new("fastq-multx", "")
|
11
11
|
assert template.to_s.include?("fastq-multx")
|
12
|
-
assert template.to_s.include?("<%= @
|
13
|
-
temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd",
|
14
|
-
assert_equal(temp.to_s, "fastq-multx
|
12
|
+
assert template.to_s.include?("<%= @read %>")
|
13
|
+
temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", true)
|
14
|
+
assert_equal(temp.to_s, "fastq-multx bc \\\n <(gunzip -c fwd) \\\n -o ~/Lane3//R1_33.%.fq \\\n >> ~/Lane3//nugen_demultiplexing.log\n")
|
15
15
|
end
|
16
16
|
|
17
17
|
def test_fastq
|
@@ -52,4 +52,13 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
52
52
|
assert_equal(stats.num_reads[0], 8533927)
|
53
53
|
assert_equal(stats.num_unmatched, 2614681)
|
54
54
|
end
|
55
|
+
|
56
|
+
def test_merger
|
57
|
+
merger = Merger.new("test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R1_001.fastq.gz",
|
58
|
+
"test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R2_001.fastq.gz",
|
59
|
+
"test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
|
60
|
+
assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
61
|
+
#merger.merge
|
62
|
+
#assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
|
63
|
+
end
|
55
64
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nugen_barcode_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -25,6 +25,7 @@ files:
|
|
25
25
|
- bin/nugen_barcode_splitter
|
26
26
|
- lib/nugen_barcode_splitter.rb
|
27
27
|
- lib/nugen_barcode_splitter/fastq.rb
|
28
|
+
- lib/nugen_barcode_splitter/merger.rb
|
28
29
|
- lib/nugen_barcode_splitter/nugen_template.rb
|
29
30
|
- lib/nugen_barcode_splitter/sample_sheet.rb
|
30
31
|
- lib/nugen_barcode_splitter/statistics.rb
|