nugen_barcode_splitter 0.0.18 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
data/bin/nugen_barcode_splitter
CHANGED
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
|
|
5
5
|
require "fileutils"
|
6
6
|
|
7
7
|
usage =<<EOF
|
8
|
-
V E R S I O N -
|
8
|
+
V E R S I O N - 1 . 0 . 0
|
9
9
|
#{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
|
10
10
|
_____________________________________________________________________________
|
11
11
|
|
@@ -33,7 +33,7 @@ options = {
|
|
33
33
|
:bel_only? => "",
|
34
34
|
:mismatches => "",
|
35
35
|
:keep_barcode => "",
|
36
|
-
:
|
36
|
+
:fastx => "fastq_multx",
|
37
37
|
:debug => false,
|
38
38
|
:lane_number => ""
|
39
39
|
}
|
@@ -79,8 +79,8 @@ optparse = OptionParser.new do |opts|
|
|
79
79
|
options[:lane_number] = i if i
|
80
80
|
end
|
81
81
|
|
82
|
-
opts.on("-x", "--
|
83
|
-
options[:
|
82
|
+
opts.on("-x", "--fastx_barcode_splitter.pl DIR", String) do |i|
|
83
|
+
options[:fastx] = i if i
|
84
84
|
end
|
85
85
|
|
86
86
|
opts.on("-d", "--debug", "Debug mode!") do |i|
|
@@ -117,7 +117,7 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
|
|
117
117
|
|
118
118
|
# Prepare template
|
119
119
|
multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
|
120
|
-
nugen_temp = NugenTemplate.new(options[:
|
120
|
+
nugen_temp = NugenTemplate.new(options[:fastx],multx_opts)
|
121
121
|
|
122
122
|
Dir.glob(options[:project_dir]+"/*").each do |p|
|
123
123
|
next unless File.directory? p
|
@@ -147,24 +147,18 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
147
147
|
status = system('bash', '-c', cmd)
|
148
148
|
raise "Calling the template for fwd did not succeed!" if !status
|
149
149
|
end
|
150
|
-
|
151
|
-
#if options[:debug]
|
152
|
-
# STDERR.puts cmd if options[:debug]
|
153
|
-
#else
|
154
|
-
# status = system('bash', '-c', cmd)
|
155
|
-
# raise "Calling the template for rev did not succeed!" if !status
|
156
|
-
#end
|
150
|
+
|
157
151
|
# Merging
|
158
152
|
merger = Merger.new(fwd,rev,outdir,number,barcodes)
|
159
153
|
stats = merger.merge()
|
160
|
-
stats_file = File.open("#{outdir}/
|
154
|
+
stats_file = File.open("#{outdir}/nugen_demultiplexing_Lane#{lane}.log", 'a')
|
161
155
|
stats_file.write(stats)
|
162
156
|
stats_file.close()
|
163
157
|
end
|
164
158
|
|
165
159
|
# Add the trimmed bases
|
166
160
|
Dir.glob(outdir+"/*").each do |f|
|
167
|
-
if f =~ /nugen_demultiplexing
|
161
|
+
if f =~ /nugen_demultiplexing/
|
168
162
|
sample_sheet.lanes
|
169
163
|
statistics = Statistics.new(f)
|
170
164
|
stats_out = outdir + "/statistics"
|
@@ -189,6 +183,13 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
189
183
|
status = system(cmd)
|
190
184
|
raise "Was not able to rename and move file #{f}!" if status!=true
|
191
185
|
end
|
186
|
+
cmd = "gzip #{sample_dir}/#{name}"
|
187
|
+
if options[:debug]
|
188
|
+
STDERR.puts cmd if options[:debug]
|
189
|
+
else
|
190
|
+
status = system(cmd)
|
191
|
+
# Kernel#system returns true only on a zero exit status (false on a non-zero
# exit, nil if the command could not be run), so fail only in those cases
# instead of aborting after every gzip call.
raise "Was not able to gzip file #{f}" unless status
|
192
|
+
end
|
192
193
|
end
|
193
194
|
|
194
195
|
end
|
@@ -19,39 +19,11 @@ class Merger
|
|
19
19
|
|
20
20
|
attr_accessor :sample_ids
|
21
21
|
|
22
|
-
#def prepare_hash()
|
23
|
-
# @sample_ids.each_with_index do |sample_id, i|
|
24
|
-
# a = Thread.new {
|
25
|
-
# filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
26
|
-
# filehandler.each do |line|
|
27
|
-
# next unless line.include?("@HWI-")
|
28
|
-
# line = line.split(" ")
|
29
|
-
# name = line[0].split(":")[4..-1].join(":")
|
30
|
-
# @values_fwd[i].store(name,filehandler.pos)
|
31
|
-
# end
|
32
|
-
# filehandler.close()
|
33
|
-
# }
|
34
|
-
# b = Thread.new {
|
35
|
-
# filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
36
|
-
# filehandler.each do |line|
|
37
|
-
# next unless line.include?("@HWI-")
|
38
|
-
# line = line.split(" ")
|
39
|
-
# name = line[0].split(":")[4..-1].join(":")
|
40
|
-
# @values_rev[i].store(name,filehandler.pos)
|
41
|
-
# end
|
42
|
-
# filehandler.close()
|
43
|
-
# }
|
44
|
-
# a.join
|
45
|
-
# b.join
|
46
|
-
# end
|
47
|
-
#end
|
48
|
-
|
49
22
|
def merge()
|
50
23
|
statistics = Array.new(@sample_ids.length()+2,0)
|
51
24
|
fwd_file = Zlib::GzipReader.open(@fwd)
|
52
25
|
rev_file = Zlib::GzipReader.open(@rev)
|
53
26
|
fwd_splitted_files = []
|
54
|
-
rev_splitted_files = []
|
55
27
|
fwd_out_files = []
|
56
28
|
rev_out_files = []
|
57
29
|
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
|
@@ -59,8 +31,6 @@ class Merger
|
|
59
31
|
|
60
32
|
@sample_ids.each_with_index do |sample_id, i|
|
61
33
|
fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
62
|
-
#rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
63
|
-
#OUTFILES????
|
64
34
|
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
|
65
35
|
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
|
66
36
|
end
|
@@ -72,7 +42,6 @@ class Merger
|
|
72
42
|
fwd_name = fwd_line.split(" ")
|
73
43
|
marker = true
|
74
44
|
|
75
|
-
|
76
45
|
@sample_ids.each_with_index do |sample_id, i|
|
77
46
|
if !fwd_splitted_files[i].eof? && marker
|
78
47
|
compare_line_fwd = fwd_splitted_files[i].readline()
|
@@ -98,47 +67,6 @@ class Merger
|
|
98
67
|
end
|
99
68
|
end
|
100
69
|
|
101
|
-
# if !marker && !rev_splitted_files[i].eof?
|
102
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
103
|
-
# name_compare_rev = compare_line_rev.split(" ")
|
104
|
-
# if name_compare_rev[0] == name_compare_fwd[0]
|
105
|
-
# for k in 1..3
|
106
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
107
|
-
# end
|
108
|
-
# else
|
109
|
-
# #puts compare_line_rev
|
110
|
-
# rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
111
|
-
# end
|
112
|
-
# end
|
113
|
-
# break if !marker
|
114
|
-
# if !rev_splitted_files[i].eof? && marker
|
115
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
116
|
-
# name_compare_rev = compare_line_rev.split(" ")
|
117
|
-
# puts "REV: " + compare_line_rev if i == 2
|
118
|
-
# #puts name_compare_rev[0]
|
119
|
-
# if rev_name[0] == name_compare_rev[0]
|
120
|
-
# marker = false
|
121
|
-
# statistics[i] += 1
|
122
|
-
# fwd_out_files[i].write(fwd_line)
|
123
|
-
# rev_out_files[i].write(rev_line)
|
124
|
-
# for k in 1..3
|
125
|
-
# rev_file.readline()
|
126
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
127
|
-
# rev_out_files[i].write(compare_line_rev.gsub(/[A-Z]{4}$/,"NNNN")) if k == 1
|
128
|
-
# rev_out_files[i].write(compare_line_rev) if k == 2
|
129
|
-
# rev_out_files[i].write(compare_line_rev.gsub(/[\S]{4}$/,"@@@@")) if k == 3
|
130
|
-
# fwd_out_files[i].write(fwd_file.readline())
|
131
|
-
# end
|
132
|
-
# fwd_file.lineno = fwd_file.lineno - 1
|
133
|
-
# rev_file.lineno = rev_file.lineno - 1
|
134
|
-
# else
|
135
|
-
# rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
136
|
-
# end
|
137
|
-
# end
|
138
|
-
# break if !marker
|
139
|
-
#end
|
140
|
-
|
141
|
-
|
142
70
|
if marker
|
143
71
|
statistics[-2] += 1
|
144
72
|
fwd_out_unmatched.write(fwd_line)
|
@@ -149,6 +77,10 @@ class Merger
|
|
149
77
|
end
|
150
78
|
end
|
151
79
|
end
|
80
|
+
stats = make_stats(statistics)
|
81
|
+
end
|
82
|
+
|
83
|
+
def make_stats(statistics)
|
152
84
|
stats = ""
|
153
85
|
@sample_ids.each_with_index do |id,i|
|
154
86
|
stats += id +"\t" + statistics[i].to_s + "\n"
|
@@ -156,4 +88,8 @@ class Merger
|
|
156
88
|
stats += "unmatched\t" + statistics[-2].to_s + "\n"
|
157
89
|
stats += "total\t" + statistics[-1].to_s + "\n"
|
158
90
|
end
|
159
|
-
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
@@ -42,8 +42,8 @@ class Statistics
|
|
42
42
|
@barcodes.each_with_index do |code, i|
|
43
43
|
str += "#{code}:\t#{@num_reads[i]} \n"
|
44
44
|
end
|
45
|
-
|
46
|
-
|
45
|
+
percent = (100 / @total.to_f) * @num_unmatched.to_f
|
46
|
+
percent = (percent.to_f * 100).round / 100.to_f
|
47
47
|
str += "Unmatched:\t#{@num_unmatched}\n"
|
48
48
|
str += "Total:\t#{@total}"
|
49
49
|
str.to_s
|
@@ -40,9 +40,7 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
40
40
|
samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
|
41
41
|
assert_equal(samplesheet.barcodes[3], "TTAG")
|
42
42
|
assert_equal(samplesheet.sample_id[4], "RX3")
|
43
|
-
|
44
43
|
samplesheet.create_barcode_txt("test/fixtures/barcode")
|
45
|
-
|
46
44
|
end
|
47
45
|
|
48
46
|
def test_statistics
|
@@ -54,17 +52,11 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
54
52
|
end
|
55
53
|
|
56
54
|
def test_merger
|
57
|
-
#merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
|
58
|
-
# "test/fixtures/Sample_Lane5/l.gz",
|
59
|
-
# "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
|
60
|
-
#assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
61
|
-
#stats = merger.merge()
|
62
|
-
#assert_equal("RX3\t18\nRX4\t7\nRX3X2\t16\nRX4X2\t8\nunmatched\t1\ntotal\t50\n",stats)
|
63
55
|
merger = Merger.new("test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R1_019.fastq.gz",
|
64
56
|
"test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R2_019.fastq.gz",
|
65
57
|
"test/fixtures/Sample_Lane8", "019", "test/fixtures/barcode_8.txt")
|
58
|
+
assert_equal(["RX9", "RX10", "RX9X2", "RX10X2"], merger.sample_ids)
|
66
59
|
stats = merger.merge()
|
67
|
-
#assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
|
68
60
|
assert_equal("RX9\t22464\nRX10\t28699\nRX9X2\t26434\nRX10X2\t22994\nunmatched\t15445\ntotal\t116036\n",stats)
|
69
61
|
end
|
70
62
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nugen_barcode_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,17 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-24 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "This gem is designed to demultiplex reads\n produced
|
15
15
|
by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
|
16
16
|
email:
|
17
17
|
- katharinaehayer@gmail.com
|
18
18
|
executables:
|
19
|
-
- base_adder
|
20
19
|
- nugen_barcode_splitter
|
21
20
|
extensions: []
|
22
21
|
extra_rdoc_files: []
|
23
22
|
files:
|
24
|
-
- bin/base_adder
|
25
23
|
- bin/nugen_barcode_splitter
|
26
24
|
- lib/nugen_barcode_splitter.rb
|
27
25
|
- lib/nugen_barcode_splitter/fastq.rb
|
data/bin/base_adder
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require "nugen_barcode_splitter"
|
3
|
-
|
4
|
-
usage =<<EOF
|
5
|
-
#{$0} file.fq number_of_bases out.fq
|
6
|
-
|
7
|
-
-_-_-_-_- #{$0} -_-_-_-_-
|
8
|
-
|
9
|
-
Adds N^P bases to the beginning of the
|
10
|
-
fastq file. The Quality is set on the
|
11
|
-
lowest value.
|
12
|
-
EOF
|
13
|
-
|
14
|
-
file_dir = ARGV[0]
|
15
|
-
number_of_bases = ARGV[1].to_i
|
16
|
-
out_dir = ARGV[2]
|
17
|
-
|
18
|
-
fastq = Fastq.new("#{file_dir}")
|
19
|
-
fastq.add(number_of_bases, out_dir)
|
20
|
-
fastq.close
|
21
|
-
|
22
|
-
|