nugen_barcode_splitter 0.0.18 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/nugen_barcode_splitter
CHANGED
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
|
|
5
5
|
require "fileutils"
|
6
6
|
|
7
7
|
usage =<<EOF
|
8
|
-
V E R S I O N -
|
8
|
+
V E R S I O N - 1 . 0 . 0
|
9
9
|
#{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
|
10
10
|
_____________________________________________________________________________
|
11
11
|
|
@@ -33,7 +33,7 @@ options = {
|
|
33
33
|
:bel_only? => "",
|
34
34
|
:mismatches => "",
|
35
35
|
:keep_barcode => "",
|
36
|
-
:
|
36
|
+
:fastx => "fastq_multx",
|
37
37
|
:debug => false,
|
38
38
|
:lane_number => ""
|
39
39
|
}
|
@@ -79,8 +79,8 @@ optparse = OptionParser.new do |opts|
|
|
79
79
|
options[:lane_number] = i if i
|
80
80
|
end
|
81
81
|
|
82
|
-
opts.on("-x", "--
|
83
|
-
options[:
|
82
|
+
opts.on("-x", "--fastx_barcode_splitter.pl DIR", String) do |i|
|
83
|
+
options[:fastx] = i if i
|
84
84
|
end
|
85
85
|
|
86
86
|
opts.on("-d", "--debug", "Debug mode!") do |i|
|
@@ -117,7 +117,7 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
|
|
117
117
|
|
118
118
|
# Prepare template
|
119
119
|
multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
|
120
|
-
nugen_temp = NugenTemplate.new(options[:
|
120
|
+
nugen_temp = NugenTemplate.new(options[:fastx],multx_opts)
|
121
121
|
|
122
122
|
Dir.glob(options[:project_dir]+"/*").each do |p|
|
123
123
|
next unless File.directory? p
|
@@ -147,24 +147,18 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
147
147
|
status = system('bash', '-c', cmd)
|
148
148
|
raise "Calling the template for fwd did not succeed!" if !status
|
149
149
|
end
|
150
|
-
|
151
|
-
#if options[:debug]
|
152
|
-
# STDERR.puts cmd if options[:debug]
|
153
|
-
#else
|
154
|
-
# status = system('bash', '-c', cmd)
|
155
|
-
# raise "Calling the template for rev did not succeed!" if !status
|
156
|
-
#end
|
150
|
+
|
157
151
|
# Merging
|
158
152
|
merger = Merger.new(fwd,rev,outdir,number,barcodes)
|
159
153
|
stats = merger.merge()
|
160
|
-
stats_file = File.open("#{outdir}/
|
154
|
+
stats_file = File.open("#{outdir}/nugen_demultiplexing_Lane#{lane}.log", 'a')
|
161
155
|
stats_file.write(stats)
|
162
156
|
stats_file.close()
|
163
157
|
end
|
164
158
|
|
165
159
|
# Add the trimmed bases
|
166
160
|
Dir.glob(outdir+"/*").each do |f|
|
167
|
-
if f =~ /nugen_demultiplexing
|
161
|
+
if f =~ /nugen_demultiplexing/
|
168
162
|
sample_sheet.lanes
|
169
163
|
statistics = Statistics.new(f)
|
170
164
|
stats_out = outdir + "/statistics"
|
@@ -189,6 +183,13 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
|
|
189
183
|
status = system(cmd)
|
190
184
|
raise "Was not able to rename and move file #{f}!" if status!=true
|
191
185
|
end
|
186
|
+
cmd = "gzip #{sample_dir}/#{name}"
|
187
|
+
if options[:debug]
|
188
|
+
STDERR.puts cmd if options[:debug]
|
189
|
+
else
|
190
|
+
status = system(cmd)
|
191
|
+
raise "Was not able to gzip file #{f}"
|
192
|
+
end
|
192
193
|
end
|
193
194
|
|
194
195
|
end
|
@@ -19,39 +19,11 @@ class Merger
|
|
19
19
|
|
20
20
|
attr_accessor :sample_ids
|
21
21
|
|
22
|
-
#def prepare_hash()
|
23
|
-
# @sample_ids.each_with_index do |sample_id, i|
|
24
|
-
# a = Thread.new {
|
25
|
-
# filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
26
|
-
# filehandler.each do |line|
|
27
|
-
# next unless line.include?("@HWI-")
|
28
|
-
# line = line.split(" ")
|
29
|
-
# name = line[0].split(":")[4..-1].join(":")
|
30
|
-
# @values_fwd[i].store(name,filehandler.pos)
|
31
|
-
# end
|
32
|
-
# filehandler.close()
|
33
|
-
# }
|
34
|
-
# b = Thread.new {
|
35
|
-
# filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
36
|
-
# filehandler.each do |line|
|
37
|
-
# next unless line.include?("@HWI-")
|
38
|
-
# line = line.split(" ")
|
39
|
-
# name = line[0].split(":")[4..-1].join(":")
|
40
|
-
# @values_rev[i].store(name,filehandler.pos)
|
41
|
-
# end
|
42
|
-
# filehandler.close()
|
43
|
-
# }
|
44
|
-
# a.join
|
45
|
-
# b.join
|
46
|
-
# end
|
47
|
-
#end
|
48
|
-
|
49
22
|
def merge()
|
50
23
|
statistics = Array.new(@sample_ids.length()+2,0)
|
51
24
|
fwd_file = Zlib::GzipReader.open(@fwd)
|
52
25
|
rev_file = Zlib::GzipReader.open(@rev)
|
53
26
|
fwd_splitted_files = []
|
54
|
-
rev_splitted_files = []
|
55
27
|
fwd_out_files = []
|
56
28
|
rev_out_files = []
|
57
29
|
fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
|
@@ -59,8 +31,6 @@ class Merger
|
|
59
31
|
|
60
32
|
@sample_ids.each_with_index do |sample_id, i|
|
61
33
|
fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
|
62
|
-
#rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
|
63
|
-
#OUTFILES????
|
64
34
|
fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
|
65
35
|
rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
|
66
36
|
end
|
@@ -72,7 +42,6 @@ class Merger
|
|
72
42
|
fwd_name = fwd_line.split(" ")
|
73
43
|
marker = true
|
74
44
|
|
75
|
-
|
76
45
|
@sample_ids.each_with_index do |sample_id, i|
|
77
46
|
if !fwd_splitted_files[i].eof? && marker
|
78
47
|
compare_line_fwd = fwd_splitted_files[i].readline()
|
@@ -98,47 +67,6 @@ class Merger
|
|
98
67
|
end
|
99
68
|
end
|
100
69
|
|
101
|
-
# if !marker && !rev_splitted_files[i].eof?
|
102
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
103
|
-
# name_compare_rev = compare_line_rev.split(" ")
|
104
|
-
# if name_compare_rev[0] == name_compare_fwd[0]
|
105
|
-
# for k in 1..3
|
106
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
107
|
-
# end
|
108
|
-
# else
|
109
|
-
# #puts compare_line_rev
|
110
|
-
# rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
111
|
-
# end
|
112
|
-
# end
|
113
|
-
# break if !marker
|
114
|
-
# if !rev_splitted_files[i].eof? && marker
|
115
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
116
|
-
# name_compare_rev = compare_line_rev.split(" ")
|
117
|
-
# puts "REV: " + compare_line_rev if i == 2
|
118
|
-
# #puts name_compare_rev[0]
|
119
|
-
# if rev_name[0] == name_compare_rev[0]
|
120
|
-
# marker = false
|
121
|
-
# statistics[i] += 1
|
122
|
-
# fwd_out_files[i].write(fwd_line)
|
123
|
-
# rev_out_files[i].write(rev_line)
|
124
|
-
# for k in 1..3
|
125
|
-
# rev_file.readline()
|
126
|
-
# compare_line_rev = rev_splitted_files[i].readline()
|
127
|
-
# rev_out_files[i].write(compare_line_rev.gsub(/[A-Z]{4}$/,"NNNN")) if k == 1
|
128
|
-
# rev_out_files[i].write(compare_line_rev) if k == 2
|
129
|
-
# rev_out_files[i].write(compare_line_rev.gsub(/[\S]{4}$/,"@@@@")) if k == 3
|
130
|
-
# fwd_out_files[i].write(fwd_file.readline())
|
131
|
-
# end
|
132
|
-
# fwd_file.lineno = fwd_file.lineno - 1
|
133
|
-
# rev_file.lineno = rev_file.lineno - 1
|
134
|
-
# else
|
135
|
-
# rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
|
136
|
-
# end
|
137
|
-
# end
|
138
|
-
# break if !marker
|
139
|
-
#end
|
140
|
-
|
141
|
-
|
142
70
|
if marker
|
143
71
|
statistics[-2] += 1
|
144
72
|
fwd_out_unmatched.write(fwd_line)
|
@@ -149,6 +77,10 @@ class Merger
|
|
149
77
|
end
|
150
78
|
end
|
151
79
|
end
|
80
|
+
stats = make_stats(statistics)
|
81
|
+
end
|
82
|
+
|
83
|
+
def make_stats(statistics)
|
152
84
|
stats = ""
|
153
85
|
@sample_ids.each_with_index do |id,i|
|
154
86
|
stats += id +"\t" + statistics[i].to_s + "\n"
|
@@ -156,4 +88,8 @@ class Merger
|
|
156
88
|
stats += "unmatched\t" + statistics[-2].to_s + "\n"
|
157
89
|
stats += "total\t" + statistics[-1].to_s + "\n"
|
158
90
|
end
|
159
|
-
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
@@ -42,8 +42,8 @@ class Statistics
|
|
42
42
|
@barcodes.each_with_index do |code, i|
|
43
43
|
str += "#{code}:\t#{@num_reads[i]} \n"
|
44
44
|
end
|
45
|
-
|
46
|
-
|
45
|
+
percent = (100 / @total.to_f) * @num_unmatched.to_f
|
46
|
+
percent = (percent.to_f * 100).round / 100.to_f
|
47
47
|
str += "Unmatched:\t#{@num_unmatched}\n"
|
48
48
|
str += "Total:\t#{@total}"
|
49
49
|
str.to_s
|
@@ -40,9 +40,7 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
40
40
|
samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
|
41
41
|
assert_equal(samplesheet.barcodes[3], "TTAG")
|
42
42
|
assert_equal(samplesheet.sample_id[4], "RX3")
|
43
|
-
|
44
43
|
samplesheet.create_barcode_txt("test/fixtures/barcode")
|
45
|
-
|
46
44
|
end
|
47
45
|
|
48
46
|
def test_statistics
|
@@ -54,17 +52,11 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
|
|
54
52
|
end
|
55
53
|
|
56
54
|
def test_merger
|
57
|
-
#merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
|
58
|
-
# "test/fixtures/Sample_Lane5/l.gz",
|
59
|
-
# "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
|
60
|
-
#assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
|
61
|
-
#stats = merger.merge()
|
62
|
-
#assert_equal("RX3\t18\nRX4\t7\nRX3X2\t16\nRX4X2\t8\nunmatched\t1\ntotal\t50\n",stats)
|
63
55
|
merger = Merger.new("test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R1_019.fastq.gz",
|
64
56
|
"test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R2_019.fastq.gz",
|
65
57
|
"test/fixtures/Sample_Lane8", "019", "test/fixtures/barcode_8.txt")
|
58
|
+
assert_equal(["RX9", "RX10", "RX9X2", "RX10X2"], merger.sample_ids)
|
66
59
|
stats = merger.merge()
|
67
|
-
#assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
|
68
60
|
assert_equal("RX9\t22464\nRX10\t28699\nRX9X2\t26434\nRX10X2\t22994\nunmatched\t15445\ntotal\t116036\n",stats)
|
69
61
|
end
|
70
62
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nugen_barcode_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,17 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-24 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "This gem is designed to demultiplex reads\n produced
|
15
15
|
by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
|
16
16
|
email:
|
17
17
|
- katharinaehayer@gmail.com
|
18
18
|
executables:
|
19
|
-
- base_adder
|
20
19
|
- nugen_barcode_splitter
|
21
20
|
extensions: []
|
22
21
|
extra_rdoc_files: []
|
23
22
|
files:
|
24
|
-
- bin/base_adder
|
25
23
|
- bin/nugen_barcode_splitter
|
26
24
|
- lib/nugen_barcode_splitter.rb
|
27
25
|
- lib/nugen_barcode_splitter/fastq.rb
|
data/bin/base_adder
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require "nugen_barcode_splitter"
|
3
|
-
|
4
|
-
usage =<<EOF
|
5
|
-
#{$0} file.fq number_of_bases out.fq
|
6
|
-
|
7
|
-
-_-_-_-_- #{$0} -_-_-_-_-
|
8
|
-
|
9
|
-
Adds N^P bases to the beginning of the
|
10
|
-
fastq file. The Quality is set on the
|
11
|
-
lowest value.
|
12
|
-
EOF
|
13
|
-
|
14
|
-
file_dir = ARGV[0]
|
15
|
-
number_of_bases = ARGV[1].to_i
|
16
|
-
out_dir = ARGV[2]
|
17
|
-
|
18
|
-
fastq = Fastq.new("#{file_dir}")
|
19
|
-
fastq.add(number_of_bases, out_dir)
|
20
|
-
fastq.close
|
21
|
-
|
22
|
-
|