nugen_barcode_splitter 0.0.18 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
5
5
  require "fileutils"
6
6
 
7
7
  usage =<<EOF
8
- V E R S I O N - 0 . 0 . 17 - BETA
8
+ V E R S I O N - 1 . 0 . 0
9
9
  #{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
10
10
  _____________________________________________________________________________
11
11
 
@@ -33,7 +33,7 @@ options = {
33
33
  :bel_only? => "",
34
34
  :mismatches => "",
35
35
  :keep_barcode => "",
36
- :fastq_multx => "fastq_multx",
36
+ :fastx => "fastq_multx",
37
37
  :debug => false,
38
38
  :lane_number => ""
39
39
  }
@@ -79,8 +79,8 @@ optparse = OptionParser.new do |opts|
79
79
  options[:lane_number] = i if i
80
80
  end
81
81
 
82
- opts.on("-x", "--fastq_multx DIR", String) do |i|
83
- options[:fastq_multx] = i if i
82
+ opts.on("-x", "--fastx_barcode_splitter.pl DIR", String) do |i|
83
+ options[:fastx] = i if i
84
84
  end
85
85
 
86
86
  opts.on("-d", "--debug", "Debug mode!") do |i|
@@ -117,7 +117,7 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
117
117
 
118
118
  # Prepare template
119
119
  multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
120
- nugen_temp = NugenTemplate.new(options[:fastq_multx],multx_opts)
120
+ nugen_temp = NugenTemplate.new(options[:fastx],multx_opts)
121
121
 
122
122
  Dir.glob(options[:project_dir]+"/*").each do |p|
123
123
  next unless File.directory? p
@@ -147,24 +147,18 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
147
147
  status = system('bash', '-c', cmd)
148
148
  raise "Calling the template for fwd did not succeed!" if !status
149
149
  end
150
- #cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
151
- #if options[:debug]
152
- # STDERR.puts cmd if options[:debug]
153
- #else
154
- # status = system('bash', '-c', cmd)
155
- # raise "Calling the template for rev did not succeed!" if !status
156
- #end
150
+
157
151
  # Merging
158
152
  merger = Merger.new(fwd,rev,outdir,number,barcodes)
159
153
  stats = merger.merge()
160
- stats_file = File.open("#{outdir}/nugen_demultiplexing.log", 'a')
154
+ stats_file = File.open("#{outdir}/nugen_demultiplexing_Lane#{lane}.log", 'a')
161
155
  stats_file.write(stats)
162
156
  stats_file.close()
163
157
  end
164
158
 
165
159
  # Add the trimmed bases
166
160
  Dir.glob(outdir+"/*").each do |f|
167
- if f =~ /nugen_demultiplexing.log/
161
+ if f =~ /nugen_demultiplexing/
168
162
  sample_sheet.lanes
169
163
  statistics = Statistics.new(f)
170
164
  stats_out = outdir + "/statistics"
@@ -189,6 +183,13 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
189
183
  status = system(cmd)
190
184
  raise "Was not able to rename and move file #{f}!" if status!=true
191
185
  end
186
+ cmd = "gzip #{sample_dir}/#{name}"
187
+ if options[:debug]
188
+ STDERR.puts cmd if options[:debug]
189
+ else
190
+ status = system(cmd)
191
+ raise "Was not able to gzip file #{f}"
192
+ end
192
193
  end
193
194
 
194
195
  end
@@ -19,39 +19,11 @@ class Merger
19
19
 
20
20
  attr_accessor :sample_ids
21
21
 
22
- #def prepare_hash()
23
- # @sample_ids.each_with_index do |sample_id, i|
24
- # a = Thread.new {
25
- # filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
26
- # filehandler.each do |line|
27
- # next unless line.include?("@HWI-")
28
- # line = line.split(" ")
29
- # name = line[0].split(":")[4..-1].join(":")
30
- # @values_fwd[i].store(name,filehandler.pos)
31
- # end
32
- # filehandler.close()
33
- # }
34
- # b = Thread.new {
35
- # filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
36
- # filehandler.each do |line|
37
- # next unless line.include?("@HWI-")
38
- # line = line.split(" ")
39
- # name = line[0].split(":")[4..-1].join(":")
40
- # @values_rev[i].store(name,filehandler.pos)
41
- # end
42
- # filehandler.close()
43
- # }
44
- # a.join
45
- # b.join
46
- # end
47
- #end
48
-
49
22
  def merge()
50
23
  statistics = Array.new(@sample_ids.length()+2,0)
51
24
  fwd_file = Zlib::GzipReader.open(@fwd)
52
25
  rev_file = Zlib::GzipReader.open(@rev)
53
26
  fwd_splitted_files = []
54
- rev_splitted_files = []
55
27
  fwd_out_files = []
56
28
  rev_out_files = []
57
29
  fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
@@ -59,8 +31,6 @@ class Merger
59
31
 
60
32
  @sample_ids.each_with_index do |sample_id, i|
61
33
  fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
62
- #rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
63
- #OUTFILES????
64
34
  fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
65
35
  rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
66
36
  end
@@ -72,7 +42,6 @@ class Merger
72
42
  fwd_name = fwd_line.split(" ")
73
43
  marker = true
74
44
 
75
-
76
45
  @sample_ids.each_with_index do |sample_id, i|
77
46
  if !fwd_splitted_files[i].eof? && marker
78
47
  compare_line_fwd = fwd_splitted_files[i].readline()
@@ -98,47 +67,6 @@ class Merger
98
67
  end
99
68
  end
100
69
 
101
- # if !marker && !rev_splitted_files[i].eof?
102
- # compare_line_rev = rev_splitted_files[i].readline()
103
- # name_compare_rev = compare_line_rev.split(" ")
104
- # if name_compare_rev[0] == name_compare_fwd[0]
105
- # for k in 1..3
106
- # compare_line_rev = rev_splitted_files[i].readline()
107
- # end
108
- # else
109
- # #puts compare_line_rev
110
- # rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
111
- # end
112
- # end
113
- # break if !marker
114
- # if !rev_splitted_files[i].eof? && marker
115
- # compare_line_rev = rev_splitted_files[i].readline()
116
- # name_compare_rev = compare_line_rev.split(" ")
117
- # puts "REV: " + compare_line_rev if i == 2
118
- # #puts name_compare_rev[0]
119
- # if rev_name[0] == name_compare_rev[0]
120
- # marker = false
121
- # statistics[i] += 1
122
- # fwd_out_files[i].write(fwd_line)
123
- # rev_out_files[i].write(rev_line)
124
- # for k in 1..3
125
- # rev_file.readline()
126
- # compare_line_rev = rev_splitted_files[i].readline()
127
- # rev_out_files[i].write(compare_line_rev.gsub(/[A-Z]{4}$/,"NNNN")) if k == 1
128
- # rev_out_files[i].write(compare_line_rev) if k == 2
129
- # rev_out_files[i].write(compare_line_rev.gsub(/[\S]{4}$/,"@@@@")) if k == 3
130
- # fwd_out_files[i].write(fwd_file.readline())
131
- # end
132
- # fwd_file.lineno = fwd_file.lineno - 1
133
- # rev_file.lineno = rev_file.lineno - 1
134
- # else
135
- # rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
136
- # end
137
- # end
138
- # break if !marker
139
- #end
140
-
141
-
142
70
  if marker
143
71
  statistics[-2] += 1
144
72
  fwd_out_unmatched.write(fwd_line)
@@ -149,6 +77,10 @@ class Merger
149
77
  end
150
78
  end
151
79
  end
80
+ stats = make_stats(statistics)
81
+ end
82
+
83
+ def make_stats(statistics)
152
84
  stats = ""
153
85
  @sample_ids.each_with_index do |id,i|
154
86
  stats += id +"\t" + statistics[i].to_s + "\n"
@@ -156,4 +88,8 @@ class Merger
156
88
  stats += "unmatched\t" + statistics[-2].to_s + "\n"
157
89
  stats += "total\t" + statistics[-1].to_s + "\n"
158
90
  end
159
- end
91
+ end
92
+
93
+
94
+
95
+
@@ -42,8 +42,8 @@ class Statistics
42
42
  @barcodes.each_with_index do |code, i|
43
43
  str += "#{code}:\t#{@num_reads[i]} \n"
44
44
  end
45
- #percent = (100 / @total.to_f) * @num_unmatched.to_f
46
- #percent = (percent.to_f * 100).round / 100.to_f
45
+ percent = (100 / @total.to_f) * @num_unmatched.to_f
46
+ percent = (percent.to_f * 100).round / 100.to_f
47
47
  str += "Unmatched:\t#{@num_unmatched}\n"
48
48
  str += "Total:\t#{@total}"
49
49
  str.to_s
@@ -40,9 +40,7 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
40
40
  samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
41
41
  assert_equal(samplesheet.barcodes[3], "TTAG")
42
42
  assert_equal(samplesheet.sample_id[4], "RX3")
43
-
44
43
  samplesheet.create_barcode_txt("test/fixtures/barcode")
45
-
46
44
  end
47
45
 
48
46
  def test_statistics
@@ -54,17 +52,11 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
54
52
  end
55
53
 
56
54
  def test_merger
57
- #merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
58
- # "test/fixtures/Sample_Lane5/l.gz",
59
- # "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
60
- #assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
61
- #stats = merger.merge()
62
- #assert_equal("RX3\t18\nRX4\t7\nRX3X2\t16\nRX4X2\t8\nunmatched\t1\ntotal\t50\n",stats)
63
55
  merger = Merger.new("test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R1_019.fastq.gz",
64
56
  "test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R2_019.fastq.gz",
65
57
  "test/fixtures/Sample_Lane8", "019", "test/fixtures/barcode_8.txt")
58
+ assert_equal(["RX9", "RX10", "RX9X2", "RX10X2"], merger.sample_ids)
66
59
  stats = merger.merge()
67
- #assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
68
60
  assert_equal("RX9\t22464\nRX10\t28699\nRX9X2\t26434\nRX10X2\t22994\nunmatched\t15445\ntotal\t116036\n",stats)
69
61
  end
70
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nugen_barcode_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,19 +9,17 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-22 00:00:00.000000000Z
12
+ date: 2012-02-24 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: ! "This gem is designed to demultiplex reads\n produced
15
15
  by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
16
16
  email:
17
17
  - katharinaehayer@gmail.com
18
18
  executables:
19
- - base_adder
20
19
  - nugen_barcode_splitter
21
20
  extensions: []
22
21
  extra_rdoc_files: []
23
22
  files:
24
- - bin/base_adder
25
23
  - bin/nugen_barcode_splitter
26
24
  - lib/nugen_barcode_splitter.rb
27
25
  - lib/nugen_barcode_splitter/fastq.rb
data/bin/base_adder DELETED
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require "nugen_barcode_splitter"
3
-
4
- usage =<<EOF
5
- #{$0} file.fq number_of_bases out.fq
6
-
7
- -_-_-_-_- #{$0} -_-_-_-_-
8
-
9
- Adds N^P bases to the beginning of the
10
- fastq file. The Quality is set on the
11
- lowest value.
12
- EOF
13
-
14
- file_dir = ARGV[0]
15
- number_of_bases = ARGV[1].to_i
16
- out_dir = ARGV[2]
17
-
18
- fastq = Fastq.new("#{file_dir}")
19
- fastq.add(number_of_bases, out_dir)
20
- fastq.close
21
-
22
-