nugen_barcode_splitter 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
5
5
  require "fileutils"
6
6
 
7
7
  usage =<<EOF
8
- V E R S I O N - 0 . 0 . 8
8
+ V E R S I O N - 0 . 0 . 10
9
9
  #{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
10
10
  _____________________________________________________________________________
11
11
 
@@ -134,32 +134,47 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
134
134
  rescue Exception => e
135
135
  STDERR.puts e.message
136
136
  end
137
- cmd = nugen_temp.fill(lane,number,outdir, barcodes, fwd, rev)
137
+ # Fwd_read
138
+ cmd = nugen_temp.fill(lane,number,outdir, barcodes, fwd, true)
138
139
  if options[:debug]
139
140
  STDERR.puts cmd if options[:debug]
140
141
  else
141
142
  status = system('bash', '-c', cmd)
142
- raise "Calling the template did not succeed!" if !status
143
+ raise "Calling the template for fwd did not succeed!" if !status
143
144
  end
144
- end
145
-
146
- # Add the trimmed bases
147
- Dir.glob(outdir+"/*").each do |fwd|
148
- if fwd =~ /nugen_demultiplexing.log/
149
- sample_sheet.lanes
150
- statistics = Statistics.new(fwd)
151
- end
152
- next unless fwd =~ /R1_[0-9]{3}./
153
- cmd = "base_adder #{fwd} 4 #{fwd}_added"
145
+ cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
154
146
  if options[:debug]
155
147
  STDERR.puts cmd if options[:debug]
156
148
  else
157
- status = system(cmd)
158
- raise "Calling base_adder did not succeed!" if !status
159
- cmd = "mv #{fwd}_added #{fwd}"
160
- status = system(cmd)
161
- raise "Was not able to rename file #{fwd}!" if status!=true
149
+ status = system('bash', '-c', cmd)
150
+ raise "Calling the template for rev did not succeed!" if !status
162
151
  end
152
+ # Merging
153
+ merger = Merger.new(fwd,rev,outdir,number,barcodes)
154
+ merger.merge()
163
155
  end
164
156
 
157
+ # Add the trimmed bases
158
+ #Dir.glob(outdir+"/*").each do |fwd|
159
+ # if fwd =~ /nugen_demultiplexing.log/
160
+ # sample_sheet.lanes
161
+ # statistics = Statistics.new(fwd)
162
+ # stats_out = outdir + "/statistics"
163
+ # stats_handler = File.new(stats_out,'w')
164
+ # stats_handler.write(statistics.to_s)
165
+ # stats_handler.close()
166
+ # end
167
+ # next unless fwd =~ /R1_[0-9]{3}./
168
+ # cmd = "base_adder #{fwd} 4 #{fwd}_added"
169
+ # if options[:debug]
170
+ # STDERR.puts cmd if options[:debug]
171
+ # else
172
+ # status = system(cmd)
173
+ # raise "Calling base_adder did not succeed!" if !status
174
+ # cmd = "mv #{fwd}_added #{fwd}"
175
+ # status = system(cmd)
176
+ # raise "Was not able to rename file #{fwd}!" if status!=true
177
+ # end
178
+ #end
179
+
165
180
  end
@@ -0,0 +1,132 @@
1
+ require "zlib"
2
+
3
+ class Merger
4
+ def initialize(fwd,rev,outdir,number,barcodes)
5
+ # get sampleID
6
+ @values_fwd = []
7
+ @values_rev = []
8
+ @sample_ids = []
9
+ i = 0
10
+ File.open(barcodes).each do |line|
11
+ next if line.include?("#")
12
+ line = line.split(" ")
13
+ @sample_ids[i] = line[0]
14
+ i += 1
15
+ end
16
+ @fwd = fwd
17
+ @rev = rev
18
+ @outdir = outdir
19
+ @number = number
20
+ end
21
+
22
+ attr_accessor :sample_ids, :values_fwd, :values_rev
23
+
24
+ #def prepare_hash()
25
+ # @sample_ids.each_with_index do |sample_id, i|
26
+ # a = Thread.new {
27
+ # filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
28
+ # filehandler.each do |line|
29
+ # next unless line.include?("@HWI-")
30
+ # line = line.split(" ")
31
+ # name = line[0].split(":")[4..-1].join(":")
32
+ # @values_fwd[i].store(name,filehandler.pos)
33
+ # end
34
+ # filehandler.close()
35
+ # }
36
+ # b = Thread.new {
37
+ # filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
38
+ # filehandler.each do |line|
39
+ # next unless line.include?("@HWI-")
40
+ # line = line.split(" ")
41
+ # name = line[0].split(":")[4..-1].join(":")
42
+ # @values_rev[i].store(name,filehandler.pos)
43
+ # end
44
+ # filehandler.close()
45
+ # }
46
+ # a.join
47
+ # b.join
48
+ # end
49
+ #end
50
+
51
+ def merge()
52
+ fwd_file = Zlib::GzipReader.open(@fwd)
53
+ rev_file = Zlib::GzipReader.open(@rev)
54
+ fwd_splitted_files = []
55
+ rev_splitted_files = []
56
+ fwd_out_files = []
57
+ rev_out_files = []
58
+ fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched_updated.fq",'w')
59
+ rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched_updated.fq",'w')
60
+ @sample_ids.each_with_index do |sample_id, i|
61
+ fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
62
+ rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
63
+ #OUTFILES????
64
+ fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}_updated.fq",'w')
65
+ rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}_updated.fq",'w')
66
+ end
67
+ fwd_file.each do |fwd_line|
68
+ rev_line = rev_file.readline()
69
+ rev_name = rev_line.split(" ")
70
+ fwd_name = fwd_line.split(" ")
71
+ marker = true
72
+
73
+ @sample_ids.each_with_index do |sample_id, i|
74
+ if !fwd_splitted_files[i].eof?
75
+ compare_line_fwd = fwd_splitted_files[i].readline().split(" ")
76
+ if fwd_line[0] == compare_line_fwd[0] && marker
77
+ marker = false
78
+ fwd_out_files[i].write(fwd_line)
79
+ rev_out_files[i].write(rev_line)
80
+ for k in 1..3
81
+ fwd_file.readline()
82
+ rev_splitted_files[i].readline if marker2
83
+ compare_line_fwd = fwd_splitted_files[i].readline()
84
+ fwd_out_files[i].write("NNNN"+compare_line_fwd) if k == 1
85
+ fwd_out_files[i].write(compare_line_fwd) if k == 2
86
+ fwd_out_files[i].write("@@@@"+compare_line_fwd) if k == 3
87
+ rev_out_files[i].write(rev_file.readline())
88
+ end
89
+ end
90
+ end
91
+ if !marker && !rev_splitted_files[i].eof?
92
+ compare_line_rev = rev_splitted_files[i].readline().split(" ")
93
+ if compare_line_rev[0] == compare_line_fwd[0]
94
+ for k in 1..3
95
+ rev_splitted_files[i].readline()
96
+ end
97
+ end
98
+ end
99
+ break if !marker
100
+ if !rev_splitted_files[i].eof?
101
+ compare_line_rev = rev_splitted_files[i].readline().split(" ")
102
+ if rev_name[0] == compare_line_rev[0] && marker
103
+ marker = false
104
+ fwd_out_files[i].write(fwd_line)
105
+ rev_out_files[i].write(rev_line)
106
+ for k in 1..3
107
+ rev_file.readline()
108
+ compare_line_rev = rev_splitted_files[i].readline()
109
+ rev_out_files[i].write("NNNN"+compare_line_rev) if k == 1
110
+ rev_out_files[i].write(compare_line_rev) if k == 2
111
+ rev_out_files[i].write("@@@@"+compare_line_rev) if k == 3
112
+ fwd_out_files[i].write(fwd_file.readline())
113
+ end
114
+ end
115
+ end
116
+ break if !marker
117
+ end
118
+
119
+ if marker
120
+ fwd_out_unmatched.write(fwd_line)
121
+ rev_out_unmatched.write(rev_line)
122
+ for k in 1..3
123
+ rev_out_unmatched.write(rev_file.readline())
124
+ fwd_out_unmatched.write(fwd_file.readline())
125
+ end
126
+ end
127
+
128
+ end
129
+
130
+
131
+ end
132
+ end
@@ -4,25 +4,26 @@ class NugenTemplate
4
4
 
5
5
  def initialize(fastq_multx, options)
6
6
  @template =<<EOF
7
- #{fastq_multx} #{options} <%= @barcodes %> \\
8
- <(gunzip -c <%= @fwd %>) \\
9
- <(gunzip -c <%= @rev %>) \\
10
- -o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq \\
7
+ #{fastq_multx} #{options} <%= @barcodes %> \\
8
+ <(gunzip -c <%= @read %>) \\
9
+ -o <%= @lane_dir %>/<%= @direction %>.%.fq \\
11
10
  >> <%= @lane_dir %>/nugen_demultiplexing.log
12
11
  EOF
13
12
  end
14
13
 
15
- def fill(lane, number, lane_dir, barcodes, fwd, rev)
16
-
14
+ def fill(lane, number, lane_dir, barcodes, read, is_fwd)
15
+ if is_fwd
16
+ direction = "R1_#{number}"
17
+ else
18
+ direction = "R2_#{number}"
19
+ end
17
20
  context = {
18
21
  :lane => lane,
19
22
  :number => number,
20
23
  :lane_dir => lane_dir,
21
24
  :barcodes => barcodes,
22
- :fwd => fwd,
23
- :rev => rev,
24
- :r1 => "R1_#{number}",
25
- :r2 => "R2_#{number}"
25
+ :read => read,
26
+ :direction => direction
26
27
  }
27
28
 
28
29
  eruby = Erubis::Eruby.new(@template)
@@ -2,7 +2,7 @@ require "nugen_barcode_splitter/nugen_template"
2
2
  require "nugen_barcode_splitter/fastq"
3
3
  require "nugen_barcode_splitter/sample_sheet"
4
4
  require "nugen_barcode_splitter/statistics"
5
-
5
+ require "nugen_barcode_splitter/merger.rb"
6
6
  class NugenBarcodeSplitter
7
7
 
8
8
  end
@@ -9,9 +9,9 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
9
9
  def test_nugen_template
10
10
  template = NugenTemplate.new("fastq-multx", "")
11
11
  assert template.to_s.include?("fastq-multx")
12
- assert template.to_s.include?("<%= @fwd %>")
13
- temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", "rev")
14
- assert_equal(temp.to_s, "fastq-multx bc \\\n <(gunzip -c fwd) \\\n <(gunzip -c rev) \\\n -o ~/Lane3//R1_33.%.fq ~/Lane3//R2_33.%.fq \\\n >> ~/Lane3//nugen_demultiplexing.log\n")
12
+ assert template.to_s.include?("<%= @read %>")
13
+ temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", true)
14
+ assert_equal(temp.to_s, "fastq-multx bc \\\n <(gunzip -c fwd) \\\n -o ~/Lane3//R1_33.%.fq \\\n >> ~/Lane3//nugen_demultiplexing.log\n")
15
15
  end
16
16
 
17
17
  def test_fastq
@@ -52,4 +52,13 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
52
52
  assert_equal(stats.num_reads[0], 8533927)
53
53
  assert_equal(stats.num_unmatched, 2614681)
54
54
  end
55
+
56
+ def test_merger
57
+ merger = Merger.new("test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R1_001.fastq.gz",
58
+ "test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R2_001.fastq.gz",
59
+ "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
60
+ assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
61
+ #merger.merge
62
+ #assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
63
+ end
55
64
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nugen_barcode_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -25,6 +25,7 @@ files:
25
25
  - bin/nugen_barcode_splitter
26
26
  - lib/nugen_barcode_splitter.rb
27
27
  - lib/nugen_barcode_splitter/fastq.rb
28
+ - lib/nugen_barcode_splitter/merger.rb
28
29
  - lib/nugen_barcode_splitter/nugen_template.rb
29
30
  - lib/nugen_barcode_splitter/sample_sheet.rb
30
31
  - lib/nugen_barcode_splitter/statistics.rb