nugen_barcode_splitter 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
5
5
  require "fileutils"
6
6
 
7
7
  usage =<<EOF
8
- V E R S I O N - 0 . 0 . 8
8
+ V E R S I O N - 0 . 0 . 10
9
9
  #{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
10
10
  _____________________________________________________________________________
11
11
 
@@ -134,32 +134,47 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
134
134
  rescue Exception => e
135
135
  STDERR.puts e.message
136
136
  end
137
- cmd = nugen_temp.fill(lane,number,outdir, barcodes, fwd, rev)
137
+ # Fwd_read
138
+ cmd = nugen_temp.fill(lane,number,outdir, barcodes, fwd, true)
138
139
  if options[:debug]
139
140
  STDERR.puts cmd if options[:debug]
140
141
  else
141
142
  status = system('bash', '-c', cmd)
142
- raise "Calling the template did not succeed!" if !status
143
+ raise "Calling the template for fwd did not succeed!" if !status
143
144
  end
144
- end
145
-
146
- # Add the trimmed bases
147
- Dir.glob(outdir+"/*").each do |fwd|
148
- if fwd =~ /nugen_demultiplexing.log/
149
- sample_sheet.lanes
150
- statistics = Statistics.new(fwd)
151
- end
152
- next unless fwd =~ /R1_[0-9]{3}./
153
- cmd = "base_adder #{fwd} 4 #{fwd}_added"
145
+ cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
154
146
  if options[:debug]
155
147
  STDERR.puts cmd if options[:debug]
156
148
  else
157
- status = system(cmd)
158
- raise "Calling base_adder did not succeed!" if !status
159
- cmd = "mv #{fwd}_added #{fwd}"
160
- status = system(cmd)
161
- raise "Was not able to rename file #{fwd}!" if status!=true
149
+ status = system('bash', '-c', cmd)
150
+ raise "Calling the template for rev did not succeed!" if !status
162
151
  end
152
+ # Merging
153
+ merger = Merger.new(fwd,rev,outdir,number,barcodes)
154
+ merger.merge()
163
155
  end
164
156
 
157
+ # Add the trimmed bases
158
+ #Dir.glob(outdir+"/*").each do |fwd|
159
+ # if fwd =~ /nugen_demultiplexing.log/
160
+ # sample_sheet.lanes
161
+ # statistics = Statistics.new(fwd)
162
+ # stats_out = outdir + "/statistics"
163
+ # stats_handler = File.new(stats_out,'w')
164
+ # stats_handler.write(statistics.to_s)
165
+ # stats_handler.close()
166
+ # end
167
+ # next unless fwd =~ /R1_[0-9]{3}./
168
+ # cmd = "base_adder #{fwd} 4 #{fwd}_added"
169
+ # if options[:debug]
170
+ # STDERR.puts cmd if options[:debug]
171
+ # else
172
+ # status = system(cmd)
173
+ # raise "Calling base_adder did not succeed!" if !status
174
+ # cmd = "mv #{fwd}_added #{fwd}"
175
+ # status = system(cmd)
176
+ # raise "Was not able to rename file #{fwd}!" if status!=true
177
+ # end
178
+ #end
179
+
165
180
  end
@@ -0,0 +1,132 @@
1
+ require "zlib"
2
+
3
+ class Merger
4
+ def initialize(fwd,rev,outdir,number,barcodes)
5
+ # get sampleID
6
+ @values_fwd = []
7
+ @values_rev = []
8
+ @sample_ids = []
9
+ i = 0
10
+ File.open(barcodes).each do |line|
11
+ next if line.include?("#")
12
+ line = line.split(" ")
13
+ @sample_ids[i] = line[0]
14
+ i += 1
15
+ end
16
+ @fwd = fwd
17
+ @rev = rev
18
+ @outdir = outdir
19
+ @number = number
20
+ end
21
+
22
+ attr_accessor :sample_ids, :values_fwd, :values_rev
23
+
24
+ #def prepare_hash()
25
+ # @sample_ids.each_with_index do |sample_id, i|
26
+ # a = Thread.new {
27
+ # filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
28
+ # filehandler.each do |line|
29
+ # next unless line.include?("@HWI-")
30
+ # line = line.split(" ")
31
+ # name = line[0].split(":")[4..-1].join(":")
32
+ # @values_fwd[i].store(name,filehandler.pos)
33
+ # end
34
+ # filehandler.close()
35
+ # }
36
+ # b = Thread.new {
37
+ # filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
38
+ # filehandler.each do |line|
39
+ # next unless line.include?("@HWI-")
40
+ # line = line.split(" ")
41
+ # name = line[0].split(":")[4..-1].join(":")
42
+ # @values_rev[i].store(name,filehandler.pos)
43
+ # end
44
+ # filehandler.close()
45
+ # }
46
+ # a.join
47
+ # b.join
48
+ # end
49
+ #end
50
+
51
+ def merge()
52
+ fwd_file = Zlib::GzipReader.open(@fwd)
53
+ rev_file = Zlib::GzipReader.open(@rev)
54
+ fwd_splitted_files = []
55
+ rev_splitted_files = []
56
+ fwd_out_files = []
57
+ rev_out_files = []
58
+ fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched_updated.fq",'w')
59
+ rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched_updated.fq",'w')
60
+ @sample_ids.each_with_index do |sample_id, i|
61
+ fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
62
+ rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
63
+ #OUTFILES????
64
+ fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}_updated.fq",'w')
65
+ rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}_updated.fq",'w')
66
+ end
67
+ fwd_file.each do |fwd_line|
68
+ rev_line = rev_file.readline()
69
+ rev_name = rev_line.split(" ")
70
+ fwd_name = fwd_line.split(" ")
71
+ marker = true
72
+
73
+ @sample_ids.each_with_index do |sample_id, i|
74
+ if !fwd_splitted_files[i].eof?
75
+ compare_line_fwd = fwd_splitted_files[i].readline().split(" ")
76
+ if fwd_line[0] == compare_line_fwd[0] && marker
77
+ marker = false
78
+ fwd_out_files[i].write(fwd_line)
79
+ rev_out_files[i].write(rev_line)
80
+ for k in 1..3
81
+ fwd_file.readline()
82
+ rev_splitted_files[i].readline if marker2
83
+ compare_line_fwd = fwd_splitted_files[i].readline()
84
+ fwd_out_files[i].write("NNNN"+compare_line_fwd) if k == 1
85
+ fwd_out_files[i].write(compare_line_fwd) if k == 2
86
+ fwd_out_files[i].write("@@@@"+compare_line_fwd) if k == 3
87
+ rev_out_files[i].write(rev_file.readline())
88
+ end
89
+ end
90
+ end
91
+ if !marker && !rev_splitted_files[i].eof?
92
+ compare_line_rev = rev_splitted_files[i].readline().split(" ")
93
+ if compare_line_rev[0] == compare_line_fwd[0]
94
+ for k in 1..3
95
+ rev_splitted_files[i].readline()
96
+ end
97
+ end
98
+ end
99
+ break if !marker
100
+ if !rev_splitted_files[i].eof?
101
+ compare_line_rev = rev_splitted_files[i].readline().split(" ")
102
+ if rev_name[0] == compare_line_rev[0] && marker
103
+ marker = false
104
+ fwd_out_files[i].write(fwd_line)
105
+ rev_out_files[i].write(rev_line)
106
+ for k in 1..3
107
+ rev_file.readline()
108
+ compare_line_rev = rev_splitted_files[i].readline()
109
+ rev_out_files[i].write("NNNN"+compare_line_rev) if k == 1
110
+ rev_out_files[i].write(compare_line_rev) if k == 2
111
+ rev_out_files[i].write("@@@@"+compare_line_rev) if k == 3
112
+ fwd_out_files[i].write(fwd_file.readline())
113
+ end
114
+ end
115
+ end
116
+ break if !marker
117
+ end
118
+
119
+ if marker
120
+ fwd_out_unmatched.write(fwd_line)
121
+ rev_out_unmatched.write(rev_line)
122
+ for k in 1..3
123
+ rev_out_unmatched.write(rev_file.readline())
124
+ fwd_out_unmatched.write(fwd_file.readline())
125
+ end
126
+ end
127
+
128
+ end
129
+
130
+
131
+ end
132
+ end
@@ -4,25 +4,26 @@ class NugenTemplate
4
4
 
5
5
  def initialize(fastq_multx, options)
6
6
  @template =<<EOF
7
- #{fastq_multx} #{options} <%= @barcodes %> \\
8
- <(gunzip -c <%= @fwd %>) \\
9
- <(gunzip -c <%= @rev %>) \\
10
- -o <%= @lane_dir %>/<%= @r1 %>.%.fq <%= @lane_dir %>/<%= @r2 %>.%.fq \\
7
+ #{fastq_multx} #{options} <%= @barcodes %> \\
8
+ <(gunzip -c <%= @read %>) \\
9
+ -o <%= @lane_dir %>/<%= @direction %>.%.fq \\
11
10
  >> <%= @lane_dir %>/nugen_demultiplexing.log
12
11
  EOF
13
12
  end
14
13
 
15
- def fill(lane, number, lane_dir, barcodes, fwd, rev)
16
-
14
+ def fill(lane, number, lane_dir, barcodes, read, is_fwd)
15
+ if is_fwd
16
+ direction = "R1_#{number}"
17
+ else
18
+ direction = "R2_#{number}"
19
+ end
17
20
  context = {
18
21
  :lane => lane,
19
22
  :number => number,
20
23
  :lane_dir => lane_dir,
21
24
  :barcodes => barcodes,
22
- :fwd => fwd,
23
- :rev => rev,
24
- :r1 => "R1_#{number}",
25
- :r2 => "R2_#{number}"
25
+ :read => read,
26
+ :direction => direction
26
27
  }
27
28
 
28
29
  eruby = Erubis::Eruby.new(@template)
@@ -2,7 +2,7 @@ require "nugen_barcode_splitter/nugen_template"
2
2
  require "nugen_barcode_splitter/fastq"
3
3
  require "nugen_barcode_splitter/sample_sheet"
4
4
  require "nugen_barcode_splitter/statistics"
5
-
5
+ require "nugen_barcode_splitter/merger.rb"
6
6
  class NugenBarcodeSplitter
7
7
 
8
8
  end
@@ -9,9 +9,9 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
9
9
  def test_nugen_template
10
10
  template = NugenTemplate.new("fastq-multx", "")
11
11
  assert template.to_s.include?("fastq-multx")
12
- assert template.to_s.include?("<%= @fwd %>")
13
- temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", "rev")
14
- assert_equal(temp.to_s, "fastq-multx bc \\\n <(gunzip -c fwd) \\\n <(gunzip -c rev) \\\n -o ~/Lane3//R1_33.%.fq ~/Lane3//R2_33.%.fq \\\n >> ~/Lane3//nugen_demultiplexing.log\n")
12
+ assert template.to_s.include?("<%= @read %>")
13
+ temp = template.fill("Lane_3", "33", "~/Lane3/", "bc", "fwd", true)
14
+ assert_equal(temp.to_s, "fastq-multx bc \\\n <(gunzip -c fwd) \\\n -o ~/Lane3//R1_33.%.fq \\\n >> ~/Lane3//nugen_demultiplexing.log\n")
15
15
  end
16
16
 
17
17
  def test_fastq
@@ -52,4 +52,13 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
52
52
  assert_equal(stats.num_reads[0], 8533927)
53
53
  assert_equal(stats.num_unmatched, 2614681)
54
54
  end
55
+
56
+ def test_merger
57
+ merger = Merger.new("test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R1_001.fastq.gz",
58
+ "test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R2_001.fastq.gz",
59
+ "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
60
+ assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
61
+ #merger.merge
62
+ #assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
63
+ end
55
64
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nugen_barcode_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -25,6 +25,7 @@ files:
25
25
  - bin/nugen_barcode_splitter
26
26
  - lib/nugen_barcode_splitter.rb
27
27
  - lib/nugen_barcode_splitter/fastq.rb
28
+ - lib/nugen_barcode_splitter/merger.rb
28
29
  - lib/nugen_barcode_splitter/nugen_template.rb
29
30
  - lib/nugen_barcode_splitter/sample_sheet.rb
30
31
  - lib/nugen_barcode_splitter/statistics.rb