nugen_barcode_splitter 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
5
5
  require "fileutils"
6
6
 
7
7
  usage =<<EOF
8
- V E R S I O N - 0 . 0 . 11
8
+ V E R S I O N - 0 . 0 . 12 - BETA
9
9
  #{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
10
10
  _____________________________________________________________________________
11
11
 
@@ -114,6 +114,8 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
114
114
  multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
115
115
  nugen_temp = NugenTemplate.new(options[:fastq_multx],multx_opts)
116
116
 
117
+ statistics = Statistics.new()
118
+
117
119
  Dir.glob(options[:project_dir]+"/*").each do |p|
118
120
  next unless File.directory? p
119
121
  next unless p =~ /Sample_Lane/
@@ -151,30 +153,39 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
151
153
  end
152
154
  # Merging
153
155
  merger = Merger.new(fwd,rev,outdir,number,barcodes)
154
- merger.merge()
156
# Run the merge and append the report it returns to this lane's
# demultiplexing log. The block form of File.open closes the handle even
# when the write raises, so no file descriptor is leaked.
stats = merger.merge()
File.open("#{outdir}/nugen_demultiplexing.log", 'a') do |stats_file|
  stats_file.puts(stats)
end
155
160
  end
156
161
 
157
162
  # Add the trimmed bases
158
- #Dir.glob(outdir+"/*").each do |fwd|
159
- # if fwd =~ /nugen_demultiplexing.log/
160
- # sample_sheet.lanes
161
- # statistics = Statistics.new(fwd)
162
- # stats_out = outdir + "/statistics"
163
- # stats_handler = File.new(stats_out,'w')
164
- # stats_handler.write(statistics.to_s)
165
- # stats_handler.close()
166
- # end
167
- # next unless fwd =~ /R1_[0-9]{3}./
168
- # cmd = "base_adder #{fwd} 4 #{fwd}_added"
169
- # if options[:debug]
170
- # STDERR.puts cmd if options[:debug]
171
- # else
172
- # status = system(cmd)
173
- # raise "Calling base_adder did not succeed!" if !status
174
- # cmd = "mv #{fwd}_added #{fwd}"
175
- # status = system(cmd)
176
- # raise "Was not able to rename file #{fwd}!" if status!=true
177
- # end
178
- #end
163
# Post-process every entry of the lane output directory:
#   * the demultiplexing log is turned into a "statistics" file via Statistics#to_s
#   * every merged read file (marked "updated") is moved into a per-sample
#     directory "Sample_<id>", with the "updated" marker stripped from its name.
# In debug mode the move command is only printed to STDERR, never executed.
# Raises RuntimeError when the move fails.
Dir.glob(outdir + "/*").each do |f|
  if f =~ /nugen_demultiplexing.log/
    sample_sheet.lanes
    statistics = Statistics.new(f)
    # Block form guarantees the handle is closed even if the write fails.
    File.open(outdir + "/statistics", 'w') do |stats_handler|
      stats_handler.write(statistics.to_s)
    end
  end

  # Merger names its outputs "<read>_<number>.<sample_id>.updated.fq"; the
  # older "<sample_id>_updated.fq" spelling is still accepted.
  next unless f =~ /[._]updated/

  # Strip the marker first so the sample id is the clean second dot-field.
  name = File.basename(f).sub(/[._]updated/, "")
  id = name.split(".")[1]
  sample_dir = outdir + "/Sample_#{id}"

  begin
    FileUtils.mkdir_p sample_dir unless File.directory? sample_dir
  rescue StandardError => e
    # Best effort, as before: report the problem and let the move below fail loudly.
    STDERR.puts e.message
  end

  target = "#{sample_dir}/#{name}"
  if options[:debug]
    STDERR.puts "mv #{f} #{target}"
  else
    # Argument-list form bypasses the shell, so paths containing spaces or
    # shell metacharacters are passed through untouched.
    status = system("mv", f, target)
    raise "Was not able to rename and move file #{f}!" unless status
  end
end
179
190
 
180
191
  end
@@ -49,22 +49,26 @@ class Merger
49
49
  #end
50
50
 
51
51
  def merge()
52
+ statistics = Array.new(@sample_ids.length()+2,0)
53
+
52
54
  fwd_file = Zlib::GzipReader.open(@fwd)
53
55
  rev_file = Zlib::GzipReader.open(@rev)
54
56
  fwd_splitted_files = []
55
57
  rev_splitted_files = []
56
58
  fwd_out_files = []
57
59
  rev_out_files = []
58
- fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched_updated.fq",'w')
59
- rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched_updated.fq",'w')
60
+ fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
61
+ rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched.updated.fq",'w')
60
62
  @sample_ids.each_with_index do |sample_id, i|
61
63
  fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
62
64
  rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
63
65
  #OUTFILES????
64
- fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}_updated.fq",'w')
65
- rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}_updated.fq",'w')
66
+ fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
67
+ rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
66
68
  end
69
+
67
70
  fwd_file.each do |fwd_line|
71
+ statistics[-1] += 1
68
72
  rev_line = rev_file.readline()
69
73
  rev_name = rev_line.split(" ")
70
74
  fwd_name = fwd_line.split(" ")
@@ -72,9 +76,11 @@ class Merger
72
76
 
73
77
  @sample_ids.each_with_index do |sample_id, i|
74
78
  if !fwd_splitted_files[i].eof?
75
- compare_line_fwd = fwd_splitted_files[i].readline().split(" ")
76
- if fwd_line[0] == compare_line_fwd[0] && marker
79
+ compare_line_fwd = fwd_splitted_files[i].readline()
80
+ name_compare_fwd = compare_line_fwd.split(" ")
81
+ if fwd_line[0] == name_compare_fwd[0] && marker
77
82
  marker = false
83
+ statistics[i] += 1
78
84
  fwd_out_files[i].write(fwd_line)
79
85
  rev_out_files[i].write(rev_line)
80
86
  for k in 1..3
@@ -87,21 +93,31 @@ class Merger
87
93
  end
88
94
  fwd_file.lineno = fwd_file.lineno - 1
89
95
  rev_file.lineno = rev_file.lineno - 1
96
+ else
97
+ #puts compare_line_fwd
98
+ fwd_splitted_files[i].pos = fwd_splitted_files[i].pos - compare_line_fwd.length()
90
99
  end
91
100
  end
101
+
92
102
  if !marker && !rev_splitted_files[i].eof?
93
- compare_line_rev = rev_splitted_files[i].readline().split(" ")
94
- if compare_line_rev[0] == compare_line_fwd[0]
103
+ compare_line_rev = rev_splitted_files[i].readline()
104
+ name_compare_rev = compare_line_rev.split(" ")
105
+ if name_compare_rev[0] == name_compare_fwd[0]
95
106
  for k in 1..3
96
- rev_splitted_files[i].readline()
107
+ compare_line_rev = rev_splitted_files[i].readline()
97
108
  end
109
+ else
110
+ #puts compare_line_rev
111
+ rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
98
112
  end
99
113
  end
100
114
  break if !marker
101
115
  if !rev_splitted_files[i].eof?
102
- compare_line_rev = rev_splitted_files[i].readline().split(" ")
103
- if rev_name[0] == compare_line_rev[0] && marker
116
+ compare_line_rev = rev_splitted_files[i].readline()
117
+ name_compare_rev = compare_line_rev.split(" ")
118
+ if rev_name[0] == name_compare_rev[0] && marker
104
119
  marker = false
120
+ statistics[i] += 1
105
121
  fwd_out_files[i].write(fwd_line)
106
122
  rev_out_files[i].write(rev_line)
107
123
  for k in 1..3
@@ -114,12 +130,15 @@ class Merger
114
130
  end
115
131
  fwd_file.lineno = fwd_file.lineno - 1
116
132
  rev_file.lineno = rev_file.lineno - 1
133
+ else
134
+ rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
117
135
  end
118
136
  end
119
137
  break if !marker
120
138
  end
121
139
 
122
140
  if marker
141
+ statistics[-2] += 1
123
142
  fwd_out_unmatched.write(fwd_line)
124
143
  rev_out_unmatched.write(rev_line)
125
144
  for k in 1..3
@@ -127,9 +146,12 @@ class Merger
127
146
  fwd_out_unmatched.write(fwd_file.readline())
128
147
  end
129
148
  end
130
-
131
149
  end
132
-
133
-
150
# Build the report returned by merge: one "<sample_id>\t<count>" line per
# sample, followed by the counters kept in the last two slots of
# `statistics` (-2 is labelled "unmatched", -1 is labelled "total").
# Real tab/newline escapes are used; the former "/t" and "/n" were literal
# slash sequences and produced a single unbroken line.
# NOTE(review): confirm the Statistics log parser expects tab-separated lines.
stats = ""
@sample_ids.each_with_index do |id, i|
  stats << "#{id}\t#{statistics[i]}\n"
end
stats << "unmatched\t#{statistics[-2]}\n"
stats << "total\t#{statistics[-1]}\n"
stats
134
156
  end
135
157
  end
@@ -3,27 +3,25 @@ require "erubis"
3
3
  class NugenTemplate
4
4
 
5
5
  def initialize(fastq_multx, options)
6
- @template =<<EOF
7
- gunzip -c <%= @read %> | #{fastq_multx} \\
8
- --bcfile <%= @barcodes %> #{options} <%= @options %> \\
9
- --prefix <%= @lane_dir %>/<%= @direction %> \\
10
- --suffix ".fq"
11
- EOF
12
6
  # @template =<<EOF
13
- ##{fastq_multx} #{options} <%= @barcodes %> \\
14
- # <(gunzip -c <%= @read %>) \\
15
- # -o <%= @lane_dir %>/<%= @direction %>.%.fq \\
16
- # >> <%= @lane_dir %>/nugen_demultiplexing.log
7
+ #gunzip -c <%= @read %> | #{fastq_multx} \\
8
+ # --bcfile <%= @barcodes %> #{options} <%= @options %> \\
9
+ # --prefix <%= @lane_dir %>/<%= @direction %> \\
10
+ # --suffix ".fq"
17
11
  #EOF
12
+ @template =<<EOF
13
+ #{fastq_multx} #{options} <%= @barcodes %> \\
14
+ <(gunzip -c <%= @read %>) \\
15
+ -o <%= @lane_dir %>/<%= @direction %>.%.fq \\
16
+ >> <%= @lane_dir %>/nugen_demultiplexing.log
17
+ EOF
18
18
  end
19
19
 
20
20
  def fill(lane, number, lane_dir, barcodes, read, is_fwd)
21
21
  if is_fwd
22
22
  direction = "R1_#{number}_"
23
- options = "--bol"
24
23
  else
25
24
  direction = "R2_#{number}_"
26
- options = "--eol"
27
25
  end
28
26
  context = {
29
27
  :lane => lane,
@@ -32,7 +30,7 @@ EOF
32
30
  :barcodes => barcodes,
33
31
  :read => read,
34
32
  :direction => direction,
35
- :options => options
33
+ #:options => options
36
34
  }
37
35
 
38
36
  eruby = Erubis::Eruby.new(@template)
@@ -1,3 +1,4 @@
1
+ require 'csv'
1
2
  class Statistics
2
3
 
3
4
  def initialize(lane_log)
@@ -54,11 +54,12 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
54
54
  end
55
55
 
56
56
  def test_merger
57
- merger = Merger.new("test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R1_001.fastq.gz",
58
- "test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R2_001.fastq.gz",
57
+ merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
58
+ "test/fixtures/Sample_Lane5/l.gz",
59
59
  "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
60
60
  assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
61
- #merger.merge
61
+ stats = merger.merge()
62
+ assert_equal([28, 18, 26, 21, 7],stats)
62
63
  #assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
63
64
  end
64
65
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nugen_barcode_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: