nugen_barcode_splitter 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
5
5
  require "fileutils"
6
6
 
7
7
  usage =<<EOF
8
- V E R S I O N - 0 . 0 . 11
8
+ V E R S I O N - 0 . 0 . 12 - BETA
9
9
  #{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
10
10
  _____________________________________________________________________________
11
11
 
@@ -114,6 +114,8 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
114
114
  multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
115
115
  nugen_temp = NugenTemplate.new(options[:fastq_multx],multx_opts)
116
116
 
117
+ statistics = Statistics.new()
118
+
117
119
  Dir.glob(options[:project_dir]+"/*").each do |p|
118
120
  next unless File.directory? p
119
121
  next unless p =~ /Sample_Lane/
@@ -151,30 +153,39 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
151
153
  end
152
154
  # Merging
153
155
  merger = Merger.new(fwd,rev,outdir,number,barcodes)
154
- merger.merge()
156
+ stats = merger.merge()
157
+ stats_file = File.open("#{outdir}/nugen_demultiplexing.log", 'a')
158
+ stats_file.puts(stats)
159
+ stats_file.close()
155
160
  end
156
161
 
157
162
  # Add the trimmed bases
158
- #Dir.glob(outdir+"/*").each do |fwd|
159
- # if fwd =~ /nugen_demultiplexing.log/
160
- # sample_sheet.lanes
161
- # statistics = Statistics.new(fwd)
162
- # stats_out = outdir + "/statistics"
163
- # stats_handler = File.new(stats_out,'w')
164
- # stats_handler.write(statistics.to_s)
165
- # stats_handler.close()
166
- # end
167
- # next unless fwd =~ /R1_[0-9]{3}./
168
- # cmd = "base_adder #{fwd} 4 #{fwd}_added"
169
- # if options[:debug]
170
- # STDERR.puts cmd if options[:debug]
171
- # else
172
- # status = system(cmd)
173
- # raise "Calling base_adder did not succeed!" if !status
174
- # cmd = "mv #{fwd}_added #{fwd}"
175
- # status = system(cmd)
176
- # raise "Was not able to rename file #{fwd}!" if status!=true
177
- # end
178
- #end
163
+ Dir.glob(outdir+"/*").each do |f|
164
+ if f =~ /nugen_demultiplexing.log/
165
+ sample_sheet.lanes
166
+ statistics = Statistics.new(f)
167
+ stats_out = outdir + "/statistics"
168
+ stats_handler = File.new(stats_out,'w')
169
+ stats_handler.write(statistics.to_s)
170
+ stats_handler.close()
171
+ end
172
+ next unless f =~ /_updated/
173
+ name = f.split("/")[-1]
174
+ id = name.split(".")[1]
175
+ sample_dir = outdir + "/Sample_#{id}"
176
+ begin
177
+ FileUtils.mkdir_p sample_dir unless File.directory? sample_dir
178
+ rescue Exception => e
179
+ STDERR.puts e.message
180
+ end
181
+ name = name.gsub(/_updated/, "")
182
+ cmd = `mv #{f} #{sample_dir}/name`
183
+ if options[:debug]
184
+ STDERR.puts cmd if options[:debug]
185
+ else
186
+ status = system(cmd)
187
+ raise "Was not able to rename and move file #{f}!" if status!=true
188
+ end
189
+ end
179
190
 
180
191
  end
@@ -49,22 +49,26 @@ class Merger
49
49
  #end
50
50
 
51
51
  def merge()
52
+ statistics = Array.new(@sample_ids.length()+2,0)
53
+
52
54
  fwd_file = Zlib::GzipReader.open(@fwd)
53
55
  rev_file = Zlib::GzipReader.open(@rev)
54
56
  fwd_splitted_files = []
55
57
  rev_splitted_files = []
56
58
  fwd_out_files = []
57
59
  rev_out_files = []
58
- fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched_updated.fq",'w')
59
- rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched_updated.fq",'w')
60
+ fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
61
+ rev_out_unmatched = File.open(@outdir+"/R2_#{@number}.unmatched.updated.fq",'w')
60
62
  @sample_ids.each_with_index do |sample_id, i|
61
63
  fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
62
64
  rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
63
65
  #OUTFILES????
64
- fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}_updated.fq",'w')
65
- rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}_updated.fq",'w')
66
+ fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
67
+ rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
66
68
  end
69
+
67
70
  fwd_file.each do |fwd_line|
71
+ statistics[-1] += 1
68
72
  rev_line = rev_file.readline()
69
73
  rev_name = rev_line.split(" ")
70
74
  fwd_name = fwd_line.split(" ")
@@ -72,9 +76,11 @@ class Merger
72
76
 
73
77
  @sample_ids.each_with_index do |sample_id, i|
74
78
  if !fwd_splitted_files[i].eof?
75
- compare_line_fwd = fwd_splitted_files[i].readline().split(" ")
76
- if fwd_line[0] == compare_line_fwd[0] && marker
79
+ compare_line_fwd = fwd_splitted_files[i].readline()
80
+ name_compare_fwd = compare_line_fwd.split(" ")
81
+ if fwd_line[0] == name_compare_fwd[0] && marker
77
82
  marker = false
83
+ statistics[i] += 1
78
84
  fwd_out_files[i].write(fwd_line)
79
85
  rev_out_files[i].write(rev_line)
80
86
  for k in 1..3
@@ -87,21 +93,31 @@ class Merger
87
93
  end
88
94
  fwd_file.lineno = fwd_file.lineno - 1
89
95
  rev_file.lineno = rev_file.lineno - 1
96
+ else
97
+ #puts compare_line_fwd
98
+ fwd_splitted_files[i].pos = fwd_splitted_files[i].pos - compare_line_fwd.length()
90
99
  end
91
100
  end
101
+
92
102
  if !marker && !rev_splitted_files[i].eof?
93
- compare_line_rev = rev_splitted_files[i].readline().split(" ")
94
- if compare_line_rev[0] == compare_line_fwd[0]
103
+ compare_line_rev = rev_splitted_files[i].readline()
104
+ name_compare_rev = compare_line_rev.split(" ")
105
+ if name_compare_rev[0] == name_compare_fwd[0]
95
106
  for k in 1..3
96
- rev_splitted_files[i].readline()
107
+ compare_line_rev = rev_splitted_files[i].readline()
97
108
  end
109
+ else
110
+ #puts compare_line_rev
111
+ rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
98
112
  end
99
113
  end
100
114
  break if !marker
101
115
  if !rev_splitted_files[i].eof?
102
- compare_line_rev = rev_splitted_files[i].readline().split(" ")
103
- if rev_name[0] == compare_line_rev[0] && marker
116
+ compare_line_rev = rev_splitted_files[i].readline()
117
+ name_compare_rev = compare_line_rev.split(" ")
118
+ if rev_name[0] == name_compare_rev[0] && marker
104
119
  marker = false
120
+ statistics[i] += 1
105
121
  fwd_out_files[i].write(fwd_line)
106
122
  rev_out_files[i].write(rev_line)
107
123
  for k in 1..3
@@ -114,12 +130,15 @@ class Merger
114
130
  end
115
131
  fwd_file.lineno = fwd_file.lineno - 1
116
132
  rev_file.lineno = rev_file.lineno - 1
133
+ else
134
+ rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
117
135
  end
118
136
  end
119
137
  break if !marker
120
138
  end
121
139
 
122
140
  if marker
141
+ statistics[-2] += 1
123
142
  fwd_out_unmatched.write(fwd_line)
124
143
  rev_out_unmatched.write(rev_line)
125
144
  for k in 1..3
@@ -127,9 +146,12 @@ class Merger
127
146
  fwd_out_unmatched.write(fwd_file.readline())
128
147
  end
129
148
  end
130
-
131
149
  end
132
-
133
-
150
+ stats = ""
151
+ @sample_ids.each_with_index do |id,i|
152
+ stats += id +"/t" + statistics[i].to_s + "/n"
153
+ end
154
+ stats += "unmatched/t" + statistics[-2].to_s + "/n"
155
+ stats += "total/t" + statistics[-1].to_s + "/n"
134
156
  end
135
157
  end
@@ -3,27 +3,25 @@ require "erubis"
3
3
  class NugenTemplate
4
4
 
5
5
  def initialize(fastq_multx, options)
6
- @template =<<EOF
7
- gunzip -c <%= @read %> | #{fastq_multx} \\
8
- --bcfile <%= @barcodes %> #{options} <%= @options %> \\
9
- --prefix <%= @lane_dir %>/<%= @direction %> \\
10
- --suffix ".fq"
11
- EOF
12
6
  # @template =<<EOF
13
- ##{fastq_multx} #{options} <%= @barcodes %> \\
14
- # <(gunzip -c <%= @read %>) \\
15
- # -o <%= @lane_dir %>/<%= @direction %>.%.fq \\
16
- # >> <%= @lane_dir %>/nugen_demultiplexing.log
7
+ #gunzip -c <%= @read %> | #{fastq_multx} \\
8
+ # --bcfile <%= @barcodes %> #{options} <%= @options %> \\
9
+ # --prefix <%= @lane_dir %>/<%= @direction %> \\
10
+ # --suffix ".fq"
17
11
  #EOF
12
+ @template =<<EOF
13
+ #{fastq_multx} #{options} <%= @barcodes %> \\
14
+ <(gunzip -c <%= @read %>) \\
15
+ -o <%= @lane_dir %>/<%= @direction %>.%.fq \\
16
+ >> <%= @lane_dir %>/nugen_demultiplexing.log
17
+ EOF
18
18
  end
19
19
 
20
20
  def fill(lane, number, lane_dir, barcodes, read, is_fwd)
21
21
  if is_fwd
22
22
  direction = "R1_#{number}_"
23
- options = "--bol"
24
23
  else
25
24
  direction = "R2_#{number}_"
26
- options = "--eol"
27
25
  end
28
26
  context = {
29
27
  :lane => lane,
@@ -32,7 +30,7 @@ EOF
32
30
  :barcodes => barcodes,
33
31
  :read => read,
34
32
  :direction => direction,
35
- :options => options
33
+ #:options => options
36
34
  }
37
35
 
38
36
  eruby = Erubis::Eruby.new(@template)
@@ -1,3 +1,4 @@
1
+ require 'csv'
1
2
  class Statistics
2
3
 
3
4
  def initialize(lane_log)
@@ -54,11 +54,12 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
54
54
  end
55
55
 
56
56
  def test_merger
57
- merger = Merger.new("test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R1_001.fastq.gz",
58
- "test/fixtures/Sample_Lane5/Lane5_NoIndex_L005_R2_001.fastq.gz",
57
+ merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
58
+ "test/fixtures/Sample_Lane5/l.gz",
59
59
  "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
60
60
  assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
61
- #merger.merge
61
+ stats = merger.merge()
62
+ assert_equal([28, 18, 26, 21, 7],stats)
62
63
  #assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
63
64
  end
64
65
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nugen_barcode_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: