nugen_barcode_splitter 0.0.18 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ require "nugen_barcode_splitter"
5
5
  require "fileutils"
6
6
 
7
7
  usage =<<EOF
8
- V E R S I O N - 0 . 0 . 17 - BETA
8
+ V E R S I O N - 1 . 0 . 0
9
9
  #{$0} [options] -p project_dir -o out_dir -c sample_sheet_nugen
10
10
  _____________________________________________________________________________
11
11
 
@@ -33,7 +33,7 @@ options = {
33
33
  :bel_only? => "",
34
34
  :mismatches => "",
35
35
  :keep_barcode => "",
36
- :fastq_multx => "fastq_multx",
36
+ :fastx => "fastq_multx",
37
37
  :debug => false,
38
38
  :lane_number => ""
39
39
  }
@@ -79,8 +79,8 @@ optparse = OptionParser.new do |opts|
79
79
  options[:lane_number] = i if i
80
80
  end
81
81
 
82
- opts.on("-x", "--fastq_multx DIR", String) do |i|
83
- options[:fastq_multx] = i if i
82
+ opts.on("-x", "--fastx_barcode_splitter.pl DIR", String) do |i|
83
+ options[:fastx] = i if i
84
84
  end
85
85
 
86
86
  opts.on("-d", "--debug", "Debug mode!") do |i|
@@ -117,7 +117,7 @@ sample_sheet.create_barcode_txt("#{options[:out_dir]}/barcode")
117
117
 
118
118
  # Prepare template
119
119
  multx_opts = "#{options[:keep_barcode]} #{options[:eol_only]} #{options[:bol_only]} #{options[:mismatches]}"
120
- nugen_temp = NugenTemplate.new(options[:fastq_multx],multx_opts)
120
+ nugen_temp = NugenTemplate.new(options[:fastx],multx_opts)
121
121
 
122
122
  Dir.glob(options[:project_dir]+"/*").each do |p|
123
123
  next unless File.directory? p
@@ -147,24 +147,18 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
147
147
  status = system('bash', '-c', cmd)
148
148
  raise "Calling the template for fwd did not succeed!" if !status
149
149
  end
150
- #cmd = nugen_temp.fill(lane,number,outdir, barcodes, rev, false)
151
- #if options[:debug]
152
- # STDERR.puts cmd if options[:debug]
153
- #else
154
- # status = system('bash', '-c', cmd)
155
- # raise "Calling the template for rev did not succeed!" if !status
156
- #end
150
+
157
151
  # Merging
158
152
  merger = Merger.new(fwd,rev,outdir,number,barcodes)
159
153
  stats = merger.merge()
160
- stats_file = File.open("#{outdir}/nugen_demultiplexing.log", 'a')
154
+ stats_file = File.open("#{outdir}/nugen_demultiplexing_Lane#{lane}.log", 'a')
161
155
  stats_file.write(stats)
162
156
  stats_file.close()
163
157
  end
164
158
 
165
159
  # Add the trimmed bases
166
160
  Dir.glob(outdir+"/*").each do |f|
167
- if f =~ /nugen_demultiplexing.log/
161
+ if f =~ /nugen_demultiplexing/
168
162
  sample_sheet.lanes
169
163
  statistics = Statistics.new(f)
170
164
  stats_out = outdir + "/statistics"
@@ -189,6 +183,13 @@ Dir.glob(options[:project_dir]+"/*").each do |p|
189
183
  status = system(cmd)
190
184
  raise "Was not able to rename and move file #{f}!" if status!=true
191
185
  end
186
+ cmd = "gzip #{sample_dir}/#{name}"
187
+ if options[:debug]
188
+ STDERR.puts cmd if options[:debug]
189
+ else
190
+ status = system(cmd)
191
+ raise "Was not able to gzip file #{f}"
192
+ end
192
193
  end
193
194
 
194
195
  end
@@ -19,39 +19,11 @@ class Merger
19
19
 
20
20
  attr_accessor :sample_ids
21
21
 
22
- #def prepare_hash()
23
- # @sample_ids.each_with_index do |sample_id, i|
24
- # a = Thread.new {
25
- # filehandler = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
26
- # filehandler.each do |line|
27
- # next unless line.include?("@HWI-")
28
- # line = line.split(" ")
29
- # name = line[0].split(":")[4..-1].join(":")
30
- # @values_fwd[i].store(name,filehandler.pos)
31
- # end
32
- # filehandler.close()
33
- # }
34
- # b = Thread.new {
35
- # filehandler = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
36
- # filehandler.each do |line|
37
- # next unless line.include?("@HWI-")
38
- # line = line.split(" ")
39
- # name = line[0].split(":")[4..-1].join(":")
40
- # @values_rev[i].store(name,filehandler.pos)
41
- # end
42
- # filehandler.close()
43
- # }
44
- # a.join
45
- # b.join
46
- # end
47
- #end
48
-
49
22
  def merge()
50
23
  statistics = Array.new(@sample_ids.length()+2,0)
51
24
  fwd_file = Zlib::GzipReader.open(@fwd)
52
25
  rev_file = Zlib::GzipReader.open(@rev)
53
26
  fwd_splitted_files = []
54
- rev_splitted_files = []
55
27
  fwd_out_files = []
56
28
  rev_out_files = []
57
29
  fwd_out_unmatched = File.open(@outdir+"/R1_#{@number}.unmatched.updated.fq",'w')
@@ -59,8 +31,6 @@ class Merger
59
31
 
60
32
  @sample_ids.each_with_index do |sample_id, i|
61
33
  fwd_splitted_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.fq")
62
- #rev_splitted_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.fq")
63
- #OUTFILES????
64
34
  fwd_out_files[i] = File.open(@outdir+"/R1_#{@number}.#{sample_id}.updated.fq",'w')
65
35
  rev_out_files[i] = File.open(@outdir+"/R2_#{@number}.#{sample_id}.updated.fq",'w')
66
36
  end
@@ -72,7 +42,6 @@ class Merger
72
42
  fwd_name = fwd_line.split(" ")
73
43
  marker = true
74
44
 
75
-
76
45
  @sample_ids.each_with_index do |sample_id, i|
77
46
  if !fwd_splitted_files[i].eof? && marker
78
47
  compare_line_fwd = fwd_splitted_files[i].readline()
@@ -98,47 +67,6 @@ class Merger
98
67
  end
99
68
  end
100
69
 
101
- # if !marker && !rev_splitted_files[i].eof?
102
- # compare_line_rev = rev_splitted_files[i].readline()
103
- # name_compare_rev = compare_line_rev.split(" ")
104
- # if name_compare_rev[0] == name_compare_fwd[0]
105
- # for k in 1..3
106
- # compare_line_rev = rev_splitted_files[i].readline()
107
- # end
108
- # else
109
- # #puts compare_line_rev
110
- # rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
111
- # end
112
- # end
113
- # break if !marker
114
- # if !rev_splitted_files[i].eof? && marker
115
- # compare_line_rev = rev_splitted_files[i].readline()
116
- # name_compare_rev = compare_line_rev.split(" ")
117
- # puts "REV: " + compare_line_rev if i == 2
118
- # #puts name_compare_rev[0]
119
- # if rev_name[0] == name_compare_rev[0]
120
- # marker = false
121
- # statistics[i] += 1
122
- # fwd_out_files[i].write(fwd_line)
123
- # rev_out_files[i].write(rev_line)
124
- # for k in 1..3
125
- # rev_file.readline()
126
- # compare_line_rev = rev_splitted_files[i].readline()
127
- # rev_out_files[i].write(compare_line_rev.gsub(/[A-Z]{4}$/,"NNNN")) if k == 1
128
- # rev_out_files[i].write(compare_line_rev) if k == 2
129
- # rev_out_files[i].write(compare_line_rev.gsub(/[\S]{4}$/,"@@@@")) if k == 3
130
- # fwd_out_files[i].write(fwd_file.readline())
131
- # end
132
- # fwd_file.lineno = fwd_file.lineno - 1
133
- # rev_file.lineno = rev_file.lineno - 1
134
- # else
135
- # rev_splitted_files[i].pos = rev_splitted_files[i].pos - compare_line_rev.length()
136
- # end
137
- # end
138
- # break if !marker
139
- #end
140
-
141
-
142
70
  if marker
143
71
  statistics[-2] += 1
144
72
  fwd_out_unmatched.write(fwd_line)
@@ -149,6 +77,10 @@ class Merger
149
77
  end
150
78
  end
151
79
  end
80
+ stats = make_stats(statistics)
81
+ end
82
+
83
+ def make_stats(statistics)
152
84
  stats = ""
153
85
  @sample_ids.each_with_index do |id,i|
154
86
  stats += id +"\t" + statistics[i].to_s + "\n"
@@ -156,4 +88,8 @@ class Merger
156
88
  stats += "unmatched\t" + statistics[-2].to_s + "\n"
157
89
  stats += "total\t" + statistics[-1].to_s + "\n"
158
90
  end
159
- end
91
+ end
92
+
93
+
94
+
95
+
@@ -42,8 +42,8 @@ class Statistics
42
42
  @barcodes.each_with_index do |code, i|
43
43
  str += "#{code}:\t#{@num_reads[i]} \n"
44
44
  end
45
- #percent = (100 / @total.to_f) * @num_unmatched.to_f
46
- #percent = (percent.to_f * 100).round / 100.to_f
45
+ percent = (100 / @total.to_f) * @num_unmatched.to_f
46
+ percent = (percent.to_f * 100).round / 100.to_f
47
47
  str += "Unmatched:\t#{@num_unmatched}\n"
48
48
  str += "Total:\t#{@total}"
49
49
  str.to_s
@@ -40,9 +40,7 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
40
40
  samplesheet = SampleSheet.new("test/fixtures/sample_sheet.csv")
41
41
  assert_equal(samplesheet.barcodes[3], "TTAG")
42
42
  assert_equal(samplesheet.sample_id[4], "RX3")
43
-
44
43
  samplesheet.create_barcode_txt("test/fixtures/barcode")
45
-
46
44
  end
47
45
 
48
46
  def test_statistics
@@ -54,17 +52,11 @@ class NugenBarcodeSplitterTest < Test::Unit::TestCase
54
52
  end
55
53
 
56
54
  def test_merger
57
- #merger = Merger.new("test/fixtures/Sample_Lane5/k.gz",
58
- # "test/fixtures/Sample_Lane5/l.gz",
59
- # "test/fixtures/Sample_Lane5", "001", "test/fixtures/barcode_5.txt")
60
- #assert_equal(merger.sample_ids, ["RX3", "RX4", "RX3X2", "RX4X2"])
61
- #stats = merger.merge()
62
- #assert_equal("RX3\t18\nRX4\t7\nRX3X2\t16\nRX4X2\t8\nunmatched\t1\ntotal\t50\n",stats)
63
55
  merger = Merger.new("test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R1_019.fastq.gz",
64
56
  "test/fixtures/Sample_Lane8/Lane8_NoIndex_L008_R2_019.fastq.gz",
65
57
  "test/fixtures/Sample_Lane8", "019", "test/fixtures/barcode_8.txt")
58
+ assert_equal(["RX9", "RX10", "RX9X2", "RX10X2"], merger.sample_ids)
66
59
  stats = merger.merge()
67
- #assert.equal(merger.values_fwd[1].value_at(),{"sfggf"=>"dffg"})
68
60
  assert_equal("RX9\t22464\nRX10\t28699\nRX9X2\t26434\nRX10X2\t22994\nunmatched\t15445\ntotal\t116036\n",stats)
69
61
  end
70
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nugen_barcode_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,19 +9,17 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-22 00:00:00.000000000Z
12
+ date: 2012-02-24 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: ! "This gem is designed to demultiplex reads\n produced
15
15
  by Illumina with Nugen\n (http://www.nugeninc.com/nugen/) barcodes."
16
16
  email:
17
17
  - katharinaehayer@gmail.com
18
18
  executables:
19
- - base_adder
20
19
  - nugen_barcode_splitter
21
20
  extensions: []
22
21
  extra_rdoc_files: []
23
22
  files:
24
- - bin/base_adder
25
23
  - bin/nugen_barcode_splitter
26
24
  - lib/nugen_barcode_splitter.rb
27
25
  - lib/nugen_barcode_splitter/fastq.rb
data/bin/base_adder DELETED
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require "nugen_barcode_splitter"
3
-
4
- usage =<<EOF
5
- #{$0} file.fq number_of_bases out.fq
6
-
7
- -_-_-_-_- #{$0} -_-_-_-_-
8
-
9
- Adds N^P bases to the beginning of the
10
- fastq file. The Quality is set on the
11
- lowest value.
12
- EOF
13
-
14
- file_dir = ARGV[0]
15
- number_of_bases = ARGV[1].to_i
16
- out_dir = ARGV[2]
17
-
18
- fastq = Fastq.new("#{file_dir}")
19
- fastq.add(number_of_bases, out_dir)
20
- fastq.close
21
-
22
-