bio-gadget 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bio-gadget/demlt.rb +55 -21
- data/lib/bio-gadget/version.rb +1 -1
- metadata +2 -2
data/lib/bio-gadget/demlt.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'bio-faster'
|
2
2
|
require 'levenshtein'
|
3
|
+
require 'parallel'
|
3
4
|
require 'thread'
|
4
5
|
|
5
6
|
module Bio
|
@@ -7,6 +8,7 @@ module Bio
|
|
7
8
|
namespace :bio
|
8
9
|
|
9
10
|
desc 'demlt BC POS', 'demultiplex fastq (via STDIN) by barcodes'
|
11
|
+
option :destdir, :type => :string, :default => '.'
|
10
12
|
def demlt(bcfile, tmpofs)
|
11
13
|
|
12
14
|
ofs = tmpofs.to_i
|
@@ -30,14 +32,18 @@ module Bio
|
|
30
32
|
ts = Array.new
|
31
33
|
qs = Array.new
|
32
34
|
(wells + ['other']).each { |well|
|
33
|
-
q =
|
34
|
-
t = Thread.new(well, q) do |well|
|
35
|
-
|
35
|
+
q = SizedQueue.new(100000)
|
36
|
+
t = Thread.new(well, q) do |well, q|
|
37
|
+
tc = Thread.current
|
38
|
+
tc[:file] = "#{options[:destdir]}/#{well}.fq.gz"
|
39
|
+
fp = open("| gzip -c > #{tc[:file]}", 'w')
|
40
|
+
tc[:read] = 0
|
36
41
|
while vals = q.shift
|
37
42
|
if vals == ""
|
38
43
|
break
|
39
44
|
else
|
40
45
|
fp.puts(vals)
|
46
|
+
tc[:read] = tc[:read] + 1
|
41
47
|
end
|
42
48
|
end
|
43
49
|
fp.close()
|
@@ -46,37 +52,65 @@ module Bio
|
|
46
52
|
ts.push(t)
|
47
53
|
}
|
48
54
|
|
49
|
-
|
50
|
-
tmpdist = Hash.new
|
55
|
+
seqs = Array.new
|
51
56
|
Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
dists = tmpdist.sort { |a, b| a[1] <=> b[1] }
|
57
|
-
if dists[0][1] < dists[1][1] && dists[0][1] < 2
|
58
|
-
idx = dists[0][0]
|
59
|
-
qs[idx].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
|
60
|
-
reads[idx] = reads[idx]+1
|
61
|
-
else
|
62
|
-
qs[-1].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
|
63
|
-
reads[-1] = reads[-1]+1
|
57
|
+
seqs.push([seqid, seq, qvs])
|
58
|
+
if seqs.size == 100000 * Parallel.processor_count
|
59
|
+
parallel_Levenshtein(seqs, bcs, ofs, bclen, qs)
|
60
|
+
seqs = Array.new
|
64
61
|
end
|
65
62
|
end
|
63
|
+
if seqs.size > 0
|
64
|
+
parallel_Levenshtein(seqs, bcs, ofs, bclen, qs)
|
65
|
+
end
|
66
66
|
|
67
67
|
qs.each { |q| q.push('') }
|
68
68
|
ts.each { |t| t.join }
|
69
69
|
|
70
70
|
total = 0
|
71
71
|
bcs.each_index { |i|
|
72
|
-
|
73
|
-
|
72
|
+
t = ts[i]
|
73
|
+
r = t[:read]
|
74
|
+
puts "#{bcs[i]}\t#{r}\t#{t[:file]}"
|
74
75
|
total = total+r
|
75
76
|
}
|
76
|
-
|
77
|
+
t = ts[-1]
|
78
|
+
r = t[:read]
|
79
|
+
puts "Other\t#{r}\t#{t[:file]}"
|
77
80
|
puts '===='
|
78
|
-
puts "Total\t#{total+reads[-1]}"
|
81
|
+
puts "Total\t#{total+r}"
|
82
|
+
|
83
|
+
end
|
84
|
+
|
85
|
+
protected
|
86
|
+
|
87
|
+
def parallel_Levenshtein(seqs, bcs, ofs, bclen, qs)
|
88
|
+
|
89
|
+
tmpdists = Parallel.map_with_index(bcs, :in_processes => Parallel.processor_count) do |bc, bcidx|
|
90
|
+
tmpdist = Array.new
|
91
|
+
seqs.each_index do |seqidx|
|
92
|
+
seqbc = seqs[seqidx][1][ofs, bclen]
|
93
|
+
tmpdist.push(Levenshtein.distance(bc, seqbc))
|
94
|
+
end
|
95
|
+
tmpdist
|
96
|
+
end
|
97
|
+
|
98
|
+
tmpdist = Hash.new
|
99
|
+
seqs.each_index do |seqidx|
|
100
|
+
seqid, seq, qvs = seqs[seqidx]
|
101
|
+
bcs.each_index do |bcidx|
|
102
|
+
tmpdist[bcidx] = tmpdists[bcidx][seqidx]
|
103
|
+
end
|
104
|
+
dists = tmpdist.sort { |a, b| a[1] <=> b[1] }
|
105
|
+
if dists[0][1] < dists[1][1] && dists[0][1] < 2
|
106
|
+
idx = dists[0][0]
|
107
|
+
qs[idx].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
|
108
|
+
else
|
109
|
+
qs[-1].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
|
110
|
+
end
|
111
|
+
end
|
79
112
|
|
80
113
|
end
|
114
|
+
|
81
115
|
end
|
82
116
|
end
|
data/lib/bio-gadget/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: gthor
|