bio-gadget 0.2.5 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bio-gadget.rb +8 -2
- data/lib/bio-gadget/demlt.rb +31 -72
- data/lib/bio-gadget/version.rb +1 -1
- metadata +2 -2
data/lib/bio-gadget.rb
CHANGED
@@ -24,12 +24,18 @@ module Bio
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
@@mytemppaths = Array.new
|
28
|
+
|
29
|
+
def mytemppath(basename, tmpdir = Dir::tmpdir)
|
28
30
|
fp = Tempfile.open(basename, tmpdir)
|
29
31
|
path = fp.path
|
30
|
-
|
32
|
+
@@mytemppaths.push(path)
|
33
|
+
fp.close!
|
31
34
|
path
|
32
35
|
end
|
33
36
|
|
37
|
+
END {
|
38
|
+
@@mytemppaths.each { |path| File.unlink(path) if File.exist?(path) }
|
39
|
+
}
|
34
40
|
end
|
35
41
|
end
|
data/lib/bio-gadget/demlt.rb
CHANGED
@@ -33,88 +33,69 @@ module Bio
|
|
33
33
|
procs = Parallel.processor_count
|
34
34
|
|
35
35
|
fifo1paths = Array.new
|
36
|
-
procs.times { |i|
|
36
|
+
procs.times { |i|
|
37
|
+
fifo1path = mytemppath('fifo1-')
|
38
|
+
File.mkfifo(fifo1path)
|
39
|
+
fifo1paths.push(fifo1path)
|
40
|
+
}
|
37
41
|
pid = Kernel.fork {
|
38
42
|
fifo1s = Array.new
|
39
|
-
fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w
|
43
|
+
fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
|
40
44
|
total = 0
|
41
45
|
Bio::Faster.new(:stdin).each_record(:quality => :raw) do |vals|
|
42
46
|
fifo1 = fifo1s[total % procs]
|
43
47
|
fifo1.puts(vals.join("\t"))
|
44
|
-
fifo1.flush
|
45
48
|
total += 1
|
46
49
|
end
|
47
|
-
fifo1s.each { |fifo1| fifo1.
|
50
|
+
fifo1s.each { |fifo1| fifo1.close }
|
48
51
|
Kernel.exit!
|
49
52
|
}
|
50
53
|
|
51
|
-
fifo1paths.each { |fifo1path|
|
52
|
-
until File.exist?(fifo1path)
|
53
|
-
sleep 1
|
54
|
-
end
|
55
|
-
}
|
56
|
-
|
57
54
|
fifo2paths = Array.new
|
58
55
|
procs.times { |i|
|
59
|
-
fifo2path =
|
56
|
+
fifo2path = mytemppath('fifo2-')
|
57
|
+
File.mkfifo(fifo2path)
|
60
58
|
fifo2paths.push(fifo2path)
|
61
59
|
pid = Kernel.fork {
|
62
|
-
open(fifo2path, 'w
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
bcs.each_index { |bcidx|
|
74
|
-
tmpdists[bcidx] = Levenshtein.distance(bcs[bcidx], seq[ofs, bclen])
|
75
|
-
}
|
76
|
-
dists = tmpdists.sort { |a, b| a[1] <=> b[1] }
|
77
|
-
bc = dists[0][1] < 2 && dists[0][1] < dists[1][1] ? dists[0][0] : -1
|
78
|
-
fifo2.puts("#{bc}\t#{seqid}\t#{seq}\t#{qvs}")
|
79
|
-
fifo2.flush
|
80
|
-
end
|
81
|
-
end
|
82
|
-
fifo1.close
|
83
|
-
fifo2.puts('*')
|
60
|
+
open(fifo2path, 'w') { |fifo2|
|
61
|
+
open(fifo1paths[i], 'r').each { |line|
|
62
|
+
seqid, seq, qvs = line.rstrip.split(/\t/)
|
63
|
+
tmpdists = Hash.new
|
64
|
+
bcs.each_index { |bcidx|
|
65
|
+
tmpdists[bcidx] = Levenshtein.distance(bcs[bcidx], seq[ofs, bclen])
|
66
|
+
}
|
67
|
+
dists = tmpdists.sort { |a, b| a[1] <=> b[1] }
|
68
|
+
bc = dists[0][1] < 2 && dists[0][1] < dists[1][1] ? dists[0][0] : -1
|
69
|
+
fifo2.puts("#{bc}\t#{seqid}\t#{seq}\t#{qvs}")
|
70
|
+
}
|
84
71
|
}
|
85
72
|
Kernel.exit!
|
86
73
|
}
|
87
74
|
}
|
88
75
|
|
89
|
-
fifo2paths.each { |fifo2path|
|
90
|
-
until File.exist?(fifo2path)
|
91
|
-
sleep 1
|
92
|
-
end
|
93
|
-
}
|
94
|
-
|
95
76
|
tmpwells = wells + ['other']
|
96
77
|
|
97
78
|
fifo3paths = Array.new
|
98
|
-
tmpwells.each_index { |i|
|
79
|
+
tmpwells.each_index { |i|
|
80
|
+
fifo3path = mytemppath('fifo3-')
|
81
|
+
File.mkfifo(fifo3path)
|
82
|
+
fifo3paths.push(fifo3path)
|
83
|
+
}
|
99
84
|
pid = Kernel.fork {
|
100
85
|
fifo2s = Array.new
|
101
|
-
fifo2paths.each { |fifo2path| fifo2s.push(open(fifo2path, 'r
|
86
|
+
fifo2paths.each { |fifo2path| fifo2s.push(open(fifo2path, 'r')) }
|
102
87
|
fifo2done = Hash.new
|
103
88
|
fifo3s = Array.new
|
104
|
-
fifo3paths.each { |fifo3path| fifo3s.push(open(fifo3path, 'w
|
89
|
+
fifo3paths.each { |fifo3path| fifo3s.push(open(fifo3path, 'w')) }
|
105
90
|
fifo2s.cycle { |fifo2|
|
106
91
|
unless fifo2done.key?(fifo2)
|
107
92
|
line = fifo2.gets
|
108
93
|
if line.nil?
|
109
|
-
sleep 1
|
110
|
-
elsif line == "*\n"
|
111
|
-
# puts("#{fifo2} eof.")
|
112
94
|
fifo2done[fifo2] = ''
|
113
95
|
else
|
114
96
|
bcs, seqid, seq, qvs = line.rstrip.split(/\t/)
|
115
97
|
fifo3 = fifo3s[bcs.to_i]
|
116
98
|
fifo3.puts([seqid, seq, qvs].join("\t"))
|
117
|
-
fifo3.flush
|
118
99
|
end
|
119
100
|
end
|
120
101
|
if fifo2done.size == fifo2s.size
|
@@ -122,16 +103,10 @@ module Bio
|
|
122
103
|
end
|
123
104
|
}
|
124
105
|
fifo2s.each { |fifo2| fifo2.close }
|
125
|
-
fifo3s.each { |fifo3| fifo3.
|
106
|
+
fifo3s.each { |fifo3| fifo3.close }
|
126
107
|
Kernel.exit!
|
127
108
|
}
|
128
109
|
|
129
|
-
fifo3paths.each { |fifo3path|
|
130
|
-
until File.exist?(fifo3path)
|
131
|
-
sleep 1
|
132
|
-
end
|
133
|
-
}
|
134
|
-
|
135
110
|
tmpwells.each_index { |i|
|
136
111
|
well = tmpwells[i]
|
137
112
|
outpath = "#{options['output-dir']}/#{well}.fq.xz"
|
@@ -139,30 +114,14 @@ module Bio
|
|
139
114
|
left = ofs+bclen
|
140
115
|
right = ofs+bclen+len-1
|
141
116
|
preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
|
142
|
-
|
117
|
+
ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
|
143
118
|
| sort -k 1 -r | cut -f 2- | uniq -f 2 \\
|
144
119
|
| ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[2][#{left}..-1], "+", $F[1][#{left}..-1]].join("\\n"))' \\
|
145
120
|
DEDUPandFORMAT
|
146
121
|
: <<"FORMAT"
|
147
|
-
|
122
|
+
ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
|
148
123
|
FORMAT
|
149
|
-
preprocess
|
150
|
-
open(preprocess, 'w') { |fp|
|
151
|
-
fifo3 = open(fifo3paths[i], 'r+')
|
152
|
-
while true
|
153
|
-
line = fifo3.gets
|
154
|
-
if line.nil?
|
155
|
-
sleep 1
|
156
|
-
elsif line == "*\n"
|
157
|
-
break
|
158
|
-
else
|
159
|
-
fp.puts(line)
|
160
|
-
fp.flush
|
161
|
-
end
|
162
|
-
end
|
163
|
-
fifo3.close
|
164
|
-
}
|
165
|
-
Kernel.exit!
|
124
|
+
exec preprocess+"| xz -z -c -e > #{outpath}"
|
166
125
|
}
|
167
126
|
}
|
168
127
|
|
data/lib/bio-gadget/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.2.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: gthor
|