bio-gadget 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.org +1 -0
- data/bio-gadget.gemspec +2 -1
- data/lib/bio-gadget.rb +1 -0
- data/lib/bio-gadget/demlt.rb +82 -0
- data/lib/bio-gadget/version.rb +1 -1
- metadata +35 -2
data/README.org
CHANGED
@@ -17,6 +17,7 @@ Currently available commands are
|
|
17
17
|
|
18
18
|
: bio
|
19
19
|
: ---
|
20
|
+
: gthor bio:demlt BC POS # demultiplex fastq (via STDIN) by barcodes
|
20
21
|
: gthor bio:fqlzma # automatic (re)compression of *.fq(.gz|.bz2) files
|
21
22
|
: gthor bio:qvstat QUAL # statistics of quality values in *.qual file
|
22
23
|
: gthor bio:wigchr WIG CHR # extract wiggle track on specified chromosome
|
data/bio-gadget.gemspec
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
1
|
require File.expand_path('../lib/bio-gadget/version', __FILE__)
|
3
2
|
|
4
3
|
Gem::Specification.new do |gem|
|
@@ -17,4 +16,6 @@ Gem::Specification.new do |gem|
|
|
17
16
|
|
18
17
|
gem.add_dependency 'gthor'
|
19
18
|
gem.add_dependency 'parallel'
|
19
|
+
gem.add_dependency 'levenshtein-ffi'
|
20
|
+
gem.add_dependency 'bio-faster'
|
20
21
|
end
|
data/lib/bio-gadget.rb
CHANGED
data/lib/bio-gadget/demlt.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'bio-faster'
|
2
|
+
require 'levenshtein'
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class Gadget < Thor
|
7
|
+
namespace :bio
|
8
|
+
|
9
|
+
desc 'demlt BC POS', 'demultiplex fastq (via STDIN) by barcodes'
|
10
|
+
def demlt(bcfile, tmpofs)
|
11
|
+
|
12
|
+
ofs = tmpofs.to_i
|
13
|
+
|
14
|
+
wells = Array.new
|
15
|
+
bcs = Array.new
|
16
|
+
bclens = Array.new
|
17
|
+
open(bcfile).each do |line|
|
18
|
+
cols = line.rstrip.split
|
19
|
+
wells.push(cols[0])
|
20
|
+
bcs.push(cols[1])
|
21
|
+
bclens.push(cols[1].length)
|
22
|
+
end
|
23
|
+
|
24
|
+
bclens.uniq!
|
25
|
+
if bclens.size != 1
|
26
|
+
raise 'Inconsistent barcode sequences'
|
27
|
+
end
|
28
|
+
bclen = bclens[0]
|
29
|
+
|
30
|
+
ts = Array.new
|
31
|
+
qs = Array.new
|
32
|
+
(wells + ['other']).each { |well|
|
33
|
+
q = Queue.new
|
34
|
+
t = Thread.new(well, q) do |well|
|
35
|
+
fp = open("| gzip -c > #{well}.fq.gz", 'w')
|
36
|
+
while vals = q.shift
|
37
|
+
if vals == ""
|
38
|
+
break
|
39
|
+
else
|
40
|
+
fp.puts(vals)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
fp.close()
|
44
|
+
end
|
45
|
+
qs.push(q)
|
46
|
+
ts.push(t)
|
47
|
+
}
|
48
|
+
|
49
|
+
reads = Array.new(bcs.size+1, 0)
|
50
|
+
tmpdist = Hash.new
|
51
|
+
Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
|
52
|
+
seqbc = seq[ofs, bclen]
|
53
|
+
bcs.each_index do |i|
|
54
|
+
tmpdist[i] = Levenshtein.distance(bcs[i], seqbc)
|
55
|
+
end
|
56
|
+
dists = tmpdist.sort { |a, b| a[1] <=> b[1] }
|
57
|
+
if dists[0][1] < dists[1][1] && dists[0][1] < 2
|
58
|
+
idx = dists[0][0]
|
59
|
+
qs[idx].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
|
60
|
+
reads[idx] = reads[idx]+1
|
61
|
+
else
|
62
|
+
qs[-1].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
|
63
|
+
reads[-1] = reads[-1]+1
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
qs.each { |q| q.push('') }
|
68
|
+
ts.each { |t| t.join }
|
69
|
+
|
70
|
+
total = 0
|
71
|
+
bcs.each_index { |i|
|
72
|
+
r = reads[i]
|
73
|
+
puts "#{bcs[i]}\t#{r}\t#{wells[i]}.fq.gz"
|
74
|
+
total = total+r
|
75
|
+
}
|
76
|
+
puts "Other\t#{reads[-1]}\tother.fq.gz"
|
77
|
+
puts '===='
|
78
|
+
puts "Total\t#{total+reads[-1]}"
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
data/lib/bio-gadget/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gadget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: gthor
|
@@ -43,6 +43,38 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: levenshtein-ffi
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: bio-faster
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
46
78
|
description: Gadgets for bioinformatics
|
47
79
|
email:
|
48
80
|
- shintaro.katayama@gmail.com
|
@@ -58,6 +90,7 @@ files:
|
|
58
90
|
- Rakefile
|
59
91
|
- bio-gadget.gemspec
|
60
92
|
- lib/bio-gadget.rb
|
93
|
+
- lib/bio-gadget/demlt.rb
|
61
94
|
- lib/bio-gadget/fqlzma.rb
|
62
95
|
- lib/bio-gadget/qvstat.rb
|
63
96
|
- lib/bio-gadget/version.rb
|