bio-gadget 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.org +1 -0
- data/bio-gadget.gemspec +2 -1
- data/lib/bio-gadget.rb +1 -0
- data/lib/bio-gadget/demlt.rb +82 -0
- data/lib/bio-gadget/version.rb +1 -1
- metadata +35 -2
data/README.org
CHANGED
|
@@ -17,6 +17,7 @@ Currently available commands are
|
|
|
17
17
|
|
|
18
18
|
: bio
|
|
19
19
|
: ---
|
|
20
|
+
: gthor bio:demlt BC POS # demultiplex fastq (via STDIN) by barcodes
|
|
20
21
|
: gthor bio:fqlzma # automatic (re)compression of *.fq(.gz|.bz2) files
|
|
21
22
|
: gthor bio:qvstat QUAL # statistics of quality values in *.qual file
|
|
22
23
|
: gthor bio:wigchr WIG CHR # extract wiggle track on specified chromosome
|
data/bio-gadget.gemspec
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
|
2
1
|
require File.expand_path('../lib/bio-gadget/version', __FILE__)
|
|
3
2
|
|
|
4
3
|
Gem::Specification.new do |gem|
|
|
@@ -17,4 +16,6 @@ Gem::Specification.new do |gem|
|
|
|
17
16
|
|
|
18
17
|
gem.add_dependency 'gthor'
|
|
19
18
|
gem.add_dependency 'parallel'
|
|
19
|
+
gem.add_dependency 'levenshtein-ffi'
|
|
20
|
+
gem.add_dependency 'bio-faster'
|
|
20
21
|
end
|
data/lib/bio-gadget.rb
CHANGED
data/lib/bio-gadget/demlt.rb
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require 'bio-faster'
|
|
2
|
+
require 'levenshtein'
|
|
3
|
+
require 'thread'
|
|
4
|
+
|
|
5
|
+
module Bio
  class Gadget < Thor
    namespace :bio

    desc 'demlt BC POS', 'demultiplex fastq (via STDIN) by barcodes'
    # Demultiplex FASTQ records read from STDIN into one gzip-compressed
    # FASTQ file per well, according to the barcode found in each read.
    #
    # bcfile - path of a whitespace-delimited text file. On each non-blank
    #          line the first column is the well name (output goes to
    #          "<well>.fq.gz") and the second column is that well's barcode.
    # tmpofs - 0-based offset of the barcode within each read sequence
    #          (converted with #to_i).
    #
    # A read is assigned to a well only when exactly one barcode has the
    # smallest Levenshtein distance to the read's barcode region and that
    # distance is below 2. Every other read is written to "other.fq.gz".
    # A tab-separated summary of read counts is printed to STDOUT.
    #
    # Raises RuntimeError when a barcode line has fewer than two columns,
    # when no barcode is given, or when the barcodes differ in length.
    def demlt(bcfile, tmpofs)
      ofs = tmpofs.to_i

      wells = []
      bcs = []
      # File.open with a block closes the handle and, unlike Kernel#open,
      # never treats a path starting with "|" as a command to execute.
      File.open(bcfile) do |bcfp|
        bcfp.each_line do |line|
          cols = line.split
          next if cols.empty? # tolerate blank lines
          raise "Malformed barcode line: #{line.rstrip}" if cols.size < 2
          wells.push(cols[0])
          bcs.push(cols[1])
        end
      end

      bclens = bcs.map { |bc| bc.length }.uniq
      raise 'Inconsistent barcode sequences' if bclens.size != 1
      bclen = bclens[0]

      # One queue and one writer thread per output file; the last pair
      # belongs to the catch-all "other" file.
      qs = []
      ts = []
      (wells + ['other']).each do |name|
        queue = Queue.new
        writer = Thread.new(name, queue) do |well, q|
          # The argument-array form of popen bypasses the shell, so spaces or
          # metacharacters in a well name cannot alter the gzip command.
          fp = IO.popen(['gzip', '-c', { :out => "#{well}.fq.gz" }], 'w')
          begin
            # A nil pushed by the producer marks the end of the stream.
            while (record = q.shift)
              fp.puts(record)
            end
          ensure
            fp.close
          end
        end
        qs.push(queue)
        ts.push(writer)
      end

      other = bcs.size # index of the "other" queue and counter
      reads = Array.new(bcs.size + 1, 0)
      begin
        Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
          idx = other
          # nil when the read is shorter than the offset: goes to "other".
          seqbc = seq[ofs, bclen]
          if seqbc
            dists = bcs.map { |bc| Levenshtein.distance(bc, seqbc) }
            best = dists.min
            # Accept only an unambiguous best hit with fewer than 2 edits;
            # this also works when there is a single barcode.
            idx = dists.index(best) if best < 2 && dists.count(best) == 1
          end
          # FASTQ headers start with "@" (">" would be a FASTA header).
          qs[idx].push("@#{seqid}\n#{seq}\n+\n#{qvs}")
          reads[idx] += 1
        end
      ensure
        # Always stop the writers so the gzip pipes are flushed and closed,
        # even when reading STDIN fails part-way through.
        qs.each { |q| q.push(nil) }
        ts.each { |t| t.join }
      end

      bcs.each_index do |i|
        puts "#{bcs[i]}\t#{reads[i]}\t#{wells[i]}.fq.gz"
      end
      puts "Other\t#{reads[other]}\tother.fq.gz"
      puts '===='
      puts "Total\t#{reads.inject(0) { |sum, r| sum + r }}"
    end
  end
end
|
data/lib/bio-gadget/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bio-gadget
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2012-
|
|
12
|
+
date: 2012-12-06 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: gthor
|
|
@@ -43,6 +43,38 @@ dependencies:
|
|
|
43
43
|
- - ! '>='
|
|
44
44
|
- !ruby/object:Gem::Version
|
|
45
45
|
version: '0'
|
|
46
|
+
- !ruby/object:Gem::Dependency
|
|
47
|
+
name: levenshtein-ffi
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
49
|
+
none: false
|
|
50
|
+
requirements:
|
|
51
|
+
- - ! '>='
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '0'
|
|
54
|
+
type: :runtime
|
|
55
|
+
prerelease: false
|
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
+
none: false
|
|
58
|
+
requirements:
|
|
59
|
+
- - ! '>='
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
- !ruby/object:Gem::Dependency
|
|
63
|
+
name: bio-faster
|
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
|
65
|
+
none: false
|
|
66
|
+
requirements:
|
|
67
|
+
- - ! '>='
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '0'
|
|
70
|
+
type: :runtime
|
|
71
|
+
prerelease: false
|
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
73
|
+
none: false
|
|
74
|
+
requirements:
|
|
75
|
+
- - ! '>='
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: '0'
|
|
46
78
|
description: Gadgets for bioinformatics
|
|
47
79
|
email:
|
|
48
80
|
- shintaro.katayama@gmail.com
|
|
@@ -58,6 +90,7 @@ files:
|
|
|
58
90
|
- Rakefile
|
|
59
91
|
- bio-gadget.gemspec
|
|
60
92
|
- lib/bio-gadget.rb
|
|
93
|
+
- lib/bio-gadget/demlt.rb
|
|
61
94
|
- lib/bio-gadget/fqlzma.rb
|
|
62
95
|
- lib/bio-gadget/qvstat.rb
|
|
63
96
|
- lib/bio-gadget/version.rb
|