bio-gadget 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.org CHANGED
@@ -17,6 +17,7 @@ Currently available commands are
17
17
 
18
18
  : bio
19
19
  : ---
20
+ : gthor bio:demlt BC POS # demultiplex fastq (via STDIN) by barcodes
20
21
  : gthor bio:fqlzma # automatic (re)compression of *.fq(.gz|.bz2) files
21
22
  : gthor bio:qvstat QUAL # statistics of quality values in *.qual file
22
23
  : gthor bio:wigchr WIG CHR # extract wiggle track on specified chromosome
data/bio-gadget.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # -*- encoding: utf-8 -*-
2
1
  require File.expand_path('../lib/bio-gadget/version', __FILE__)
3
2
 
4
3
  Gem::Specification.new do |gem|
@@ -17,4 +16,6 @@ Gem::Specification.new do |gem|
17
16
 
18
17
  gem.add_dependency 'gthor'
19
18
  gem.add_dependency 'parallel'
19
+ gem.add_dependency 'levenshtein-ffi'
20
+ gem.add_dependency 'bio-faster'
20
21
  end
data/lib/bio-gadget.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'bio-gadget/version'
2
+ require 'bio-gadget/demlt'
2
3
  require 'bio-gadget/fqlzma'
3
4
  require 'bio-gadget/qvstat'
4
5
  require 'bio-gadget/wigchr'
@@ -0,0 +1,82 @@
1
+ require 'bio-faster'
2
+ require 'levenshtein'
3
+ require 'thread'
4
+
5
+ module Bio
6
+ class Gadget < Thor
7
+ namespace :bio
8
+
9
+ desc 'demlt BC POS', 'demultiplex fastq (via STDIN) by barcodes'
10
# Demultiplex FASTQ records read from STDIN into one gzip-compressed FASTQ
# file per barcode.
#
# bcfile - path to a whitespace-delimited barcode table: the first column is
#          the well name (used as the output file prefix), the second column
#          is the barcode sequence. Blank lines are ignored.
# tmpofs - 0-based offset of the barcode within each read (converted with
#          to_i, so a String from the command line is accepted).
#
# A read is assigned to a barcode only when that barcode is the unique
# closest one and its Levenshtein distance to the read's barcode region is
# less than 2; every other read goes to "other.fq.gz".
#
# Writes "<well>.fq.gz" for each well plus "other.fq.gz" in the current
# directory, then prints a tab-separated per-barcode read count summary.
#
# Raises RuntimeError when the table has no barcodes, a row lacks a barcode
# column, or the barcodes do not all have the same length.
def demlt(bcfile, tmpofs)
  ofs = tmpofs.to_i

  wells = []
  bcs = []
  # File.open with a block closes the handle and, unlike Kernel#open, never
  # interprets a leading "|" in the path as a command to run.
  File.open(bcfile) do |io|
    io.each_line do |line|
      well, bc = line.split
      next if well.nil? # blank line
      raise "Missing barcode sequence for well '#{well}'" if bc.nil?
      wells.push(well)
      bcs.push(bc)
    end
  end

  bclens = bcs.map { |bc| bc.length }.uniq
  raise 'Inconsistent barcode sequences' if bclens.size != 1
  bclen = bclens.first

  # One writer thread and queue per output file; the last pair is "other".
  qs = []
  ts = []
  (wells + ['other']).each do |well|
    q = Queue.new
    t = Thread.new(well, q) do |name, queue|
      # Array-form popen runs gzip without a shell, so well names taken from
      # the barcode file cannot inject shell commands. The block form closes
      # the pipe and waits for gzip even if writing fails.
      IO.popen(['gzip', '-c'], 'w', :out => "#{name}.fq.gz") do |fp|
        loop do
          rec = queue.shift
          break if rec == '' # empty string is the end-of-input sentinel
          fp.puts(rec)
        end
      end
    end
    qs.push(q)
    ts.push(t)
  end

  reads = Array.new(bcs.size + 1, 0)
  begin
    Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
      idx = -1 # default: unassigned ("other")
      seqbc = seq[ofs, bclen]
      if seqbc # nil when the read is shorter than the barcode offset
        dists = bcs.map { |bc| Levenshtein.distance(bc, seqbc) }
        best = dists.min
        # Accept only an unambiguous best match; this also works when the
        # table holds a single barcode.
        idx = dists.index(best) if best < 2 && dists.count(best) == 1
      end
      # FASTQ record headers start with '@' ('>' is the FASTA marker).
      qs[idx].push("@#{seqid}\n#{seq}\n+\n#{qvs}")
      reads[idx] += 1
    end
  ensure
    # Always stop the writers so every gzip stream is flushed and closed,
    # even when reading the input raised.
    qs.each { |q| q.push('') }
    ts.each { |t| t.join }
  end

  bcs.each_index do |i|
    puts "#{bcs[i]}\t#{reads[i]}\t#{wells[i]}.fq.gz"
  end
  puts "Other\t#{reads[-1]}\tother.fq.gz"
  puts '===='
  puts "Total\t#{reads.inject(:+)}"
end
81
+ end
82
+ end
@@ -3,7 +3,7 @@ require 'thor'
3
3
  module Bio
4
4
  class Gadget < Thor
5
5
 
6
- VERSION = "0.1.2"
6
+ VERSION = "0.1.3"
7
7
 
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-12 00:00:00.000000000 Z
12
+ date: 2012-12-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: gthor
@@ -43,6 +43,38 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: levenshtein-ffi
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: bio-faster
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
46
78
  description: Gadgets for bioinformatics
47
79
  email:
48
80
  - shintaro.katayama@gmail.com
@@ -58,6 +90,7 @@ files:
58
90
  - Rakefile
59
91
  - bio-gadget.gemspec
60
92
  - lib/bio-gadget.rb
93
+ - lib/bio-gadget/demlt.rb
61
94
  - lib/bio-gadget/fqlzma.rb
62
95
  - lib/bio-gadget/qvstat.rb
63
96
  - lib/bio-gadget/version.rb