bio-gadget 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.org CHANGED
@@ -17,6 +17,7 @@ Currently available commands are
17
17
 
18
18
  : bio
19
19
  : ---
20
+ : gthor bio:demlt BC POS # demultiplex fastq (via STDIN) by barcodes
20
21
  : gthor bio:fqlzma # automatic (re)compression of *.fq(.gz|.bz2) files
21
22
  : gthor bio:qvstat QUAL # statistics of quality values in *.qual file
22
23
  : gthor bio:wigchr WIG CHR # extract wiggle track on specified chromosome
data/bio-gadget.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # -*- encoding: utf-8 -*-
2
1
  require File.expand_path('../lib/bio-gadget/version', __FILE__)
3
2
 
4
3
  Gem::Specification.new do |gem|
@@ -17,4 +16,6 @@ Gem::Specification.new do |gem|
17
16
 
18
17
  gem.add_dependency 'gthor'
19
18
  gem.add_dependency 'parallel'
19
+ gem.add_dependency 'levenshtein-ffi'
20
+ gem.add_dependency 'bio-faster'
20
21
  end
data/lib/bio-gadget.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'bio-gadget/version'
2
+ require 'bio-gadget/demlt'
2
3
  require 'bio-gadget/fqlzma'
3
4
  require 'bio-gadget/qvstat'
4
5
  require 'bio-gadget/wigchr'
data/lib/bio-gadget/demlt.rb ADDED
@@ -0,0 +1,82 @@
1
+ require 'bio-faster'
2
+ require 'levenshtein'
3
+ require 'thread'
4
+
5
+ module Bio
6
+ class Gadget < Thor
7
+ namespace :bio
8
+
9
+ desc 'demlt BC POS', 'demultiplex fastq (via STDIN) by barcodes'
10
+ def demlt(bcfile, tmpofs)
11
+
12
+ ofs = tmpofs.to_i
13
+
14
+ wells = Array.new
15
+ bcs = Array.new
16
+ bclens = Array.new
17
+ open(bcfile).each do |line|
18
+ cols = line.rstrip.split
19
+ wells.push(cols[0])
20
+ bcs.push(cols[1])
21
+ bclens.push(cols[1].length)
22
+ end
23
+
24
+ bclens.uniq!
25
+ if bclens.size != 1
26
+ raise 'Inconsistent barcode sequences'
27
+ end
28
+ bclen = bclens[0]
29
+
30
+ ts = Array.new
31
+ qs = Array.new
32
+ (wells + ['other']).each { |well|
33
+ q = Queue.new
34
+ t = Thread.new(well, q) do |well|
35
+ fp = open("| gzip -c > #{well}.fq.gz", 'w')
36
+ while vals = q.shift
37
+ if vals == ""
38
+ break
39
+ else
40
+ fp.puts(vals)
41
+ end
42
+ end
43
+ fp.close()
44
+ end
45
+ qs.push(q)
46
+ ts.push(t)
47
+ }
48
+
49
+ reads = Array.new(bcs.size+1, 0)
50
+ tmpdist = Hash.new
51
+ Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
52
+ seqbc = seq[ofs, bclen]
53
+ bcs.each_index do |i|
54
+ tmpdist[i] = Levenshtein.distance(bcs[i], seqbc)
55
+ end
56
+ dists = tmpdist.sort { |a, b| a[1] <=> b[1] }
57
+ if dists[0][1] < dists[1][1] && dists[0][1] < 2
58
+ idx = dists[0][0]
59
+ qs[idx].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
60
+ reads[idx] = reads[idx]+1
61
+ else
62
+ qs[-1].push(">#{seqid}\n#{seq}\n+\n#{qvs}")
63
+ reads[-1] = reads[-1]+1
64
+ end
65
+ end
66
+
67
+ qs.each { |q| q.push('') }
68
+ ts.each { |t| t.join }
69
+
70
+ total = 0
71
+ bcs.each_index { |i|
72
+ r = reads[i]
73
+ puts "#{bcs[i]}\t#{r}\t#{wells[i]}.fq.gz"
74
+ total = total+r
75
+ }
76
+ puts "Other\t#{reads[-1]}\tother.fq.gz"
77
+ puts '===='
78
+ puts "Total\t#{total+reads[-1]}"
79
+
80
+ end
81
+ end
82
+ end
data/lib/bio-gadget/version.rb CHANGED
@@ -3,7 +3,7 @@ require 'thor'
3
3
  module Bio
4
4
  class Gadget < Thor
5
5
 
6
- VERSION = "0.1.2"
6
+ VERSION = "0.1.3"
7
7
 
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-12 00:00:00.000000000 Z
12
+ date: 2012-12-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: gthor
@@ -43,6 +43,38 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: levenshtein-ffi
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: bio-faster
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
46
78
  description: Gadgets for bioinformatics
47
79
  email:
48
80
  - shintaro.katayama@gmail.com
@@ -58,6 +90,7 @@ files:
58
90
  - Rakefile
59
91
  - bio-gadget.gemspec
60
92
  - lib/bio-gadget.rb
93
+ - lib/bio-gadget/demlt.rb
61
94
  - lib/bio-gadget/fqlzma.rb
62
95
  - lib/bio-gadget/qvstat.rb
63
96
  - lib/bio-gadget/version.rb