bio-gadget 0.4.8 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ require 'test_helper'
2
+
3
+ class Bio::GadgetTest < Minitest::Test
4
+ def test_that_it_has_a_version_number
5
+ refute_nil ::Bio::Gadget::VERSION
6
+ end
7
+
8
+ def test_it_does_something_useful
9
+ assert false
10
+ end
11
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'bio/gadget'
3
+
4
+ require 'minitest/autorun'
metadata CHANGED
@@ -1,114 +1,181 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.8
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shintaro Katayama
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2013-06-09 00:00:00.000000000 Z
11
+ date: 2017-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: thor
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
15
43
  requirement: !ruby/object:Gem::Requirement
16
44
  requirements:
17
- - - '>='
45
+ - - ">="
18
46
  - !ruby/object:Gem::Version
19
47
  version: '0'
20
- type: :runtime
48
+ type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
- - - '>='
52
+ - - ">="
25
53
  - !ruby/object:Gem::Version
26
54
  version: '0'
27
55
  - !ruby/object:Gem::Dependency
28
- name: parallel
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bio
29
71
  requirement: !ruby/object:Gem::Requirement
30
72
  requirements:
31
- - - '>='
73
+ - - ">="
32
74
  - !ruby/object:Gem::Version
33
75
  version: '0'
34
76
  type: :runtime
35
77
  prerelease: false
36
78
  version_requirements: !ruby/object:Gem::Requirement
37
79
  requirements:
38
- - - '>='
80
+ - - ">="
39
81
  - !ruby/object:Gem::Version
40
82
  version: '0'
41
83
  - !ruby/object:Gem::Dependency
42
- name: levenshtein-ffi
84
+ name: damerau-levenshtein
43
85
  requirement: !ruby/object:Gem::Requirement
44
86
  requirements:
45
- - - '>='
87
+ - - ">="
46
88
  - !ruby/object:Gem::Version
47
89
  version: '0'
48
90
  type: :runtime
49
91
  prerelease: false
50
92
  version_requirements: !ruby/object:Gem::Requirement
51
93
  requirements:
52
- - - '>='
94
+ - - ">="
53
95
  - !ruby/object:Gem::Version
54
96
  version: '0'
55
97
  - !ruby/object:Gem::Dependency
56
- name: bio-faster
98
+ name: mkfifo
57
99
  requirement: !ruby/object:Gem::Requirement
58
100
  requirements:
59
- - - '>='
101
+ - - ">="
60
102
  - !ruby/object:Gem::Version
61
103
  version: '0'
62
104
  type: :runtime
63
105
  prerelease: false
64
106
  version_requirements: !ruby/object:Gem::Requirement
65
107
  requirements:
66
- - - '>='
108
+ - - ">="
67
109
  - !ruby/object:Gem::Version
68
110
  version: '0'
69
111
  - !ruby/object:Gem::Dependency
70
- name: mkfifo
112
+ name: parallel
71
113
  requirement: !ruby/object:Gem::Requirement
72
114
  requirements:
73
- - - '>='
115
+ - - ">="
74
116
  - !ruby/object:Gem::Version
75
117
  version: '0'
76
118
  type: :runtime
77
119
  prerelease: false
78
120
  version_requirements: !ruby/object:Gem::Requirement
79
121
  requirements:
80
- - - '>='
122
+ - - ">="
81
123
  - !ruby/object:Gem::Version
82
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: thor
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.19.3
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.19.3
83
139
  description: Gadgets for bioinformatics
84
140
  email:
85
141
  - shintaro.katayama@gmail.com
86
142
  executables:
87
143
  - bio-gadget
88
- extensions: []
144
+ - fq1l
145
+ - rbg
146
+ - strt
147
+ extensions:
148
+ - ext/bio_gadget/extconf.rb
89
149
  extra_rdoc_files: []
90
150
  files:
91
- - .gitignore
151
+ - ".gitignore"
152
+ - ".travis.yml"
92
153
  - Gemfile
93
- - Gthorfile
94
154
  - LICENSE
95
155
  - README.org
96
156
  - Rakefile
97
- - bin/bio-gadget
157
+ - bin/console
158
+ - bin/setup
98
159
  - bio-gadget.gemspec
99
- - lib/bio-gadget.rb
100
- - lib/bio-gadget/dedup.rb
101
- - lib/bio-gadget/demlt.rb
102
- - lib/bio-gadget/femrg.rb
103
- - lib/bio-gadget/fqxz.rb
104
- - lib/bio-gadget/peak.rb
105
- - lib/bio-gadget/qvstat.rb
106
- - lib/bio-gadget/rgt2mtx.rb
107
- - lib/bio-gadget/version.rb
108
- - lib/bio-gadget/wig5p.rb
109
- - lib/bio-gadget/wigchr.rb
160
+ - exe/bio-gadget
161
+ - exe/fq1l
162
+ - exe/rbg
163
+ - exe/strt
164
+ - ext/bio_gadget/bio_gadget.c
165
+ - ext/bio_gadget/bio_gadget.h
166
+ - ext/bio_gadget/extconf.rb
167
+ - lib/bio/gadget.rb
168
+ - lib/bio/gadget/fq1l.rb
169
+ - lib/bio/gadget/strt.rb
170
+ - lib/bio/gadget/strt/count.rb
171
+ - lib/bio/gadget/strt/depth.rb
172
+ - lib/bio/gadget/strt/prepare_transcriptome.rb
173
+ - lib/bio/gadgets.rb
174
+ - test/bio/gadget_test.rb
175
+ - test/test_helper.rb
110
176
  homepage: https://github.com/shka/ruby-bio-gadget
111
- licenses: []
177
+ licenses:
178
+ - MIT
112
179
  metadata: {}
113
180
  post_install_message:
114
181
  rdoc_options: []
@@ -116,18 +183,20 @@ require_paths:
116
183
  - lib
117
184
  required_ruby_version: !ruby/object:Gem::Requirement
118
185
  requirements:
119
- - - '>='
186
+ - - ">="
120
187
  - !ruby/object:Gem::Version
121
188
  version: '0'
122
189
  required_rubygems_version: !ruby/object:Gem::Requirement
123
190
  requirements:
124
- - - '>='
191
+ - - ">="
125
192
  - !ruby/object:Gem::Version
126
193
  version: '0'
127
194
  requirements: []
128
195
  rubyforge_project:
129
- rubygems_version: 2.0.0
196
+ rubygems_version: 2.6.8
130
197
  signing_key:
131
198
  specification_version: 4
132
- summary: Gadgets for bioinformatics
133
- test_files: []
199
+ summary: ''
200
+ test_files:
201
+ - test/bio/gadget_test.rb
202
+ - test/test_helper.rb
data/Gthorfile DELETED
@@ -1,2 +0,0 @@
1
- $:.unshift File.expand_path('../lib', __FILE__)
2
- require 'bio-gadget'
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'bio-gadget'
4
-
5
- Bio::Gadget.start
@@ -1,44 +0,0 @@
1
- require 'bio-gadget/version'
2
- require 'bio-gadget/dedup'
3
- require 'bio-gadget/demlt'
4
- require 'bio-gadget/femrg'
5
- require 'bio-gadget/fqxz'
6
- require 'bio-gadget/peak'
7
- require 'bio-gadget/qvstat'
8
- require 'bio-gadget/rgt2mtx'
9
- require 'bio-gadget/wig5p'
10
- require 'bio-gadget/wigchr'
11
-
12
- require 'tempfile'
13
-
14
- module Bio
15
- class Gadget < Thor
16
-
17
- private
18
-
19
- def myopen(file, &block)
20
- # how to write?
21
- f = (/\|/ !~ file && /\.gz$/ =~ file) ? "| gunzip -c #{file}" : file
22
- unless block.nil?
23
- o = open(f); block.call(o); o.close
24
- else
25
- open(f)
26
- end
27
- end
28
-
29
- @@mytemppaths = Array.new
30
-
31
- def mytemppath(basename, tmpdir = Dir::tmpdir)
32
- fp = Tempfile.open(basename, tmpdir)
33
- path = fp.path
34
- @@mytemppaths.push(path)
35
- fp.close!
36
- path
37
- end
38
-
39
- END {
40
- @@mytemppaths.each { |path| File.unlink(path) if File.exist?(path) }
41
- }
42
-
43
- end
44
- end
@@ -1,33 +0,0 @@
1
- require 'bio-faster'
2
- require 'parallel'
3
-
4
- module Bio
5
- class Gadget < Thor
6
- namespace :bio
7
-
8
- desc 'dedup', 'deduplicate fastq (via STDIN)'
9
- def dedup
10
-
11
- p1in, p1out = IO.pipe
12
-
13
- fork {
14
- p1in.close
15
- $stdout.reopen(p1out)
16
- open("| sort -k 1 -r -S #{sprintf('%2d', 100/(Parallel.processor_count+1))}% -T $TMPDIR | cut -f 2- | uniq -f 2", 'w') { |fp|
17
- Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
18
- fp.puts "#{seq}#{qvs}\t#{seqid}\t#{qvs}\t#{seq}"
19
- end
20
- }
21
- }
22
-
23
- p1out.close
24
-
25
- p1in.each_line { |line|
26
- seqid, qvs, seq = line.rstrip.split
27
- puts "@#{seqid}\n#{seq}\n+\n#{qvs}"
28
- }
29
-
30
- end
31
-
32
- end
33
- end
@@ -1,149 +0,0 @@
1
- require 'bio-faster'
2
- require 'levenshtein'
3
- require 'mkfifo'
4
- require 'parallel'
5
-
6
- module Bio
7
- class Gadget < Thor
8
-
9
- namespace :bio
10
-
11
- desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
12
- option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
13
- option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
14
- option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCA-amplicon reduction. -1 is no trimming by length.'
15
- option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
16
- option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
17
- option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
18
- def demlt(bcfile, fastq=:stdin)
19
-
20
- ofs = options['umi-length']
21
- clen = options['cdna-length']
22
- gtrim = options['g-trimming']
23
- qtrim = options['q-trimming']
24
- mlen = options['min-length']
25
-
26
- wells = Array.new
27
- bcs = Array.new
28
- bclens = Array.new
29
- open(bcfile).each do |line|
30
- cols = line.rstrip.split
31
- wells.push(cols[0])
32
- bcs.push(cols[1])
33
- bclens.push(cols[1].length)
34
- end
35
-
36
- bclens.uniq!
37
- if bclens.size != 1
38
- raise 'Inconsistent barcode sequence lengths'
39
- end
40
- bclen = bclens[0]
41
-
42
- procs = Parallel.processor_count
43
-
44
- fifo1paths = Array.new
45
- procs.times { |i|
46
- fifo1path = mytemppath('fifo1-')
47
- File.mkfifo(fifo1path)
48
- fifo1paths.push(fifo1path)
49
- }
50
- pid = Kernel.fork {
51
- fifo1s = Array.new
52
- fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
53
- total = 0
54
- Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
55
- fifo1 = fifo1s[total % procs]
56
- fifo1.puts(vals.join("\t"))
57
- total += 1
58
- end
59
- fifo1s.each { |fifo1| fifo1.close }
60
- Kernel.exit!
61
- }
62
-
63
- fifo2paths = Array.new
64
- procs.times { |i|
65
- fifo2path = mytemppath('fifo2-')
66
- File.mkfifo(fifo2path)
67
- fifo2paths.push(fifo2path)
68
- pid = Kernel.fork {
69
- open(fifo2path, 'w') { |fifo2|
70
- open(fifo1paths[i], 'r').each { |line|
71
- seqid, seq, qvs = line.rstrip.split(/\t/)
72
- tmpdists = Hash.new
73
- bcs.each_index { |bcidx|
74
- tmpdists[bcidx] = Levenshtein.distance(bcs[bcidx], seq[ofs, bclen])
75
- }
76
- dists = tmpdists.sort { |a, b| a[1] <=> b[1] }
77
- bc = dists[0][1] < 2 && dists[0][1] < dists[1][1] ? dists[0][0] : -1
78
- fifo2.puts("#{bc}\t#{seqid}\t#{seq}\t#{qvs}")
79
- }
80
- }
81
- Kernel.exit!
82
- }
83
- }
84
-
85
- tmpwells = wells + ['other']
86
-
87
- fifo3paths = Array.new
88
- tmpwells.each_index { |i|
89
- fifo3path = mytemppath('fifo3-')
90
- File.mkfifo(fifo3path)
91
- fifo3paths.push(fifo3path)
92
- }
93
- pid = Kernel.fork {
94
- fifo2s = Array.new
95
- fifo2paths.each { |fifo2path| fifo2s.push(open(fifo2path, 'r')) }
96
- fifo2done = Hash.new
97
- fifo3s = Array.new
98
- fifo3paths.each { |fifo3path| fifo3s.push(open(fifo3path, 'w')) }
99
- fifo2s.cycle { |fifo2|
100
- unless fifo2done.key?(fifo2)
101
- line = fifo2.gets
102
- if line.nil?
103
- fifo2done[fifo2] = ''
104
- else
105
- bcs, seqid, seq, qvs = line.rstrip.split(/\t/)
106
- fifo3 = fifo3s[bcs.to_i]
107
- fifo3.puts([seqid, seq, qvs].join("\t"))
108
- end
109
- end
110
- if fifo2done.size == fifo2s.size
111
- break
112
- end
113
- }
114
- fifo2s.each { |fifo2| fifo2.close }
115
- fifo3s.each { |fifo3| fifo3.close }
116
- Kernel.exit!
117
- }
118
-
119
- tmpwells.each_index { |i|
120
- well = tmpwells[i]
121
- outpath = "#{options['output-dir']}/#{well}.fq.xz"
122
- pid = Kernel.fork {
123
- left = ofs+bclen
124
- right = clen > -1 ? -1 : ofs+bclen+clen-1
125
- preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
126
- ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
127
- | sort -k 1 -r | cut -f 2- | uniq -f 2 \\
128
- | ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[2][#{left}..-1], "+", $F[1][#{left}..-1]].join("\\n"))' \\
129
- DEDUPandFORMAT
130
- : <<"FORMAT"
131
- ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
132
- FORMAT
133
-
134
- preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
135
-
136
- if qtrim != '~' || mlen > 0
137
- preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m+1>='+mlen.to_s+'}\''
138
- end
139
-
140
- exec preprocess+"| xz -z -c -e > #{outpath}"
141
- }
142
- }
143
-
144
- Process.waitall
145
-
146
- end
147
-
148
- end
149
- end