bio-gadget 0.4.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ require 'test_helper'
2
+
3
+ class Bio::GadgetTest < Minitest::Test
4
+ def test_that_it_has_a_version_number
5
+ refute_nil ::Bio::Gadget::VERSION
6
+ end
7
+
8
+ def test_it_does_something_useful
9
+ assert false
10
+ end
11
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'bio/gadget'
3
+
4
+ require 'minitest/autorun'
metadata CHANGED
@@ -1,114 +1,181 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gadget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.8
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shintaro Katayama
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2013-06-09 00:00:00.000000000 Z
11
+ date: 2017-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: thor
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
15
43
  requirement: !ruby/object:Gem::Requirement
16
44
  requirements:
17
- - - '>='
45
+ - - ">="
18
46
  - !ruby/object:Gem::Version
19
47
  version: '0'
20
- type: :runtime
48
+ type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
- - - '>='
52
+ - - ">="
25
53
  - !ruby/object:Gem::Version
26
54
  version: '0'
27
55
  - !ruby/object:Gem::Dependency
28
- name: parallel
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bio
29
71
  requirement: !ruby/object:Gem::Requirement
30
72
  requirements:
31
- - - '>='
73
+ - - ">="
32
74
  - !ruby/object:Gem::Version
33
75
  version: '0'
34
76
  type: :runtime
35
77
  prerelease: false
36
78
  version_requirements: !ruby/object:Gem::Requirement
37
79
  requirements:
38
- - - '>='
80
+ - - ">="
39
81
  - !ruby/object:Gem::Version
40
82
  version: '0'
41
83
  - !ruby/object:Gem::Dependency
42
- name: levenshtein-ffi
84
+ name: damerau-levenshtein
43
85
  requirement: !ruby/object:Gem::Requirement
44
86
  requirements:
45
- - - '>='
87
+ - - ">="
46
88
  - !ruby/object:Gem::Version
47
89
  version: '0'
48
90
  type: :runtime
49
91
  prerelease: false
50
92
  version_requirements: !ruby/object:Gem::Requirement
51
93
  requirements:
52
- - - '>='
94
+ - - ">="
53
95
  - !ruby/object:Gem::Version
54
96
  version: '0'
55
97
  - !ruby/object:Gem::Dependency
56
- name: bio-faster
98
+ name: mkfifo
57
99
  requirement: !ruby/object:Gem::Requirement
58
100
  requirements:
59
- - - '>='
101
+ - - ">="
60
102
  - !ruby/object:Gem::Version
61
103
  version: '0'
62
104
  type: :runtime
63
105
  prerelease: false
64
106
  version_requirements: !ruby/object:Gem::Requirement
65
107
  requirements:
66
- - - '>='
108
+ - - ">="
67
109
  - !ruby/object:Gem::Version
68
110
  version: '0'
69
111
  - !ruby/object:Gem::Dependency
70
- name: mkfifo
112
+ name: parallel
71
113
  requirement: !ruby/object:Gem::Requirement
72
114
  requirements:
73
- - - '>='
115
+ - - ">="
74
116
  - !ruby/object:Gem::Version
75
117
  version: '0'
76
118
  type: :runtime
77
119
  prerelease: false
78
120
  version_requirements: !ruby/object:Gem::Requirement
79
121
  requirements:
80
- - - '>='
122
+ - - ">="
81
123
  - !ruby/object:Gem::Version
82
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: thor
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.19.3
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.19.3
83
139
  description: Gadgets for bioinformatics
84
140
  email:
85
141
  - shintaro.katayama@gmail.com
86
142
  executables:
87
143
  - bio-gadget
88
- extensions: []
144
+ - fq1l
145
+ - rbg
146
+ - strt
147
+ extensions:
148
+ - ext/bio_gadget/extconf.rb
89
149
  extra_rdoc_files: []
90
150
  files:
91
- - .gitignore
151
+ - ".gitignore"
152
+ - ".travis.yml"
92
153
  - Gemfile
93
- - Gthorfile
94
154
  - LICENSE
95
155
  - README.org
96
156
  - Rakefile
97
- - bin/bio-gadget
157
+ - bin/console
158
+ - bin/setup
98
159
  - bio-gadget.gemspec
99
- - lib/bio-gadget.rb
100
- - lib/bio-gadget/dedup.rb
101
- - lib/bio-gadget/demlt.rb
102
- - lib/bio-gadget/femrg.rb
103
- - lib/bio-gadget/fqxz.rb
104
- - lib/bio-gadget/peak.rb
105
- - lib/bio-gadget/qvstat.rb
106
- - lib/bio-gadget/rgt2mtx.rb
107
- - lib/bio-gadget/version.rb
108
- - lib/bio-gadget/wig5p.rb
109
- - lib/bio-gadget/wigchr.rb
160
+ - exe/bio-gadget
161
+ - exe/fq1l
162
+ - exe/rbg
163
+ - exe/strt
164
+ - ext/bio_gadget/bio_gadget.c
165
+ - ext/bio_gadget/bio_gadget.h
166
+ - ext/bio_gadget/extconf.rb
167
+ - lib/bio/gadget.rb
168
+ - lib/bio/gadget/fq1l.rb
169
+ - lib/bio/gadget/strt.rb
170
+ - lib/bio/gadget/strt/count.rb
171
+ - lib/bio/gadget/strt/depth.rb
172
+ - lib/bio/gadget/strt/prepare_transcriptome.rb
173
+ - lib/bio/gadgets.rb
174
+ - test/bio/gadget_test.rb
175
+ - test/test_helper.rb
110
176
  homepage: https://github.com/shka/ruby-bio-gadget
111
- licenses: []
177
+ licenses:
178
+ - MIT
112
179
  metadata: {}
113
180
  post_install_message:
114
181
  rdoc_options: []
@@ -116,18 +183,20 @@ require_paths:
116
183
  - lib
117
184
  required_ruby_version: !ruby/object:Gem::Requirement
118
185
  requirements:
119
- - - '>='
186
+ - - ">="
120
187
  - !ruby/object:Gem::Version
121
188
  version: '0'
122
189
  required_rubygems_version: !ruby/object:Gem::Requirement
123
190
  requirements:
124
- - - '>='
191
+ - - ">="
125
192
  - !ruby/object:Gem::Version
126
193
  version: '0'
127
194
  requirements: []
128
195
  rubyforge_project:
129
- rubygems_version: 2.0.0
196
+ rubygems_version: 2.6.8
130
197
  signing_key:
131
198
  specification_version: 4
132
- summary: Gadgets for bioinformatics
133
- test_files: []
199
+ summary: ''
200
+ test_files:
201
+ - test/bio/gadget_test.rb
202
+ - test/test_helper.rb
data/Gthorfile DELETED
@@ -1,2 +0,0 @@
1
- $:.unshift File.expand_path('../lib', __FILE__)
2
- require 'bio-gadget'
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'bio-gadget'
4
-
5
- Bio::Gadget.start
@@ -1,44 +0,0 @@
1
- require 'bio-gadget/version'
2
- require 'bio-gadget/dedup'
3
- require 'bio-gadget/demlt'
4
- require 'bio-gadget/femrg'
5
- require 'bio-gadget/fqxz'
6
- require 'bio-gadget/peak'
7
- require 'bio-gadget/qvstat'
8
- require 'bio-gadget/rgt2mtx'
9
- require 'bio-gadget/wig5p'
10
- require 'bio-gadget/wigchr'
11
-
12
- require 'tempfile'
13
-
14
- module Bio
15
- class Gadget < Thor
16
-
17
- private
18
-
19
- def myopen(file, &block)
20
- # how to write?
21
- f = (/\|/ !~ file && /\.gz$/ =~ file) ? "| gunzip -c #{file}" : file
22
- unless block.nil?
23
- o = open(f); block.call(o); o.close
24
- else
25
- open(f)
26
- end
27
- end
28
-
29
- @@mytemppaths = Array.new
30
-
31
- def mytemppath(basename, tmpdir = Dir::tmpdir)
32
- fp = Tempfile.open(basename, tmpdir)
33
- path = fp.path
34
- @@mytemppaths.push(path)
35
- fp.close!
36
- path
37
- end
38
-
39
- END {
40
- @@mytemppaths.each { |path| File.unlink(path) if File.exist?(path) }
41
- }
42
-
43
- end
44
- end
@@ -1,33 +0,0 @@
1
- require 'bio-faster'
2
- require 'parallel'
3
-
4
- module Bio
5
- class Gadget < Thor
6
- namespace :bio
7
-
8
- desc 'dedup', 'deduplicate fastq (via STDIN)'
9
- def dedup
10
-
11
- p1in, p1out = IO.pipe
12
-
13
- fork {
14
- p1in.close
15
- $stdout.reopen(p1out)
16
- open("| sort -k 1 -r -S #{sprintf('%2d', 100/(Parallel.processor_count+1))}% -T $TMPDIR | cut -f 2- | uniq -f 2", 'w') { |fp|
17
- Bio::Faster.new(:stdin).each_record(:quality => :raw) do |seqid, seq, qvs|
18
- fp.puts "#{seq}#{qvs}\t#{seqid}\t#{qvs}\t#{seq}"
19
- end
20
- }
21
- }
22
-
23
- p1out.close
24
-
25
- p1in.each_line { |line|
26
- seqid, qvs, seq = line.rstrip.split
27
- puts "@#{seqid}\n#{seq}\n+\n#{qvs}"
28
- }
29
-
30
- end
31
-
32
- end
33
- end
@@ -1,149 +0,0 @@
1
- require 'bio-faster'
2
- require 'levenshtein'
3
- require 'mkfifo'
4
- require 'parallel'
5
-
6
- module Bio
7
- class Gadget < Thor
8
-
9
- namespace :bio
10
-
11
- desc 'demlt BARCODE [FASTQ]', "Demultiplex fastq from STDIN by barcodes.\n\n"
12
- option 'output-dir', :aliases => '-o', :type => :string, :default => '.'
13
- option 'umi-length', :aliases => '-u', :type => :numeric, :default => 4, :desc => '0 is no umi, means no PCR-amplicon reduction.'
14
- option 'cdna-length', :aliases => '-c', :type => :numeric, :default => 37, :desc => 'Trimming length before PCA-amplicon reduction. -1 is no trimming by length.'
15
- option 'g-trimming', :aliases => '-g', :type => :boolean, :default => false, :desc => "Trimming of 5'-end poly-G. Length of the trimmed Gs attached after the read name."
16
- option 'q-trimming', :aliases => '-q', :type => :string, :default => '~', :desc => "Quality threshold - nucleotides with lower quality will be trimmed, from the end of the sequence. '~' is no trimming by quality, because this is the maximum quality base character."
17
- option 'min-length', :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Length threshold - sequences shorter than this after trimming will be filtered out. 0 is no filtering.'
18
- def demlt(bcfile, fastq=:stdin)
19
-
20
- ofs = options['umi-length']
21
- clen = options['cdna-length']
22
- gtrim = options['g-trimming']
23
- qtrim = options['q-trimming']
24
- mlen = options['min-length']
25
-
26
- wells = Array.new
27
- bcs = Array.new
28
- bclens = Array.new
29
- open(bcfile).each do |line|
30
- cols = line.rstrip.split
31
- wells.push(cols[0])
32
- bcs.push(cols[1])
33
- bclens.push(cols[1].length)
34
- end
35
-
36
- bclens.uniq!
37
- if bclens.size != 1
38
- raise 'Inconsistent barcode sequence lengths'
39
- end
40
- bclen = bclens[0]
41
-
42
- procs = Parallel.processor_count
43
-
44
- fifo1paths = Array.new
45
- procs.times { |i|
46
- fifo1path = mytemppath('fifo1-')
47
- File.mkfifo(fifo1path)
48
- fifo1paths.push(fifo1path)
49
- }
50
- pid = Kernel.fork {
51
- fifo1s = Array.new
52
- fifo1paths.each { |fifo1path| fifo1s.push(open(fifo1path, 'w')) }
53
- total = 0
54
- Bio::Faster.new(fastq).each_record(:quality => :raw) do |vals|
55
- fifo1 = fifo1s[total % procs]
56
- fifo1.puts(vals.join("\t"))
57
- total += 1
58
- end
59
- fifo1s.each { |fifo1| fifo1.close }
60
- Kernel.exit!
61
- }
62
-
63
- fifo2paths = Array.new
64
- procs.times { |i|
65
- fifo2path = mytemppath('fifo2-')
66
- File.mkfifo(fifo2path)
67
- fifo2paths.push(fifo2path)
68
- pid = Kernel.fork {
69
- open(fifo2path, 'w') { |fifo2|
70
- open(fifo1paths[i], 'r').each { |line|
71
- seqid, seq, qvs = line.rstrip.split(/\t/)
72
- tmpdists = Hash.new
73
- bcs.each_index { |bcidx|
74
- tmpdists[bcidx] = Levenshtein.distance(bcs[bcidx], seq[ofs, bclen])
75
- }
76
- dists = tmpdists.sort { |a, b| a[1] <=> b[1] }
77
- bc = dists[0][1] < 2 && dists[0][1] < dists[1][1] ? dists[0][0] : -1
78
- fifo2.puts("#{bc}\t#{seqid}\t#{seq}\t#{qvs}")
79
- }
80
- }
81
- Kernel.exit!
82
- }
83
- }
84
-
85
- tmpwells = wells + ['other']
86
-
87
- fifo3paths = Array.new
88
- tmpwells.each_index { |i|
89
- fifo3path = mytemppath('fifo3-')
90
- File.mkfifo(fifo3path)
91
- fifo3paths.push(fifo3path)
92
- }
93
- pid = Kernel.fork {
94
- fifo2s = Array.new
95
- fifo2paths.each { |fifo2path| fifo2s.push(open(fifo2path, 'r')) }
96
- fifo2done = Hash.new
97
- fifo3s = Array.new
98
- fifo3paths.each { |fifo3path| fifo3s.push(open(fifo3path, 'w')) }
99
- fifo2s.cycle { |fifo2|
100
- unless fifo2done.key?(fifo2)
101
- line = fifo2.gets
102
- if line.nil?
103
- fifo2done[fifo2] = ''
104
- else
105
- bcs, seqid, seq, qvs = line.rstrip.split(/\t/)
106
- fifo3 = fifo3s[bcs.to_i]
107
- fifo3.puts([seqid, seq, qvs].join("\t"))
108
- end
109
- end
110
- if fifo2done.size == fifo2s.size
111
- break
112
- end
113
- }
114
- fifo2s.each { |fifo2| fifo2.close }
115
- fifo3s.each { |fifo3| fifo3.close }
116
- Kernel.exit!
117
- }
118
-
119
- tmpwells.each_index { |i|
120
- well = tmpwells[i]
121
- outpath = "#{options['output-dir']}/#{well}.fq.xz"
122
- pid = Kernel.fork {
123
- left = ofs+bclen
124
- right = clen > -1 ? -1 : ofs+bclen+clen-1
125
- preprocess = ofs > 0 ? <<"DEDUPandFORMAT"
126
- ruby -F'\\t' -anle 'f1=$F[1][0..#{right}];f2=$F[2][0..#{right}];puts([f1+f2, $F[0], f2, f1].join("\\t"))' #{fifo3paths[i]} \\
127
- | sort -k 1 -r | cut -f 2- | uniq -f 2 \\
128
- | ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[2][#{left}..-1], "+", $F[1][#{left}..-1]].join("\\n"))' \\
129
- DEDUPandFORMAT
130
- : <<"FORMAT"
131
- ruby -F'\\t' -anle 'puts(["@"+$F[0], $F[1][#{left}..#{right}], "+", $F[2][#{left}..#{right}].rstrip].join("\\n"))' #{fifo3paths[i]} \\
132
- FORMAT
133
-
134
- preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|s=v[1].gsub(/^G+/,"");l=v[1].length-s.length;puts("@#{v[0]}|-G#{l}\\n#{s}\\n+\\n#{v[2][l,s.length]}") if s.length>0}\'' if gtrim
135
-
136
- if qtrim != '~' || mlen > 0
137
- preprocess += '| ruby -e \'require "bio-faster";Bio::Faster.new(:stdin).each_record(:quality=>:raw){|v|m=v[2].length-1;0.upto(m){|i|if v[2][i]<"'+qtrim+'" then m=i-1;break;end};puts("@#{v[0]}\n#{v[1][0..m]}\n+\n#{v[2][0..m]}") if m+1>='+mlen.to_s+'}\''
138
- end
139
-
140
- exec preprocess+"| xz -z -c -e > #{outpath}"
141
- }
142
- }
143
-
144
- Process.waitall
145
-
146
- end
147
-
148
- end
149
- end