bio-gadget 0.4.8 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 919243e81fb8eecaf1e9af9f85914439f3aa27d5
4
- data.tar.gz: 5c1a8c551f09e46857720493b9495fe499778d27
3
+ metadata.gz: 8aea0f635b0ee3678108495d6fcd104f1ec7293e
4
+ data.tar.gz: 895f8bc130d316f85d376f85647c420b9152409e
5
5
  SHA512:
6
- metadata.gz: 20827e3e37d7360508c52b49d3f4ccfb0b034a647656088c56f9aedd1015794ce766a6069b3b48fd18285461488de9bec764753e141752156c354b3c91e3fe5a
7
- data.tar.gz: aae01b065bf04def5d836cfc18c2b3ada4d8c8210f23d2b26796d32cffbfaebd2c0e20b5bad4a69e2cd5dc68e243c04921cdbe1587c3ae0560881c0238bbee75
6
+ metadata.gz: 26ac831558ebfe3e03a826d675ad492245b23556698661b143b83716a61e93d2071c6fb194067db221c4c87b6c4d8cb1079d96c688053ad3b804814ecd54253c
7
+ data.tar.gz: 92d48f2548473d0ce0f842c97b30a623973e2a083de34719f43df46206ff6a5319ad13558a166052227ac1dcbbffa2dbefd2a2560ff79bb5a321466a5486b8d2
data/.gitignore CHANGED
@@ -1,21 +1,11 @@
1
- *.gem
2
- *.rbc
3
- .bundle
4
- .config
5
- .yardoc
6
- Gemfile.lock
7
- InstalledFiles
8
- _yardoc
9
- coverage
10
- doc/
11
- lib/bundler/man
12
- pkg
13
- rdoc
14
- spec/reports
15
- test/tmp
16
- test/version_tmp
17
- tmp
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
18
10
  *~
19
- .#*
20
- .fuse*
21
-
11
+ .DS_Store
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012 Shintaro Katayama
1
+ Copyright (c) 2012-2016 Shintaro Katayama
2
2
 
3
3
  MIT License
4
4
 
data/README.org CHANGED
@@ -12,24 +12,3 @@ To check all commands and the usages in this package
12
12
 
13
13
  : bio-gadget help
14
14
 
15
- Currently available commands are
16
-
17
- - dedup :: Deduplicate fastq (via STDIN)
18
- - demlt :: Demultiplex fastq by barcodes
19
- - femrg :: Extract and merge first exons
20
- - fqxz :: (Re)compression of *.fq(.gz|.bz2) files
21
- - pead :: Find peak within each exon from wigs by a mojority vote.
22
- - qvstat :: Statistics of quality values in *.qual file
23
- - rgt2mtx :: Convert cuffdiff read group tracking file into tab-separated matrix
24
- - wig5p :: Convert bam-format alignments into wig-format table
25
- - wigchr :: Extract wiggle track on specified chromosome
26
-
27
- * Contributing
28
-
29
- 1. Fork it
30
- 2. Create your feature branch (`git checkout -b my-new-feature`)
31
- 3. (optional) Install gems (`bundle`)
32
- 4. Add your feature
33
- 5. Commit your changes (`git commit -am 'Added some feature'`)
34
- 6. Push to the branch (`git push origin my-new-feature`)
35
- 7. Create new Pull Request
data/Rakefile CHANGED
@@ -1,2 +1,6 @@
1
- #!/usr/bin/env rake
2
1
  require "bundler/gem_tasks"
2
+ require "rake/extensiontask"
3
+
4
+ Rake::ExtensionTask.new "bio_gadget" do |ext|
5
+ ext.lib_dir = "lib/bio/gadget"
6
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "test"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -1,22 +1,28 @@
1
- require File.expand_path('../lib/bio-gadget/version', __FILE__)
2
-
3
1
  Gem::Specification.new do |gem|
4
- gem.authors = ["Shintaro Katayama"]
5
- gem.email = ["shintaro.katayama@gmail.com"]
6
- gem.description = %q{Gadgets for bioinformatics}
2
+ gem.name = 'bio-gadget'
3
+ gem.version = '0.5.0'
4
+ gem.licenses = ['MIT']
7
5
  gem.summary = gem.description
8
- gem.homepage = "https://github.com/shka/ruby-bio-gadget"
6
+ gem.description = %q{Gadgets for bioinformatics}
7
+ gem.authors = ['Shintaro Katayama']
8
+ gem.email = ['shintaro.katayama@gmail.com']
9
+ gem.homepage = 'https://github.com/shka/ruby-bio-gadget'
10
+ gem.extensions = %w[ext/bio_gadget/extconf.rb]
9
11
 
10
12
  gem.files = `git ls-files`.split($\)
11
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.bindir = 'exe'
14
+ gem.executables = gem.files.grep(%r{^exe/}).map{ |f| File.basename(f) }
12
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
13
- gem.name = "bio-gadget"
14
- gem.require_paths = ["lib"]
15
- gem.version = Bio::Gadget::VERSION
16
+ gem.require_paths = ['lib']
16
17
 
17
- gem.add_dependency 'thor'
18
- gem.add_dependency 'parallel'
19
- gem.add_dependency 'levenshtein-ffi'
20
- gem.add_dependency 'bio-faster'
18
+ gem.add_development_dependency 'bundler', '~> 1.12'
19
+ gem.add_development_dependency 'rake', '~> 10.0'
20
+ gem.add_development_dependency 'rake-compiler'
21
+ gem.add_development_dependency 'minitest', '~> 5.0'
22
+
23
+ gem.add_dependency 'bio'
24
+ gem.add_dependency 'damerau-levenshtein'
21
25
  gem.add_dependency 'mkfifo'
26
+ gem.add_dependency 'parallel'
27
+ gem.add_dependency 'thor' , '~> 0.19.3'
22
28
  end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio/gadget'
4
+
5
+ module Bio
6
+ class Gadgets < Bio::Gadget
7
+
8
+ register(Bio::Gadget::Fq1l, 'fq1l', 'fq1l [COMMAND]', 'Tools for oneline-fastq; fq1l in the alias')
9
+ register(Bio::Gadget::Strt, 'strt', 'strt [COMMAND]', 'Tools for STRT RNAseq; strt is the alias')
10
+
11
+ end
12
+ end
13
+
14
+ Bio::Gadgets.start
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio/gadget'
4
+
5
+ Bio::Gadget::Fq1l.start
data/exe/rbg ADDED
@@ -0,0 +1 @@
1
+ exe/bio-gadget
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio/gadget'
4
+
5
+ Bio::Gadget::Strt.start
@@ -0,0 +1,313 @@
1
+ #include <limits.h>
2
+ #include <regex.h>
3
+ #include <stdio.h>
4
+ #include <string.h>
5
+ #include "bio_gadget.h"
6
+
7
+ VALUE bio_gadget_fq1l_i2i(vSelf, vFirst, vLast)
8
+ VALUE vSelf;
9
+ VALUE vFirst;
10
+ VALUE vLast;
11
+ {
12
+ char line[BUFSIZE];
13
+ char index[BUFSIZE] = "";
14
+ char *acc;
15
+ char *seq;
16
+ char *sep;
17
+ char *qual;
18
+ unsigned long head;
19
+ unsigned long length;
20
+
21
+ head = NUM2INT(vFirst)-1;
22
+ length = NUM2INT(vLast)-head;
23
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
24
+ acc = strtok(line, "\t");
25
+ seq = strtok(NULL, "\t");
26
+ sep = strtok(NULL, "\t");
27
+ qual = strtok(NULL, "\t");
28
+ strcpy(index, seq+head);
29
+ index[length] = 0;
30
+ printf("%s%s\t%s\t%s\t%s", acc, index, seq, sep, qual);
31
+ }
32
+
33
+ return Qnil;
34
+ }
35
+
36
+ VALUE bio_gadget_fq1l_nr_deg(vSelf)
37
+ VALUE vSelf;
38
+ {
39
+ char line[BUFSIZE];
40
+ char *acc;
41
+ char *seq;
42
+ char *sep;
43
+ char *qual;
44
+ char pseq[BUFSIZE] = "";
45
+ unsigned long pseql = ULONG_MAX;
46
+ char regexs[BUFSIZE];
47
+ regex_t regexc;
48
+
49
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
50
+ acc = strtok(line, "\t");
51
+ seq = strtok(NULL, "\t");
52
+ sep = strtok(NULL, "\t");
53
+ qual = strtok(NULL, "\t");
54
+ if (strlen(seq) >= pseql) {
55
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
56
+ strcpy(pseq, seq);
57
+ pseql = strlen(seq);
58
+ } else {
59
+ sprintf(regexs, "^%s", seq);
60
+ regcomp(&regexc, regexs, REG_NOSUB);
61
+ if (regexec(&regexc, pseq, 0, NULL, 0) == REG_NOMATCH) {
62
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
63
+ strcpy(pseq, seq);
64
+ pseql = strlen(seq);
65
+ }
66
+ }
67
+ regfree(&regexc);
68
+ }
69
+
70
+ return Qnil;
71
+ }
72
+
73
+ VALUE bio_gadget_fq1l_nr_std(vSelf)
74
+ VALUE vSelf;
75
+ {
76
+ char line[BUFSIZE];
77
+ char *acc;
78
+ char *seq;
79
+ char *sep;
80
+ char *qual;
81
+ char pseq[BUFSIZE] = "";
82
+
83
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
84
+ acc = strtok(line, "\t");
85
+ seq = strtok(NULL, "\t");
86
+ sep = strtok(NULL, "\t");
87
+ qual = strtok(NULL, "\t");
88
+ if (strcmp(pseq, seq) != 0) {
89
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
90
+ strcpy(pseq, seq);
91
+ }
92
+ }
93
+
94
+ return Qnil;
95
+ }
96
+
97
+ VALUE bio_gadget_fq1l_slice(vSelf, vNth, vSlice)
98
+ VALUE vSelf;
99
+ VALUE vNth;
100
+ VALUE vSlice;
101
+ {
102
+ char line[BUFSIZE];
103
+ unsigned int nth;
104
+ unsigned int slice;
105
+ unsigned long count;
106
+
107
+ nth = NUM2INT(vNth)-1;
108
+ slice = NUM2INT(vSlice);
109
+ count = 0;
110
+
111
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
112
+ if (count % slice == nth)
113
+ fputs(line, stdout);
114
+ count += 1;
115
+ }
116
+
117
+ return Qnil;
118
+ }
119
+
120
+ VALUE bio_gadget_fq1l_t3(vSelf, vCmdIn, vLen, vMinLen, vPathOut)
121
+ VALUE vSelf;
122
+ VALUE vCmdIn;
123
+ VALUE vLen;
124
+ VALUE vMinLen;
125
+ VALUE vPathOut;
126
+ {
127
+ char line[BUFSIZE];
128
+ char *acc;
129
+ char *seq;
130
+ char *sep;
131
+ char *qual;
132
+ int len;
133
+ unsigned long minlen;
134
+ unsigned long seqlen;
135
+ FILE *fp_in;
136
+ FILE *fp_out;
137
+
138
+ fp_in = RTEST(vCmdIn) ? popen(StringValueCStr(vCmdIn), "r") : stdin;
139
+ fp_out = RTEST(vPathOut) ? fopen(StringValueCStr(vPathOut), "w") : stdout;
140
+ len = NUM2INT(vLen);
141
+ minlen = NUM2INT(vMinLen);
142
+
143
+ while(fgets(line, BUFSIZE, fp_in) != NULL) {
144
+ acc = strtok(line, "\t");
145
+ seq = strtok(NULL, "\t");
146
+ seqlen = strlen(seq)-len;
147
+ if (seqlen > 0 && seqlen >= minlen) {
148
+ sep = strtok(NULL, "\t");
149
+ qual = strtok(NULL, "\t");
150
+ seq[seqlen] = 0;
151
+ qual[seqlen] = 0;
152
+ fprintf(fp_out, "%s\t%s\t%s\t%s\n", acc, seq, sep, qual);
153
+ }
154
+ }
155
+ fclose(fp_out);
156
+ fclose(fp_in);
157
+
158
+ return Qnil;
159
+ }
160
+
161
+ VALUE bio_gadget_fq1l_t3q(vSelf, vLQs, vMinLen)
162
+ VALUE vSelf;
163
+ VALUE vLQs;
164
+ VALUE vMinLen;
165
+ {
166
+ char line[BUFSIZE];
167
+ char *acc;
168
+ char *seq;
169
+ char *sep;
170
+ char *qual;
171
+ char *lqs;
172
+ unsigned long minlen;
173
+ unsigned long seqlen;
174
+
175
+ lqs = StringValueCStr(vLQs);
176
+ minlen = NUM2INT(vMinLen);
177
+
178
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
179
+ acc = strtok(line, "\t");
180
+ seq = strtok(NULL, "\t");
181
+ sep = strtok(NULL, "\t");
182
+ qual = strtok(NULL, "\t\n");
183
+ seqlen = strcspn(qual, lqs);
184
+ if (seqlen > 0 && seqlen >= minlen) {
185
+ if (seqlen < strlen(qual)) {
186
+ seq[seqlen] = 0;
187
+ qual[seqlen] = 0;
188
+ }
189
+ printf("%s\t%s\t%s\t%s\n", acc, seq, sep, qual);
190
+ }
191
+ }
192
+
193
+ return Qnil;
194
+ }
195
+
196
+ VALUE bio_gadget_fq1l_t5(vSelf, vPattern, vMinLen)
197
+ VALUE vSelf;
198
+ VALUE vPattern;
199
+ VALUE vMinLen;
200
+ {
201
+ char regexs[BUFSIZE];
202
+ regex_t regexc;
203
+ unsigned long minlen;
204
+ char line[BUFSIZE];
205
+ regmatch_t match[1];
206
+ char *acc;
207
+ char *seq;
208
+ char *sep;
209
+ char *qual;
210
+
211
+ sprintf(regexs, "^%s", StringValueCStr(vPattern));
212
+ regcomp(&regexc, regexs, REG_EXTENDED);
213
+
214
+ minlen = NUM2INT(vMinLen);
215
+
216
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
217
+ acc = strtok(line, "\t");
218
+ seq = strtok(NULL, "\t");
219
+ sep = strtok(NULL, "\t");
220
+ qual = strtok(NULL, "\t");
221
+ if(regexec(&regexc, seq, 1, match, 0) != REG_NOMATCH) {
222
+ seq += match[0].rm_eo;
223
+ qual += match[0].rm_eo;
224
+ if(strlen(seq) >= minlen)
225
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
226
+ } else
227
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
228
+ }
229
+
230
+ regfree(&regexc);
231
+ return Qnil;
232
+ }
233
+
234
+ VALUE bio_gadget_fq1l_to(vSelf, vDraw, vSkip)
235
+ VALUE vSelf;
236
+ VALUE vDraw;
237
+ VALUE vSkip;
238
+ {
239
+ char line[BUFSIZE];
240
+ unsigned int draw;
241
+ unsigned int skip;
242
+ unsigned int sum;
243
+ unsigned long count;
244
+
245
+ draw = NUM2INT(vDraw);
246
+ skip = NUM2INT(vSkip);
247
+ sum = draw + skip;
248
+ count = 0;
249
+
250
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
251
+ if (count % sum < draw)
252
+ fputs(line, stdout);
253
+ count += 1;
254
+ }
255
+
256
+ return Qnil;
257
+ }
258
+
259
+ VALUE bio_gadget_fq1l_u2i(vSelf, vFirst, vLast)
260
+ VALUE vSelf;
261
+ VALUE vFirst;
262
+ VALUE vLast;
263
+ {
264
+ char line[BUFSIZE];
265
+ char index[BUFSIZE] = "";
266
+ char *acc;
267
+ char *acc1;
268
+ char *acc2;
269
+ char *seq;
270
+ char *sep;
271
+ char *qual;
272
+ unsigned long head;
273
+ unsigned long length;
274
+
275
+ head = NUM2INT(vFirst)-1;
276
+ length = NUM2INT(vLast)-head;
277
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
278
+ acc = strtok(line, "\t");
279
+ seq = strtok(NULL, "\t");
280
+ sep = strtok(NULL, "\t");
281
+ qual = strtok(NULL, "\t");
282
+ strcpy(index, seq+head);
283
+ index[length] = 0;
284
+ acc1 = strtok(acc, " ");
285
+ acc2 = strtok(NULL, " ");
286
+ if(acc2 == NULL) {
287
+ printf("%s:%s\t%s\t%s\t%s", acc1, index, seq, sep, qual);
288
+ }
289
+ else {
290
+ printf("%s:%s %s\t%s\t%s\t%s", acc1, index, acc2, seq, sep, qual);
291
+ }
292
+ }
293
+
294
+ return Qnil;
295
+ }
296
+
297
+
298
+ VALUE rb_mBio_Gadget;
299
+
300
+ void
301
+ Init_bio_gadget(void)
302
+ {
303
+ rb_mBio_Gadget = rb_define_module("BioGadget");
304
+ rb_define_module_function(rb_mBio_Gadget, "i2i", bio_gadget_fq1l_i2i, 2);
305
+ rb_define_module_function(rb_mBio_Gadget, "nr_deg", bio_gadget_fq1l_nr_deg, 0);
306
+ rb_define_module_function(rb_mBio_Gadget, "nr_std", bio_gadget_fq1l_nr_std, 0);
307
+ rb_define_module_function(rb_mBio_Gadget, "slice", bio_gadget_fq1l_slice, 2);
308
+ rb_define_module_function(rb_mBio_Gadget, "t3", bio_gadget_fq1l_t3, 4);
309
+ rb_define_module_function(rb_mBio_Gadget, "t3q", bio_gadget_fq1l_t3q, 2);
310
+ rb_define_module_function(rb_mBio_Gadget, "t5", bio_gadget_fq1l_t5, 2);
311
+ rb_define_module_function(rb_mBio_Gadget, "to", bio_gadget_fq1l_to, 2);
312
+ rb_define_module_function(rb_mBio_Gadget, "u2i", bio_gadget_fq1l_u2i, 2);
313
+ }