bio-gadget 0.4.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 919243e81fb8eecaf1e9af9f85914439f3aa27d5
4
- data.tar.gz: 5c1a8c551f09e46857720493b9495fe499778d27
3
+ metadata.gz: 8aea0f635b0ee3678108495d6fcd104f1ec7293e
4
+ data.tar.gz: 895f8bc130d316f85d376f85647c420b9152409e
5
5
  SHA512:
6
- metadata.gz: 20827e3e37d7360508c52b49d3f4ccfb0b034a647656088c56f9aedd1015794ce766a6069b3b48fd18285461488de9bec764753e141752156c354b3c91e3fe5a
7
- data.tar.gz: aae01b065bf04def5d836cfc18c2b3ada4d8c8210f23d2b26796d32cffbfaebd2c0e20b5bad4a69e2cd5dc68e243c04921cdbe1587c3ae0560881c0238bbee75
6
+ metadata.gz: 26ac831558ebfe3e03a826d675ad492245b23556698661b143b83716a61e93d2071c6fb194067db221c4c87b6c4d8cb1079d96c688053ad3b804814ecd54253c
7
+ data.tar.gz: 92d48f2548473d0ce0f842c97b30a623973e2a083de34719f43df46206ff6a5319ad13558a166052227ac1dcbbffa2dbefd2a2560ff79bb5a321466a5486b8d2
data/.gitignore CHANGED
@@ -1,21 +1,11 @@
1
- *.gem
2
- *.rbc
3
- .bundle
4
- .config
5
- .yardoc
6
- Gemfile.lock
7
- InstalledFiles
8
- _yardoc
9
- coverage
10
- doc/
11
- lib/bundler/man
12
- pkg
13
- rdoc
14
- spec/reports
15
- test/tmp
16
- test/version_tmp
17
- tmp
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
18
10
  *~
19
- .#*
20
- .fuse*
21
-
11
+ .DS_Store
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012 Shintaro Katayama
1
+ Copyright (c) 2012-2016 Shintaro Katayama
2
2
 
3
3
  MIT License
4
4
 
data/README.org CHANGED
@@ -12,24 +12,3 @@ To check all commands and the usages in this package
12
12
 
13
13
  : bio-gadget help
14
14
 
15
- Currently available commands are
16
-
17
- - dedup :: Deduplicate fastq (via STDIN)
18
- - demlt :: Demultiplex fastq by barcodes
19
- - femrg :: Extract and merge first exons
20
- - fqxz :: (Re)compression of *.fq(.gz|.bz2) files
21
- - pead :: Find peak within each exon from wigs by a mojority vote.
22
- - qvstat :: Statistics of quality values in *.qual file
23
- - rgt2mtx :: Convert cuffdiff read group tracking file into tab-separated matrix
24
- - wig5p :: Convert bam-format alignments into wig-format table
25
- - wigchr :: Extract wiggle track on specified chromosome
26
-
27
- * Contributing
28
-
29
- 1. Fork it
30
- 2. Create your feature branch (`git checkout -b my-new-feature`)
31
- 3. (optional) Install gems (`bundle`)
32
- 4. Add your feature
33
- 5. Commit your changes (`git commit -am 'Added some feature'`)
34
- 6. Push to the branch (`git push origin my-new-feature`)
35
- 7. Create new Pull Request
data/Rakefile CHANGED
@@ -1,2 +1,6 @@
1
- #!/usr/bin/env rake
2
1
  require "bundler/gem_tasks"
2
+ require "rake/extensiontask"
3
+
4
+ Rake::ExtensionTask.new "bio_gadget" do |ext|
5
+ ext.lib_dir = "lib/bio/gadget"
6
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "test"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -1,22 +1,28 @@
1
- require File.expand_path('../lib/bio-gadget/version', __FILE__)
2
-
3
1
# Gem specification for bio-gadget 0.5.0: package metadata, the native
# extension to build, the files to ship and the gem dependencies.
Gem::Specification.new do |gem|
  gem.name        = 'bio-gadget'
  gem.version     = '0.5.0'
  gem.licenses    = ['MIT']
  # The description has to be assigned before the summary that copies it;
  # in the opposite order the summary is built from a still-unset
  # description and ends up empty.
  gem.description = %q{Gadgets for bioinformatics}
  gem.summary     = gem.description
  gem.authors     = ['Shintaro Katayama']
  gem.email       = ['shintaro.katayama@gmail.com']
  gem.homepage    = 'https://github.com/shka/ruby-bio-gadget'
  gem.extensions  = %w[ext/bio_gadget/extconf.rb]

  # Ship every file tracked by git; executables are the entries under exe/.
  gem.files         = `git ls-files`.split($\)
  gem.bindir        = 'exe'
  gem.executables   = gem.files.grep(%r{^exe/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.12'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rake-compiler'
  gem.add_development_dependency 'minitest', '~> 5.0'

  gem.add_dependency 'bio'
  gem.add_dependency 'damerau-levenshtein'
  gem.add_dependency 'mkfifo'
  gem.add_dependency 'parallel'
  gem.add_dependency 'thor', '~> 0.19.3'
end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio/gadget'
4
+
5
module Bio
  # Command-line front end of the bio-gadget executable.  It extends
  # Bio::Gadget and registers the Fq1l and Strt command classes under the
  # names "fq1l" and "strt", each with its usage banner and description.
  # NOTE(review): `register` is presumably Thor's subcommand registration
  # inherited through Bio::Gadget -- confirm against lib/bio/gadget.
  class Gadgets < Bio::Gadget

    register(Bio::Gadget::Fq1l, 'fq1l', 'fq1l [COMMAND]', 'Tools for oneline-fastq; fq1l is the alias')
    register(Bio::Gadget::Strt, 'strt', 'strt [COMMAND]', 'Tools for STRT RNAseq; strt is the alias')

  end
end
13
+
14
+ Bio::Gadgets.start
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio/gadget'
4
+
5
+ Bio::Gadget::Fq1l.start
data/exe/rbg ADDED
@@ -0,0 +1 @@
1
+ exe/bio-gadget
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio/gadget'
4
+
5
+ Bio::Gadget::Strt.start
@@ -0,0 +1,313 @@
1
+ #include <limits.h>
2
+ #include <regex.h>
3
+ #include <stdio.h>
4
+ #include <string.h>
5
+ #include "bio_gadget.h"
6
+
7
+ VALUE bio_gadget_fq1l_i2i(vSelf, vFirst, vLast)
8
+ VALUE vSelf;
9
+ VALUE vFirst;
10
+ VALUE vLast;
11
+ {
12
+ char line[BUFSIZE];
13
+ char index[BUFSIZE] = "";
14
+ char *acc;
15
+ char *seq;
16
+ char *sep;
17
+ char *qual;
18
+ unsigned long head;
19
+ unsigned long length;
20
+
21
+ head = NUM2INT(vFirst)-1;
22
+ length = NUM2INT(vLast)-head;
23
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
24
+ acc = strtok(line, "\t");
25
+ seq = strtok(NULL, "\t");
26
+ sep = strtok(NULL, "\t");
27
+ qual = strtok(NULL, "\t");
28
+ strcpy(index, seq+head);
29
+ index[length] = 0;
30
+ printf("%s%s\t%s\t%s\t%s", acc, index, seq, sep, qual);
31
+ }
32
+
33
+ return Qnil;
34
+ }
35
+
36
+ VALUE bio_gadget_fq1l_nr_deg(vSelf)
37
+ VALUE vSelf;
38
+ {
39
+ char line[BUFSIZE];
40
+ char *acc;
41
+ char *seq;
42
+ char *sep;
43
+ char *qual;
44
+ char pseq[BUFSIZE] = "";
45
+ unsigned long pseql = ULONG_MAX;
46
+ char regexs[BUFSIZE];
47
+ regex_t regexc;
48
+
49
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
50
+ acc = strtok(line, "\t");
51
+ seq = strtok(NULL, "\t");
52
+ sep = strtok(NULL, "\t");
53
+ qual = strtok(NULL, "\t");
54
+ if (strlen(seq) >= pseql) {
55
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
56
+ strcpy(pseq, seq);
57
+ pseql = strlen(seq);
58
+ } else {
59
+ sprintf(regexs, "^%s", seq);
60
+ regcomp(&regexc, regexs, REG_NOSUB);
61
+ if (regexec(&regexc, pseq, 0, NULL, 0) == REG_NOMATCH) {
62
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
63
+ strcpy(pseq, seq);
64
+ pseql = strlen(seq);
65
+ }
66
+ }
67
+ regfree(&regexc);
68
+ }
69
+
70
+ return Qnil;
71
+ }
72
+
73
+ VALUE bio_gadget_fq1l_nr_std(vSelf)
74
+ VALUE vSelf;
75
+ {
76
+ char line[BUFSIZE];
77
+ char *acc;
78
+ char *seq;
79
+ char *sep;
80
+ char *qual;
81
+ char pseq[BUFSIZE] = "";
82
+
83
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
84
+ acc = strtok(line, "\t");
85
+ seq = strtok(NULL, "\t");
86
+ sep = strtok(NULL, "\t");
87
+ qual = strtok(NULL, "\t");
88
+ if (strcmp(pseq, seq) != 0) {
89
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
90
+ strcpy(pseq, seq);
91
+ }
92
+ }
93
+
94
+ return Qnil;
95
+ }
96
+
97
+ VALUE bio_gadget_fq1l_slice(vSelf, vNth, vSlice)
98
+ VALUE vSelf;
99
+ VALUE vNth;
100
+ VALUE vSlice;
101
+ {
102
+ char line[BUFSIZE];
103
+ unsigned int nth;
104
+ unsigned int slice;
105
+ unsigned long count;
106
+
107
+ nth = NUM2INT(vNth)-1;
108
+ slice = NUM2INT(vSlice);
109
+ count = 0;
110
+
111
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
112
+ if (count % slice == nth)
113
+ fputs(line, stdout);
114
+ count += 1;
115
+ }
116
+
117
+ return Qnil;
118
+ }
119
+
120
+ VALUE bio_gadget_fq1l_t3(vSelf, vCmdIn, vLen, vMinLen, vPathOut)
121
+ VALUE vSelf;
122
+ VALUE vCmdIn;
123
+ VALUE vLen;
124
+ VALUE vMinLen;
125
+ VALUE vPathOut;
126
+ {
127
+ char line[BUFSIZE];
128
+ char *acc;
129
+ char *seq;
130
+ char *sep;
131
+ char *qual;
132
+ int len;
133
+ unsigned long minlen;
134
+ unsigned long seqlen;
135
+ FILE *fp_in;
136
+ FILE *fp_out;
137
+
138
+ fp_in = RTEST(vCmdIn) ? popen(StringValueCStr(vCmdIn), "r") : stdin;
139
+ fp_out = RTEST(vPathOut) ? fopen(StringValueCStr(vPathOut), "w") : stdout;
140
+ len = NUM2INT(vLen);
141
+ minlen = NUM2INT(vMinLen);
142
+
143
+ while(fgets(line, BUFSIZE, fp_in) != NULL) {
144
+ acc = strtok(line, "\t");
145
+ seq = strtok(NULL, "\t");
146
+ seqlen = strlen(seq)-len;
147
+ if (seqlen > 0 && seqlen >= minlen) {
148
+ sep = strtok(NULL, "\t");
149
+ qual = strtok(NULL, "\t");
150
+ seq[seqlen] = 0;
151
+ qual[seqlen] = 0;
152
+ fprintf(fp_out, "%s\t%s\t%s\t%s\n", acc, seq, sep, qual);
153
+ }
154
+ }
155
+ fclose(fp_out);
156
+ fclose(fp_in);
157
+
158
+ return Qnil;
159
+ }
160
+
161
+ VALUE bio_gadget_fq1l_t3q(vSelf, vLQs, vMinLen)
162
+ VALUE vSelf;
163
+ VALUE vLQs;
164
+ VALUE vMinLen;
165
+ {
166
+ char line[BUFSIZE];
167
+ char *acc;
168
+ char *seq;
169
+ char *sep;
170
+ char *qual;
171
+ char *lqs;
172
+ unsigned long minlen;
173
+ unsigned long seqlen;
174
+
175
+ lqs = StringValueCStr(vLQs);
176
+ minlen = NUM2INT(vMinLen);
177
+
178
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
179
+ acc = strtok(line, "\t");
180
+ seq = strtok(NULL, "\t");
181
+ sep = strtok(NULL, "\t");
182
+ qual = strtok(NULL, "\t\n");
183
+ seqlen = strcspn(qual, lqs);
184
+ if (seqlen > 0 && seqlen >= minlen) {
185
+ if (seqlen < strlen(qual)) {
186
+ seq[seqlen] = 0;
187
+ qual[seqlen] = 0;
188
+ }
189
+ printf("%s\t%s\t%s\t%s\n", acc, seq, sep, qual);
190
+ }
191
+ }
192
+
193
+ return Qnil;
194
+ }
195
+
196
+ VALUE bio_gadget_fq1l_t5(vSelf, vPattern, vMinLen)
197
+ VALUE vSelf;
198
+ VALUE vPattern;
199
+ VALUE vMinLen;
200
+ {
201
+ char regexs[BUFSIZE];
202
+ regex_t regexc;
203
+ unsigned long minlen;
204
+ char line[BUFSIZE];
205
+ regmatch_t match[1];
206
+ char *acc;
207
+ char *seq;
208
+ char *sep;
209
+ char *qual;
210
+
211
+ sprintf(regexs, "^%s", StringValueCStr(vPattern));
212
+ regcomp(&regexc, regexs, REG_EXTENDED);
213
+
214
+ minlen = NUM2INT(vMinLen);
215
+
216
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
217
+ acc = strtok(line, "\t");
218
+ seq = strtok(NULL, "\t");
219
+ sep = strtok(NULL, "\t");
220
+ qual = strtok(NULL, "\t");
221
+ if(regexec(&regexc, seq, 1, match, 0) != REG_NOMATCH) {
222
+ seq += match[0].rm_eo;
223
+ qual += match[0].rm_eo;
224
+ if(strlen(seq) >= minlen)
225
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
226
+ } else
227
+ printf("%s\t%s\t%s\t%s", acc, seq, sep, qual);
228
+ }
229
+
230
+ regfree(&regexc);
231
+ return Qnil;
232
+ }
233
+
234
+ VALUE bio_gadget_fq1l_to(vSelf, vDraw, vSkip)
235
+ VALUE vSelf;
236
+ VALUE vDraw;
237
+ VALUE vSkip;
238
+ {
239
+ char line[BUFSIZE];
240
+ unsigned int draw;
241
+ unsigned int skip;
242
+ unsigned int sum;
243
+ unsigned long count;
244
+
245
+ draw = NUM2INT(vDraw);
246
+ skip = NUM2INT(vSkip);
247
+ sum = draw + skip;
248
+ count = 0;
249
+
250
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
251
+ if (count % sum < draw)
252
+ fputs(line, stdout);
253
+ count += 1;
254
+ }
255
+
256
+ return Qnil;
257
+ }
258
+
259
+ VALUE bio_gadget_fq1l_u2i(vSelf, vFirst, vLast)
260
+ VALUE vSelf;
261
+ VALUE vFirst;
262
+ VALUE vLast;
263
+ {
264
+ char line[BUFSIZE];
265
+ char index[BUFSIZE] = "";
266
+ char *acc;
267
+ char *acc1;
268
+ char *acc2;
269
+ char *seq;
270
+ char *sep;
271
+ char *qual;
272
+ unsigned long head;
273
+ unsigned long length;
274
+
275
+ head = NUM2INT(vFirst)-1;
276
+ length = NUM2INT(vLast)-head;
277
+ while(fgets(line, BUFSIZE, stdin) != NULL) {
278
+ acc = strtok(line, "\t");
279
+ seq = strtok(NULL, "\t");
280
+ sep = strtok(NULL, "\t");
281
+ qual = strtok(NULL, "\t");
282
+ strcpy(index, seq+head);
283
+ index[length] = 0;
284
+ acc1 = strtok(acc, " ");
285
+ acc2 = strtok(NULL, " ");
286
+ if(acc2 == NULL) {
287
+ printf("%s:%s\t%s\t%s\t%s", acc1, index, seq, sep, qual);
288
+ }
289
+ else {
290
+ printf("%s:%s %s\t%s\t%s\t%s", acc1, index, acc2, seq, sep, qual);
291
+ }
292
+ }
293
+
294
+ return Qnil;
295
+ }
296
+
297
+
298
+ VALUE rb_mBio_Gadget;
299
+
300
+ void
301
+ Init_bio_gadget(void)
302
+ {
303
+ rb_mBio_Gadget = rb_define_module("BioGadget");
304
+ rb_define_module_function(rb_mBio_Gadget, "i2i", bio_gadget_fq1l_i2i, 2);
305
+ rb_define_module_function(rb_mBio_Gadget, "nr_deg", bio_gadget_fq1l_nr_deg, 0);
306
+ rb_define_module_function(rb_mBio_Gadget, "nr_std", bio_gadget_fq1l_nr_std, 0);
307
+ rb_define_module_function(rb_mBio_Gadget, "slice", bio_gadget_fq1l_slice, 2);
308
+ rb_define_module_function(rb_mBio_Gadget, "t3", bio_gadget_fq1l_t3, 4);
309
+ rb_define_module_function(rb_mBio_Gadget, "t3q", bio_gadget_fq1l_t3q, 2);
310
+ rb_define_module_function(rb_mBio_Gadget, "t5", bio_gadget_fq1l_t5, 2);
311
+ rb_define_module_function(rb_mBio_Gadget, "to", bio_gadget_fq1l_to, 2);
312
+ rb_define_module_function(rb_mBio_Gadget, "u2i", bio_gadget_fq1l_u2i, 2);
313
+ }