transrate 0.3.1 → 1.0.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: be3eaa6170b268789a0ff6d6cb68448a7af6e037
4
- data.tar.gz: 5e857b0392f9791e7111069ee1ed61ee19d3853c
3
+ metadata.gz: 329f398d7dc832c5e56dfe90b6483fb00321bfce
4
+ data.tar.gz: 52acb0232a8f42cb3604c2981899f0d931a1195b
5
5
  SHA512:
6
- metadata.gz: f87811816aa975bfc85cfd025a90029c310bfdad19587cb5ea6a4909a78151f1904db0d69e5593b56f0f6b0044f5072e1e966580d7b6a45f78522ede8eda52cd
7
- data.tar.gz: e9dba5e8a9910716250c3bd82bea190264d4ac32caf67acc44009b080e5eab553cf2a7317d1153d851f797c0eb45d07e1531896743b6bb1b3df902608d696d15
6
+ metadata.gz: 35f079c3474e02aa98a896cf6bbe4cd3d87ef7b7e95c91d13abea585eeb54b598ffe9539f7fc7b8c87a40ec78df92a8e1ba9fd23ac46e4da9c8f616919ab004e
7
+ data.tar.gz: fca938cc86122088ecb0308273d8552e1ebb26dd47e52e8154015aa27e68814016cedda45553f4bf76fcc9b164bcaaaf1cad1787c4f8296f316e6833b235e6bc
data/.gitignore CHANGED
@@ -18,6 +18,7 @@ tmp
18
18
  .#*
19
19
  \#*
20
20
  *so
21
+ dryrun
21
22
 
22
23
  # YARD artifacts
23
24
  .yardoc
@@ -39,3 +40,9 @@ dryrun
39
40
  *.bam
40
41
  *.csv
41
42
  *.coverage
43
+
44
+ # c extension build artefacts
45
+ Makefile
46
+ transrate.bundle
47
+ transrate.o
48
+ .RUBYARCHDIR.time
data/README.md CHANGED
@@ -4,11 +4,12 @@
4
4
 
5
5
  ## Development status
6
6
 
7
- [![Gem Version](https://badge.fury.io/rb/transrate.png)][gem]
8
- [![Build Status](https://secure.travis-ci.org/Blahah/transrate.png?branch=master)][travis]
9
- [![Dependency Status](https://gemnasium.com/Blahah/transrate.png?travis)][gemnasium]
10
- [![Code Climate](https://codeclimate.com/github/Blahah/transrate.png)][codeclimate]
11
- [![Coverage Status](https://coveralls.io/repos/Blahah/transrate/badge.png?branch=master)][coveralls]
7
+ [![Gem Version](http://img.shields.io/gem/v/transrate.svg)][gem]
8
+ ![Downloads](http://img.shields.io/gem/dtv/transrate.svg)
9
+ [![Build Status](http://img.shields.io/travis/Blahah/transrate/master.svg)][travis]
10
+ [![Dependency Status](http://img.shields.io/gemnasium/Blahah/transrate.svg)][gemnasium]
11
+ [![Code Climate](http://img.shields.io/codeclimate/github/Blahah/transrate.svg)][codeclimate]
12
+ [![Coverage Status](http://img.shields.io/coveralls/Blahah/transrate.svg)][coveralls]
12
13
 
13
14
  [gem]: https://badge.fury.io/rb/transrate
14
15
  [travis]: https://travis-ci.org/Blahah/transrate
@@ -20,7 +21,7 @@ This software is being actively developed. Please be aware that there may be bug
20
21
 
21
22
  ## Citation
22
23
 
23
- Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/3687/Blahah/transrate.png)](http://dx.doi.org/10.5281/zenodo.11037).
24
+ Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/3687/Blahah/transrate.png)](http://dx.doi.org/10.5281/zenodo.11039).
24
25
 
25
26
  ## Documentation
26
27
 
@@ -46,12 +46,6 @@ opts = Trollop::options do
46
46
  :type => String
47
47
  opt :right, "right reads file in FASTQ format",
48
48
  :type => String
49
- opt :insertsize, "mean insert size",
50
- :default => 200,
51
- :type => Integer
52
- opt :insertsd, "insert size standard deviation",
53
- :default => 50,
54
- :type => Integer
55
49
  opt :threads, "number of threads to use",
56
50
  :default => 8,
57
51
  :type => Integer
@@ -98,6 +92,21 @@ if opts.reference && !File.exist?(opts.reference)
98
92
  raise IOError.new "Reference fasta file does not exist: #{opts.reference}"
99
93
  end
100
94
 
95
+ if opts.left and opts.right
96
+ if opts.left.split(",").length != opts.right.split(",").length
97
+ msg = "Please provide the same number of left reads as right reads"
98
+ raise ArgumentError.new(msg)
99
+ end
100
+ opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
101
+ if !File.exist?(left)
102
+ raise IOError.new "Left read fastq file does not exist: #{left}"
103
+ end
104
+ if !File.exist?(right)
105
+ raise IOError.new "Right read fastq file does not exist: #{right}"
106
+ end
107
+ end
108
+ end
109
+
101
110
  if opts.profile
102
111
  logger.info "Starting profiler"
103
112
  RubyProf.start
@@ -201,13 +210,16 @@ opts.assembly.split(',').each do |assembly|
201
210
  logger.info "Comparative metrics done in #{Time.now - t0} seconds"
202
211
 
203
212
  logger.info "-" * report_width
213
+ else
214
+ logger.info "No reference provided, skipping comparative diagnostics"
215
+ end
216
+
217
+ if (opts.left && opts.right)
204
218
  score = transrater.assembly_score
205
219
  unless score.nil?
206
- logger.info "OVERALL SCORE: #{score.to_f.round(2) * 100}%"
220
+ logger.info "TRANSRATE ASSEMBLY SCORE: #{score.round(2)}"
207
221
  logger.info "-" * report_width
208
222
  end
209
- else
210
- logger.info "No reference provided, skipping comparative diagnostics"
211
223
  end
212
224
 
213
225
  # write contig metrics to file for each contig
@@ -25,21 +25,56 @@ blastplus:
25
25
  64bit:
26
26
  macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
27
27
  linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
28
- bowtie2:
28
+ snap:
29
29
  binaries:
30
- - bowtie2
31
- - bowtie2-align-l
32
- - bowtie2-align-s
33
- - bowtie2-build
34
- - bowtie2-build-l
35
- - bowtie2-build-s
36
- - bowtie2-inspect
37
- - bowtie2-inspect-l
38
- - bowtie2-inspect-s
30
+ - snap
39
31
  version:
40
- number: '2.2.3'
41
- command: 'bowtie2 --version'
32
+ number: '1.0dev.50'
33
+ command: 'snap'
42
34
  url:
43
35
  64bit:
44
- linux: http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.3/bowtie2-2.2.3-linux-x86_64.zip
45
- macosx: http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.3/bowtie2-2.2.3-macos-x86_64.zip
36
+ linux: https://github.com/cboursnell/snap/raw/dev/bin/linux/snap.tar.gz
37
+ macosx: https://github.com/HibberdLab/snap/raw/dev/bin/macosx/snap.tar.gz
38
+ samtools:
39
+ binaries:
40
+ - samtools
41
+ - bcftools
42
+ version:
43
+ number: '0.1.19'
44
+ command: 'samtools'
45
+ url:
46
+ 64bit:
47
+ linux: https://github.com/cboursnell/samtools/raw/master/build/linux64.tar.gz
48
+ macosx: https://github.com/cboursnell/samtools/raw/master/build/osx64.tar.gz
49
+ bam-read:
50
+ binaries:
51
+ - bam-read
52
+ version:
53
+ number: '0.3\.1'
54
+ command: 'bam-read'
55
+ url:
56
+ 64bit:
57
+ linux: https://github.com/cboursnell/transrate-bam-read/raw/master/bin/linux/bam-read
58
+ macosx: https://github.com/Blahah/transrate-bam-read/raw/master/bin/macosx/bam-read
59
+ unpack: false
60
+ bam-split:
61
+ binaries:
62
+ - bam-split
63
+ version:
64
+ number: '0.1'
65
+ command: 'bam-split'
66
+ url:
67
+ 64bit:
68
+ linux: https://github.com/Blahah/transrate-bam-read/raw/master/bin/linux/bam-split
69
+ macosx: https://github.com/Blahah/transrate-bam-read/raw/master/bin/macosx/bam-split
70
+ unpack: false
71
+ express:
72
+ binaries:
73
+ - express
74
+ version:
75
+ number: '1.5.1'
76
+ command: 'express --version'
77
+ url:
78
+ 64bit:
79
+ linux: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-linux_x86_64.tgz
80
+ macosx: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-macosx_x86_64.tgz
@@ -1,223 +1,257 @@
1
1
  #include "ruby.h"
2
2
  #include <stdlib.h>
3
+ #include <math.h>
3
4
 
4
5
  // Defining a space for information and references about the module to be
5
6
  // stored internally
6
7
  VALUE Contig = Qnil;
8
+ VALUE ReadMetrics = Qnil;
7
9
  VALUE Transrate = Qnil;
8
10
 
9
11
  // Prototype for the initialization method - Ruby calls this, not you
10
12
  void Init_transrate();
11
13
 
12
14
  // methods are prefixed by 'method_' here
13
- //VALUE TestInit(VALUE, VALUE, VALUE, VALUE, VALUE);
15
+ // contig
14
16
  VALUE method_composition(VALUE, VALUE);
15
17
  VALUE method_base_count(VALUE,VALUE);
16
18
  VALUE method_dibase_count(VALUE,VALUE);
17
19
  VALUE method_kmer_count(VALUE,VALUE,VALUE);
18
20
  VALUE method_longest_orf(VALUE, VALUE);
21
+ // read_metrics
19
22
 
20
23
  int * base_counts;
21
24
  int * dibase_counts;
22
25
 
23
26
  // The initialization method for this module
24
27
  void Init_transrate() {
25
- Transrate = rb_define_module("Transrate");
26
- // VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
27
- Contig = rb_define_class_under(Transrate, "Contig", rb_cObject);
28
- // rb_define_method(Contig, "initialize", TestInit, 2);
29
- rb_define_method(Contig, "composition", method_composition, 1);
30
- rb_define_method(Contig, "base_count", method_base_count, 1);
31
- rb_define_method(Contig, "dibase_count", method_dibase_count, 1);
32
- rb_define_method(Contig, "kmer_count", method_kmer_count, 2);
33
- rb_define_method(Contig, "longest_orf", method_longest_orf, 1);
28
+ Transrate = rb_define_module("Transrate");
29
+ Contig = rb_define_class_under(Transrate, "Contig", rb_cObject);
30
+ ReadMetrics = rb_define_class_under(Transrate, "ReadMetrics", rb_cObject);
31
+ // contig
32
+ rb_define_method(Contig, "composition", method_composition, 1);
33
+ rb_define_method(Contig, "base_count", method_base_count, 1);
34
+ rb_define_method(Contig, "dibase_count", method_dibase_count, 1);
35
+ rb_define_method(Contig, "kmer_count", method_kmer_count, 2);
36
+ rb_define_method(Contig, "longest_orf", method_longest_orf, 1);
37
+ // ReadMetrics
34
38
  }
35
39
 
36
40
  VALUE method_composition(VALUE self, VALUE _seq) {
37
- int i,len, idx;
38
- char * seq;
39
- char base;
40
- char prevbase;
41
- seq = StringValueCStr(_seq);
42
- len = RSTRING_LEN(_seq);
43
- base_counts = malloc(5 * sizeof(int));
44
- dibase_counts = malloc(25 * sizeof(int));
41
+ int i, len, idx;
42
+ char * seq;
43
+ char base;
44
+ char prevbase;
45
+ seq = StringValueCStr(_seq);
46
+ len = RSTRING_LEN(_seq);
47
+ base_counts = malloc(5 * sizeof(int));
48
+ dibase_counts = malloc(25 * sizeof(int));
45
49
 
46
- for (i=0; i < 5; i++) {
47
- base_counts[i]=0;
50
+ for (i=0; i < 5; i++) {
51
+ base_counts[i]=0;
52
+ }
53
+ for (i=0; i < 25; i++) {
54
+ dibase_counts[i]=0;
55
+ }
56
+ for (i=0; i < len; i++) {
57
+ base = seq[i];
58
+ switch (base) {
59
+ case 'A': {
60
+ idx=0;
61
+ break;
62
+ }
63
+ case 'C': {
64
+ idx=1;
65
+ break;
66
+ }
67
+ case 'G': {
68
+ idx=2;
69
+ break;
70
+ }
71
+ case 'T': {
72
+ idx=3;
73
+ break;
74
+ }
75
+ default: {
76
+ idx=4;
77
+ break;
78
+ }
48
79
  }
49
- for (i=0; i < 25; i++) {
50
- dibase_counts[i]=0;
51
- }
52
- for (i=0; i < len; i++) {
53
- base = seq[i];
54
- switch (base) {
55
- case 'A': {
56
- idx=0;
57
- break;
58
- }
59
- case 'C': {
60
- idx=1;
61
- break;
62
- }
63
- case 'G': {
64
- idx=2;
65
- break;
66
- }
67
- case 'T': {
68
- idx=3;
69
- break;
70
- }
71
- default: {
72
- idx=4;
73
- break;
74
- }
75
- }
76
- base_counts[idx]++;
80
+ base_counts[idx]++;
77
81
 
78
- if (i > 0) {
79
- prevbase = seq[i-1];
80
- switch (prevbase) {
81
- case 'A': {
82
- idx=idx;
83
- break;
84
- }
85
- case 'C': {
86
- idx=idx+5;
87
- break;
88
- }
89
- case 'G': {
90
- idx=idx+10;
91
- break;
92
- }
93
- case 'T': {
94
- idx=idx+15;
95
- break;
96
- }
97
- default: {
98
- idx=idx+20;
99
- break;
100
- }
101
- }
102
- dibase_counts[idx]++;
82
+ if (i > 0) {
83
+ prevbase = seq[i-1];
84
+ switch (prevbase) {
85
+ case 'A': {
86
+ idx=idx;
87
+ break;
88
+ }
89
+ case 'C': {
90
+ idx=idx+5;
91
+ break;
92
+ }
93
+ case 'G': {
94
+ idx=idx+10;
95
+ break;
96
+ }
97
+ case 'T': {
98
+ idx=idx+15;
99
+ break;
103
100
  }
101
+ default: {
102
+ idx=idx+20;
103
+ break;
104
+ }
105
+ }
106
+ dibase_counts[idx]++;
104
107
  }
105
- return INT2NUM(0);
108
+ }
109
+ return INT2NUM(0);
106
110
  }
107
111
 
108
112
  VALUE method_dibase_count(VALUE self, VALUE idx) {
109
- return INT2NUM(dibase_counts[NUM2INT(idx)]);
113
+ return INT2NUM(dibase_counts[NUM2INT(idx)]);
110
114
  }
111
115
 
112
116
  VALUE method_base_count(VALUE self, VALUE idx) {
113
- return INT2NUM(base_counts[NUM2INT(idx)]);
117
+ return INT2NUM(base_counts[NUM2INT(idx)]);
114
118
  }
115
119
 
116
120
  VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
117
- int n, i, start, k, len, h, size = 0;
118
- char * c_str;
119
- char base;
120
- len = RSTRING_LEN(_s);
121
- c_str = StringValueCStr(_s);
122
- k = NUM2INT(_k);
123
- size = 1;
124
- for(h=0;h<k;h++) {
125
- size *= 4;
126
- }
127
- short set[size];
128
- for(start=0;start<size;start++) {
129
- set[start]=0;
130
- }
131
- for(start=0; start<len-k+1; start++) {
132
- i = 0;
133
- h = 0;
134
- n = 0;
135
- for(i = start; i < start+k; i++) {
136
- base = c_str[i];
137
- switch (base) {
138
- case 'A': {
139
- h = h << 2;
140
- h += 0;
141
- break;
142
- }
143
- case 'C': {
144
- h = h << 2;
145
- h += 1;
146
- break;
147
- }
148
- case 'G': {
149
- h = h << 2;
150
- h += 2;
151
- break;
152
- }
153
- case 'T': {
154
- h = h << 2;
155
- h += 3;
156
- break;
157
- }
158
- default: {
159
- n++;
160
- break;
161
- }
162
- }
121
+ int n, i, start, k, len, h, size = 0;
122
+ char * c_str;
123
+ char base;
124
+ len = RSTRING_LEN(_s);
125
+ c_str = StringValueCStr(_s);
126
+ k = NUM2INT(_k);
127
+ size = 1;
128
+ for(h=0;h<k;h++) {
129
+ size *= 4;
130
+ }
131
+ short set[size];
132
+ for(start=0;start<size;start++) {
133
+ set[start]=0;
134
+ }
135
+ for(start=0; start<len-k+1; start++) {
136
+ i = 0;
137
+ h = 0;
138
+ n = 0;
139
+ for(i = start; i < start+k; i++) {
140
+ base = c_str[i];
141
+ switch (base) {
142
+ case 'A': {
143
+ h = h << 2;
144
+ h += 0;
145
+ break;
163
146
  }
164
- if (n==0) {
165
- set[h] += 1;
147
+ case 'C': {
148
+ h = h << 2;
149
+ h += 1;
150
+ break;
166
151
  }
167
- }
168
- i = 0; // count how many in array are set //
169
- for(start = 0; start < size; start++) {
170
- if (set[start]>0) {
171
- i++;
152
+ case 'G': {
153
+ h = h << 2;
154
+ h += 2;
155
+ break;
156
+ }
157
+ case 'T': {
158
+ h = h << 2;
159
+ h += 3;
160
+ break;
172
161
  }
162
+ default: {
163
+ n++;
164
+ break;
165
+ }
166
+ }
167
+ }
168
+ if (n==0) {
169
+ set[h] += 1;
170
+ }
171
+ }
172
+ i = 0; // count how many in array are set //
173
+ for(start = 0; start < size; start++) {
174
+ if (set[start]>0) {
175
+ i++;
173
176
  }
174
- return INT2NUM(i);
177
+ }
178
+ return INT2NUM(i);
175
179
  }
176
180
 
177
181
  // takes in a string and calculates the longest open reading frame
178
182
  // in any of the 6 frames
179
183
  // an open reading frame is defined as the number of bases between
180
- // either the start of the sequence or a stop codon and either the
184
+ // either the start of the sequence or a start codon and either the
181
185
  // end of the sequence or a stop codon
182
- VALUE method_longest_orf(VALUE self, VALUE _s) {
183
- int i,sl,longest=0;
184
- int len[6];
185
- char * c_str;
186
186
 
187
- sl = RSTRING_LEN(_s);
188
- c_str = StringValueCStr(_s);
189
- for (i=0;i<6;i++) {
190
- len[i]=0;
191
- }
192
- for (i=0;i<sl-2;i++) {
193
- if (c_str[i]=='T' &&
194
- ((c_str[i+1]=='A' && c_str[i+2]=='G') ||
195
- (c_str[i+1]=='A' && c_str[i+2]=='A') ||
196
- (c_str[i+1]=='G' && c_str[i+2]=='A'))) {
197
- if (len[i%3] > longest) {
198
- longest = len[i%3];
199
- }
200
- len[i%3]=0;
201
- } else {
202
- len[i%3]++;
187
+ VALUE method_longest_orf(VALUE self, VALUE _str) {
188
+ int i,sl,longest=0;
189
+ int len[3];
190
+ char * str;
191
+ sl = RSTRING_LEN(_str);
192
+ str = StringValueCStr(_str);
193
+ for (i=0;i<3;i++) {
194
+ len[i]=0;
195
+ }
196
+ for(i=0;i<sl-2;i++) {
197
+ if (str[i]=='A' && str[i+1]=='T' && str[i+2]=='G') { //Methionine
198
+ if (len[i%3]>=0) {
199
+ len[i%3]++;
200
+ } else {
201
+ len[i%3]=1;
202
+ }
203
+ } else {
204
+ if (str[i]=='T' &&
205
+ ((str[i+1]=='A' && str[i+2]=='G') || //amber
206
+ (str[i+1]=='A' && str[i+2]=='A') || //ochre stops
207
+ (str[i+1]=='G' && str[i+2]=='A'))) { //umber
208
+ if (len[i%3]>longest) {
209
+ longest = len[i%3];
203
210
  }
204
- if (c_str[i+2]=='A' &&
205
- ((c_str[i]=='C' && c_str[i+1]=='T') ||
206
- (c_str[i]=='T' && c_str[i+1]=='T') ||
207
- (c_str[i]=='T' && c_str[i+1]=='C'))) {
208
- if (len[3+i%3] > longest) {
209
- longest = len[3+i%3];
210
- }
211
- len[3+i%3]=0;
212
- } else {
213
- len[3+i%3]++;
211
+ len[i%3]=-1;
212
+ } else { // any other codon
213
+ if (len[i%3]>=0) {
214
+ len[i%3]++;
214
215
  }
216
+ }
215
217
  }
218
+ }
219
+ for(i=0;i<3;i++) {
216
220
  if (len[i%3] > longest) {
217
- longest = len[i%3];
221
+ longest = len[i%3];
222
+ }
223
+ }
224
+ for (i=0;i<3;i++) {
225
+ len[i]=0;
226
+ }
227
+ for(i=sl-1;i>=2;i--) {
228
+ if (str[i]=='T' && str[i-1]=='A' && str[i-2]=='C') { //Methionine
229
+ if (len[i%3]>=0) {
230
+ len[i%3]++;
231
+ } else {
232
+ len[i%3]=1;
233
+ }
234
+ } else {
235
+ if (str[i]=='A' &&
236
+ ((str[i-1]=='T' && str[i-2]=='C') || //amber
237
+ (str[i-1]=='T' && str[i-2]=='T') || //ochre stops
238
+ (str[i-1]=='C' && str[i-2]=='T'))) { //umber
239
+ if (len[i%3]>longest) {
240
+ longest = len[i%3];
241
+ }
242
+ len[i%3]=-1;
243
+ } else { // any other codon
244
+ if (len[i%3]>=0) {
245
+ len[i%3]++;
246
+ }
247
+ }
218
248
  }
219
- if (len[3+i%3] > longest) {
220
- longest = len[3+i%3];
249
+ }
250
+ for(i=0;i<3;i++) {
251
+ if (len[i%3] > longest) {
252
+ longest = len[i%3];
221
253
  }
222
- return INT2NUM(longest);
223
- }
254
+ }
255
+ return INT2NUM(longest);
256
+ }
257
+