transrate 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +1 -0
- data/LICENSE +2 -15
- data/README.md +14 -132
- data/Rakefile +19 -2
- data/bin/transrate +49 -10
- data/deps/deps.yaml +0 -10
- data/docs/transrate_logo_full.png +0 -0
- data/ext/transrate/extconf.rb +13 -0
- data/ext/transrate/transrate.c +223 -0
- data/lib/transrate.rb +1 -0
- data/lib/transrate/assembly.rb +12 -10
- data/lib/transrate/bowtie2.rb +7 -0
- data/lib/transrate/comparative_metrics.rb +103 -73
- data/lib/transrate/contig.rb +94 -93
- data/lib/transrate/contig_metrics.rb +1 -2
- data/lib/transrate/read_metrics.rb +13 -7
- data/lib/transrate/version.rb +1 -1
- data/test/helper.rb +1 -31
- data/test/test_bin.rb +99 -0
- data/test/test_bowtie.rb +12 -0
- data/test/test_comp_metrics.rb +161 -104
- data/test/test_contig.rb +62 -6
- data/test/test_contig_metrics.rb +2 -2
- data/test/test_inline.rb +2 -2
- data/test/test_transrater.rb +1 -1
- data/transrate.gemspec +5 -4
- metadata +40 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d2c7c5983fe07d76b6fbb4bf66bea5c93424f23d
|
4
|
+
data.tar.gz: 8c581edb8de7a7975ce0099a5f6bcd5ae6b51e65
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05ee942313dfb447366cbaa2609b8b1ceffe789e73c734791a20f6da25afb884ff2e7fbc05ac3ea8d5523023f842ad572286979691a1552c3aab61ec5b1c7733
|
7
|
+
data.tar.gz: c94ec455063b857ef41a4fc334a84f568cb47efa17a8785635911a02d222622d3948a262733323a9f96cefce750294a385a3315d8d7dc421b3d5028278e5ced6
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/LICENSE
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
## Summary
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
SNAP and CD-HIT-2D are bundled as binaries under their respective licenses
|
6
|
-
as described below.
|
3
|
+
Transrate is released under the MIT license.
|
7
4
|
|
8
5
|
## The MIT License (MIT)
|
9
6
|
|
10
|
-
Copyright (c)
|
7
|
+
Copyright (c) 2014 Richard Smith-Unna & Chris Boursnell
|
11
8
|
|
12
9
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
13
10
|
this software and associated documentation files (the "Software"), to deal in
|
@@ -25,13 +22,3 @@ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
25
22
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
26
23
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
27
24
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
-
|
29
|
-
## SNAP
|
30
|
-
|
31
|
-
SNAP is distributed as a binary in accordance with its Apache license.
|
32
|
-
The source code for SNAP is available at https://github.com/amplab/snap
|
33
|
-
|
34
|
-
## CD-HIT-2D
|
35
|
-
|
36
|
-
CD-HIT-2D is distributed as a binary in accordance with ith GPLv2 license.
|
37
|
-
The source code for CD-HIT-2D is available at https://code.google.com/p/cdhit/
|
data/README.md
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
<p align="center">
|
2
|
+
<img alt="Transrate - understand your transcriptome assembly" src="https://github.com/Blahah/transrate/raw/master/docs/transrate_logo_full.png">
|
3
|
+
</p>
|
3
4
|
|
4
|
-
|
5
|
+
## Development status
|
5
6
|
|
6
7
|
[![Gem Version](https://badge.fury.io/rb/transrate.png)][gem]
|
7
8
|
[![Build Status](https://secure.travis-ci.org/Blahah/transrate.png?branch=master)][travis]
|
@@ -15,138 +16,19 @@ Quality analysis and comparison of transcriptome assemblies.
|
|
15
16
|
[codeclimate]: https://codeclimate.com/github/Blahah/transrate
|
16
17
|
[coveralls]: https://coveralls.io/r/Blahah/transrate
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
1. [Development status](https://github.com/Blahah/transrate#development-status)
|
21
|
-
2. [Transcriptome assembly quality metrics](https://github.com/Blahah/transrate#transcriptome-assembly-quality-metrics)
|
22
|
-
3. [Installation](https://github.com/Blahah/transrate#installation)
|
23
|
-
4. [Usage](https://github.com/Blahah/transrate#usage)
|
24
|
-
- [Command line](https://github.com/Blahah/transrate#command-line)
|
25
|
-
- [example](https://github.com/Blahah/transrate#example)
|
26
|
-
- [As a library](https://github.com/Blahah/transrate#as-a-library)
|
27
|
-
5. [Requirements](https://github.com/Blahah/transrate#requirements)
|
28
|
-
- [Ruby](https://github.com/Blahah/transrate#ruby)
|
29
|
-
- [RubyGems](https://github.com/Blahah/transrate#rubygems)
|
30
|
-
- [Blast+, Bowtie 2](https://github.com/Blahah/transrate#blast+-and-bowtie2)
|
31
|
-
6. [Getting help](https://github.com/Blahah/transrate#getting-help)
|
32
|
-
|
33
|
-
## Development status
|
34
|
-
|
35
|
-
This software is being actively developed. Please be aware that they may be bugs. If you find any, please report them on the [issue tracker](https://github.com/Blahah/transrate/issues).
|
36
|
-
|
37
|
-
## Transcriptome assembly quality metrics
|
38
|
-
|
39
|
-
**transrate** implements a variety of established and new metrics. They are explained in detail [on the wiki](https://github.com/Blahah/transrate/wiki/Transcriptome-assembly-quality-metrics).
|
40
|
-
|
41
|
-
## Installation
|
42
|
-
|
43
|
-
Assuming you've got a recent version of Ruby installed (see below), you can install transrate very easily. Just run at the terminal:
|
44
|
-
|
45
|
-
`gem install transrate`
|
46
|
-
|
47
|
-
Next all the software Transrate depends on needs to be installed. Luckily, transrate is clever enough to do this itself. Simply run
|
48
|
-
|
49
|
-
`transrate --install-deps`
|
50
|
-
|
51
|
-
Transrate will check whether its dependencies are installed, and if not will download and install them for you.
|
52
|
-
|
53
|
-
## Usage
|
54
|
-
|
55
|
-
### Command line
|
56
|
-
|
57
|
-
`transrate --help` will display basic usage instructions.
|
58
|
-
|
59
|
-
```
|
60
|
-
Transrate v0.2.0 by Richard Smith-Unna <rds45@cam.ac.uk>
|
61
|
-
|
62
|
-
DESCRIPTION:
|
63
|
-
Analyse a de-novo transcriptome
|
64
|
-
assembly using three kinds of metrics:
|
65
|
-
|
66
|
-
1. contig-based
|
67
|
-
2. read-mapping (if --left and --right are provided)
|
68
|
-
3. reference-based (if --reference is provided)
|
69
|
-
|
70
|
-
Bug reports and feature requests at:
|
71
|
-
http://github.com/blahah/transrate
|
72
|
-
|
73
|
-
USAGE:
|
74
|
-
transrate <options>
|
75
|
-
|
76
|
-
EXAMPLES:
|
77
|
-
transrate --assembly contigs.fa --reference Athaliana_protein.fa --threads 8
|
78
|
-
|
79
|
-
OPTIONS:
|
80
|
-
--assembly, -a <s>: assembly file(s) in FASTA format, comma-separated
|
81
|
-
--reference, -r <s>: reference proteome file in FASTA format
|
82
|
-
--left, -l <s>: left reads file in FASTQ format
|
83
|
-
--right, -i <s>: right reads file in FASTQ format
|
84
|
-
--insertsize, -n <i>: mean insert size (default: 200)
|
85
|
-
--insertsd, -s <i>: insert size standard deviation (default: 50)
|
86
|
-
--threads, -t <i>: number of threads to use (default: 8)
|
87
|
-
--outfile, -o <s>: filename to use for CSV output (default: transate_results.csv)
|
88
|
-
--loglevel, -g <s>: the amount of information to print. one of [error, info, warn, debug] (default: info)
|
89
|
-
--install-deps, -d: install any missing dependencies
|
90
|
-
--profile, -p: debug option: profile the code as it runs
|
91
|
-
--version, -v: Print version and exit
|
92
|
-
--help, -h: Show this message
|
93
|
-
```
|
94
|
-
|
95
|
-
See the [getting started guide] on the website for more instructions, and see the [command-line options] part of the manual for details.
|
96
|
-
|
97
|
-
#### Example
|
98
|
-
|
99
|
-
```
|
100
|
-
transrate --assembly assembly.fasta \
|
101
|
-
--reference reference.fasta \
|
102
|
-
--left l.fq \
|
103
|
-
--right r.fq \
|
104
|
-
--threads 4
|
105
|
-
```
|
106
|
-
|
107
|
-
### As a library
|
108
|
-
|
109
|
-
```ruby
|
110
|
-
require 'transrate'
|
111
|
-
|
112
|
-
assembly = Transrate::Assembly.new(File.expand_path('assembly.fasta'))
|
113
|
-
reference = Transrate::Assembly.new(File.expand_path('reference.fasta'))
|
114
|
-
|
115
|
-
t = Transrate::Transrater.new(assembly, reference)
|
116
|
-
|
117
|
-
left = File.expand_path('left.fq')
|
118
|
-
right = File.expand_path('right.fq')
|
119
|
-
|
120
|
-
puts t.all_metrics(left, right)
|
121
|
-
puts t.assembly_score
|
122
|
-
```
|
123
|
-
|
124
|
-
## Requirements
|
125
|
-
|
126
|
-
### Ruby
|
127
|
-
|
128
|
-
First, you'll need Ruby v2.0.0 or greater installed. You can check with:
|
129
|
-
|
130
|
-
`ruby --version`
|
131
|
-
|
132
|
-
If you don't have Ruby installed, or you need a higher version, I recommend using [RVM](http://rvm.io/) as your Ruby Version Manager. To install RVM along with the latest Ruby, just run:
|
133
|
-
|
134
|
-
`\curl -L https://get.rvm.io | bash -s stable --ruby`
|
135
|
-
|
136
|
-
### Rubygems
|
137
|
-
|
138
|
-
Your Ruby installation *should* come with RubyGems, the package manager for Ruby. You can check with:
|
139
|
-
|
140
|
-
`gem --version`
|
19
|
+
This software is being actively developed. Please be aware that there may be bugs. If you find any, please report them on the [issue tracker](https://github.com/Blahah/transrate/issues).
|
141
20
|
|
142
|
-
|
21
|
+
## Documentation
|
143
22
|
|
144
|
-
|
23
|
+
**transrate** is documented [on the website](http://hibberdlab.com/transrate).
|
145
24
|
|
146
|
-
|
25
|
+
## Contributing
|
147
26
|
|
148
|
-
|
27
|
+
Interested in helping? Great! We particularly would like help with the following:
|
149
28
|
|
150
|
-
|
29
|
+
- code review
|
30
|
+
- documentation review
|
31
|
+
- adding features
|
32
|
+
- tackling bugs
|
151
33
|
|
152
|
-
|
34
|
+
For any of these, please just pick an appropriate issue [on the tracker](https://github.com/Blahah/transrate/issues) and make a pull request.
|
data/Rakefile
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
require 'rake/testtask'
|
2
|
+
require 'rake/extensiontask'
|
3
|
+
|
4
|
+
Rake::ExtensionTask.new('transrate') do |ext|
|
5
|
+
ext.lib_dir = "lib/transrate"
|
6
|
+
end
|
2
7
|
|
3
8
|
Rake::TestTask.new do |t|
|
4
9
|
t.libs << 'test'
|
@@ -17,7 +22,7 @@ Rake::TestTask.new do |t|
|
|
17
22
|
end
|
18
23
|
|
19
24
|
Rake::TestTask.new do |t|
|
20
|
-
t.name = :
|
25
|
+
t.name = :contig_metrics
|
21
26
|
t.libs << 'test'
|
22
27
|
t.test_files = ['test/test_contig_metrics.rb']
|
23
28
|
end
|
@@ -40,5 +45,17 @@ Rake::TestTask.new do |t|
|
|
40
45
|
t.test_files = ['test/test_transrater.rb']
|
41
46
|
end
|
42
47
|
|
48
|
+
Rake::TestTask.new do |t|
|
49
|
+
t.name = :bin
|
50
|
+
t.libs << 'test'
|
51
|
+
t.test_files = ['test/test_bin.rb']
|
52
|
+
end
|
53
|
+
|
54
|
+
Rake::TestTask.new do |t|
|
55
|
+
t.name = :contig
|
56
|
+
t.libs << 'test'
|
57
|
+
t.test_files = ['test/test_contig.rb']
|
58
|
+
end
|
59
|
+
|
43
60
|
desc "Run tests"
|
44
|
-
task :default => :test
|
61
|
+
task :default => :test
|
data/bin/transrate
CHANGED
@@ -55,8 +55,8 @@ opts = Trollop::options do
|
|
55
55
|
opt :threads, "number of threads to use",
|
56
56
|
:default => 8,
|
57
57
|
:type => Integer
|
58
|
-
opt :outfile, "filename to use for CSV output",
|
59
|
-
:default => '
|
58
|
+
opt :outfile, "prefix filename to use for CSV output",
|
59
|
+
:default => 'transrate'
|
60
60
|
opt :loglevel, "the amount of information to print. " +
|
61
61
|
"one of [error, info, warn, debug]",
|
62
62
|
:default => 'info'
|
@@ -64,16 +64,32 @@ opts = Trollop::options do
|
|
64
64
|
opt :profile, "debug option: profile the code as it runs"
|
65
65
|
end
|
66
66
|
|
67
|
+
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
|
68
|
+
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
67
69
|
if opts.install_deps
|
68
70
|
puts "Checking dependencies"
|
69
|
-
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
|
70
|
-
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
71
71
|
Bindeps.require gem_deps
|
72
72
|
puts "All dependencies installed"
|
73
73
|
exit
|
74
|
+
else
|
75
|
+
missing = Bindeps.missing gem_deps
|
76
|
+
if missing.length > 0
|
77
|
+
puts "Dependencies are missing:"
|
78
|
+
missing.each do |dep|
|
79
|
+
puts " - #{dep}"
|
80
|
+
end
|
81
|
+
puts "To install all missing dependencies, run `transrate --install-deps`"
|
82
|
+
exit(1)
|
83
|
+
end
|
74
84
|
end
|
75
85
|
|
76
|
-
|
86
|
+
if opts.assembly
|
87
|
+
opts.assembly.split(',').each do |assembly_file|
|
88
|
+
unless File.exist?(assembly_file)
|
89
|
+
raise IOError.new "Assembly fasta file does not exist: #{assembly_file}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
else
|
77
93
|
raise ArgumentError.new "Option --assembly must be specified. " +
|
78
94
|
"Try --help for help."
|
79
95
|
end
|
@@ -194,15 +210,38 @@ opts.assembly.split(',').each do |assembly|
|
|
194
210
|
logger.info "No reference provided, skipping comparative diagnostics"
|
195
211
|
end
|
196
212
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
213
|
+
# write contig metrics to file for each contig
|
214
|
+
outfile = "#{opts.outfile}_contigs.csv"
|
215
|
+
logger.info "Writing contig metrics for each contig to #{outfile}"
|
216
|
+
# have option to turn off, default on
|
217
|
+
first=true
|
218
|
+
CSV.open(outfile, 'wb') do |csv|
|
219
|
+
a.each do |name, contig|
|
220
|
+
basic_metrics = {:contig_name => name}.merge(contig.basic_metrics)
|
221
|
+
if opts.reference
|
222
|
+
comp_metrics = contig.comparative_metrics
|
223
|
+
basic_metrics.merge!(comp_metrics)
|
224
|
+
end
|
225
|
+
if opts.left and opts.right
|
226
|
+
read_metrics = contig.read_metrics
|
227
|
+
basic_metrics.merge!(read_metrics)
|
228
|
+
end
|
229
|
+
if first
|
230
|
+
csv << basic_metrics.keys
|
231
|
+
first = false
|
232
|
+
end
|
233
|
+
csv << basic_metrics.values
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
all << contig_results.merge(read_results)
|
238
|
+
.merge(comparative_results)
|
239
|
+
.merge({ :assembly => assembly })
|
201
240
|
|
202
241
|
end
|
203
242
|
|
204
243
|
# write out all results to .csv
|
205
|
-
outfile = opts.
|
244
|
+
outfile = "#{opts.outfile}_assemblies.csv"
|
206
245
|
logger.info "Writing analysis results to #{outfile}"
|
207
246
|
CSV.open(outfile, 'wb') do |file|
|
208
247
|
keys = all[0].keys
|
data/deps/deps.yaml
CHANGED
@@ -43,13 +43,3 @@ bowtie2:
|
|
43
43
|
64bit:
|
44
44
|
linux: http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.3/bowtie2-2.2.3-linux-x86_64.zip
|
45
45
|
macosx: http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.3/bowtie2-2.2.3-macos-x86_64.zip
|
46
|
-
express:
|
47
|
-
binaries:
|
48
|
-
- express
|
49
|
-
version:
|
50
|
-
number: '1.5.1'
|
51
|
-
command: 'express --version'
|
52
|
-
url:
|
53
|
-
64bit:
|
54
|
-
linux: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-linux_x86_64.tgz
|
55
|
-
macosx: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-macosx_x86_64.tgz
|
Binary file
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Build script for the transrate native extension.
# mkmf provides the helpers used to generate a Makefile for a Ruby extension.
require 'mkmf'

# Name of the extension; also the path it is installed under
extension_name = 'transrate/transrate'

# Extend the flags mkmf inherited from the Ruby build instead of replacing
# them, so platform-specific options chosen by Ruby are not lost.
# -Wall enables common warnings, -O3 asks for aggressive optimisation.
$CFLAGS = "#{$CFLAGS} -Wall -O3"

# Register --with-<name>-dir style configuration options for the extension
dir_config(extension_name)

# Write the Makefile
create_makefile(extension_name)
|
@@ -0,0 +1,223 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include <stdlib.h>
|
3
|
+
|
4
|
+
// Defining a space for information and references about the module to be
|
5
|
+
// stored internally
|
6
|
+
VALUE Contig = Qnil;
|
7
|
+
VALUE Transrate = Qnil;
|
8
|
+
|
9
|
+
// Prototype for the initialization method - Ruby calls this, not you
|
10
|
+
void Init_transrate();
|
11
|
+
|
12
|
+
// methods are prefixed by 'method_' here
|
13
|
+
//VALUE TestInit(VALUE, VALUE, VALUE, VALUE, VALUE);
|
14
|
+
VALUE method_composition(VALUE, VALUE);
|
15
|
+
VALUE method_base_count(VALUE,VALUE);
|
16
|
+
VALUE method_dibase_count(VALUE,VALUE);
|
17
|
+
VALUE method_kmer_count(VALUE,VALUE,VALUE);
|
18
|
+
VALUE method_longest_orf(VALUE, VALUE);
|
19
|
+
|
20
|
+
int * base_counts;
|
21
|
+
int * dibase_counts;
|
22
|
+
|
23
|
+
// The initialization method for this module
|
24
|
+
void Init_transrate() {
|
25
|
+
Transrate = rb_define_module("Transrate");
|
26
|
+
// VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
|
27
|
+
Contig = rb_define_class_under(Transrate, "Contig", rb_cObject);
|
28
|
+
// rb_define_method(Contig, "initialize", TestInit, 2);
|
29
|
+
rb_define_method(Contig, "composition", method_composition, 1);
|
30
|
+
rb_define_method(Contig, "base_count", method_base_count, 1);
|
31
|
+
rb_define_method(Contig, "dibase_count", method_dibase_count, 1);
|
32
|
+
rb_define_method(Contig, "kmer_count", method_kmer_count, 2);
|
33
|
+
rb_define_method(Contig, "longest_orf", method_longest_orf, 1);
|
34
|
+
}
|
35
|
+
|
36
|
+
// Maps a nucleotide character to its slot in the count tables:
// A=0, C=1, G=2, T=3, any other character (including lowercase)=4
static int composition_base_index(char base) {
  switch (base) {
    case 'A': return 0;
    case 'C': return 1;
    case 'G': return 2;
    case 'T': return 3;
    default:  return 4;
  }
}

// Contig#composition(seq)
//
// Tallies single-base and dinucleotide frequencies of seq into the
// file-level tables base_counts (5 slots) and dibase_counts (25 slots).
// A dinucleotide is stored at 5 * index(first base) + index(second base).
// The tables from any previous call are released and replaced, so the
// tables always describe the most recently analysed sequence.
//
// Raises TypeError/ArgumentError (from StringValueCStr) when seq is not a
// String or contains a NUL byte, and NoMemoryError when the tables cannot
// be allocated. Always returns 0.
VALUE method_composition(VALUE self, VALUE _seq) {
  long i, len;
  int idx, prev_idx = 0;
  const char *seq;
  int *new_base_counts;
  int *new_dibase_counts;

  // may raise, so convert before anything is allocated
  seq = StringValueCStr(_seq);
  len = RSTRING_LEN(_seq);

  // calloc hands back zeroed counters
  new_base_counts = calloc(5, sizeof *new_base_counts);
  new_dibase_counts = calloc(25, sizeof *new_dibase_counts);
  if (new_base_counts == NULL || new_dibase_counts == NULL) {
    free(new_base_counts);
    free(new_dibase_counts);
    rb_raise(rb_eNoMemError, "could not allocate composition count tables");
  }

  for (i = 0; i < len; i++) {
    idx = composition_base_index(seq[i]);
    new_base_counts[idx]++;
    // every base after the first closes a dinucleotide with its predecessor
    if (i > 0) {
      new_dibase_counts[prev_idx * 5 + idx]++;
    }
    prev_idx = idx;
  }

  // drop the tables of the previous call (free(NULL) is a no-op the first
  // time round) so repeated calls do not leak
  free(base_counts);
  free(dibase_counts);
  base_counts = new_base_counts;
  dibase_counts = new_dibase_counts;

  return INT2NUM(0);
}
|
107
|
+
|
108
|
+
// Contig#dibase_count(idx)
//
// Returns the dinucleotide counter stored at idx by the most recent
// composition call. idx is 5 * first + second, where each base is encoded
// as A=0, C=1, G=2, T=3, other=4.
//
// Raises RuntimeError when composition has not been run yet and IndexError
// when idx lies outside 0..24 (both used to read invalid memory).
VALUE method_dibase_count(VALUE self, VALUE idx) {
  int i = NUM2INT(idx);

  if (dibase_counts == NULL) {
    rb_raise(rb_eRuntimeError,
             "composition must be called before dibase_count");
  }
  if (i < 0 || i >= 25) {
    rb_raise(rb_eIndexError, "dibase index %d out of range (0..24)", i);
  }
  return INT2NUM(dibase_counts[i]);
}
|
111
|
+
|
112
|
+
// Contig#base_count(idx)
//
// Returns the single-base counter stored at idx by the most recent
// composition call, with idx encoded as A=0, C=1, G=2, T=3, other=4.
//
// Raises RuntimeError when composition has not been run yet and IndexError
// when idx lies outside 0..4 (both used to read invalid memory).
VALUE method_base_count(VALUE self, VALUE idx) {
  int i = NUM2INT(idx);

  if (base_counts == NULL) {
    rb_raise(rb_eRuntimeError,
             "composition must be called before base_count");
  }
  if (i < 0 || i >= 5) {
    rb_raise(rb_eIndexError, "base index %d out of range (0..4)", i);
  }
  return INT2NUM(base_counts[i]);
}
|
115
|
+
|
116
|
+
// Contig#kmer_count(k, seq)
//
// Returns the number of distinct k-mers in seq that consist solely of the
// characters A, C, G and T. Windows containing any other character are
// ignored. Each k-mer is packed two bits per base (A=0, C=1, G=2, T=3) and
// used as an index into a table of 4^k presence flags.
//
// The table lives on the heap and holds flags rather than counters, so a
// large k cannot overflow the stack and a very frequent k-mer cannot wrap a
// counter and be mistaken for absent.
//
// k <= 0 returns 1, as it always has (the single empty k-mer).
// Raises ArgumentError when k > 15, NoMemoryError when the table cannot be
// allocated, and whatever NUM2INT/StringValueCStr raise for bad arguments.
VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
  long len, start, i;
  int k, valid;
  size_t size, h, slot, distinct;
  unsigned char *seen;
  const char *c_str;

  // convert (and type-check) the arguments before touching the string
  // or allocating, since any of these calls may raise
  c_str = StringValueCStr(_s);
  len = RSTRING_LEN(_s);
  k = NUM2INT(_k);

  if (k <= 0) {
    return INT2NUM(1);
  }
  if (k > 15) {
    // 4^15 flags is already 1 GiB, and 4^16 no longer fits in 32 bits
    rb_raise(rb_eArgError, "k must not exceed 15 (got %d)", k);
  }

  size = (size_t)1 << (2 * k); // 4^k possible k-mers
  seen = calloc(size, sizeof *seen);
  if (seen == NULL) {
    rb_raise(rb_eNoMemError, "could not allocate k-mer table for k=%d", k);
  }

  for (start = 0; start < len - k + 1; start++) {
    h = 0;
    valid = 1;
    for (i = start; i < start + k; i++) {
      switch (c_str[i]) {
        case 'A': h = (h << 2);     break;
        case 'C': h = (h << 2) + 1; break;
        case 'G': h = (h << 2) + 2; break;
        case 'T': h = (h << 2) + 3; break;
        default:  valid = 0;        break;
      }
    }
    // h is only a complete index when every base was A/C/G/T
    if (valid) {
      seen[h] = 1;
    }
  }

  // count how many k-mers were seen at least once
  distinct = 0;
  for (slot = 0; slot < size; slot++) {
    distinct += seen[slot];
  }
  free(seen);

  return INT2NUM((int)distinct);
}
|
176
|
+
|
177
|
+
// takes in a string and calculates the longest open reading frame
|
178
|
+
// in any of the 6 frames
|
179
|
+
// an open reading frame is defined as the number of bases between
|
180
|
+
// either the start of the sequence or a stop codon and either the
|
181
|
+
// end of the sequence or a stop codon
|
182
|
+
VALUE method_longest_orf(VALUE self, VALUE _s) {
|
183
|
+
int i,sl,longest=0;
|
184
|
+
int len[6];
|
185
|
+
char * c_str;
|
186
|
+
|
187
|
+
sl = RSTRING_LEN(_s);
|
188
|
+
c_str = StringValueCStr(_s);
|
189
|
+
for (i=0;i<6;i++) {
|
190
|
+
len[i]=0;
|
191
|
+
}
|
192
|
+
for (i=0;i<sl-2;i++) {
|
193
|
+
if (c_str[i]=='T' &&
|
194
|
+
((c_str[i+1]=='A' && c_str[i+2]=='G') ||
|
195
|
+
(c_str[i+1]=='A' && c_str[i+2]=='A') ||
|
196
|
+
(c_str[i+1]=='G' && c_str[i+2]=='A'))) {
|
197
|
+
if (len[i%3] > longest) {
|
198
|
+
longest = len[i%3];
|
199
|
+
}
|
200
|
+
len[i%3]=0;
|
201
|
+
} else {
|
202
|
+
len[i%3]++;
|
203
|
+
}
|
204
|
+
if (c_str[i+2]=='A' &&
|
205
|
+
((c_str[i]=='C' && c_str[i+1]=='T') ||
|
206
|
+
(c_str[i]=='T' && c_str[i+1]=='T') ||
|
207
|
+
(c_str[i]=='T' && c_str[i+1]=='C'))) {
|
208
|
+
if (len[3+i%3] > longest) {
|
209
|
+
longest = len[3+i%3];
|
210
|
+
}
|
211
|
+
len[3+i%3]=0;
|
212
|
+
} else {
|
213
|
+
len[3+i%3]++;
|
214
|
+
}
|
215
|
+
}
|
216
|
+
if (len[i%3] > longest) {
|
217
|
+
longest = len[i%3];
|
218
|
+
}
|
219
|
+
if (len[3+i%3] > longest) {
|
220
|
+
longest = len[3+i%3];
|
221
|
+
}
|
222
|
+
return INT2NUM(longest);
|
223
|
+
}
|