transrate 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +1 -0
- data/LICENSE +2 -15
- data/README.md +14 -132
- data/Rakefile +19 -2
- data/bin/transrate +49 -10
- data/deps/deps.yaml +0 -10
- data/docs/transrate_logo_full.png +0 -0
- data/ext/transrate/extconf.rb +13 -0
- data/ext/transrate/transrate.c +223 -0
- data/lib/transrate.rb +1 -0
- data/lib/transrate/assembly.rb +12 -10
- data/lib/transrate/bowtie2.rb +7 -0
- data/lib/transrate/comparative_metrics.rb +103 -73
- data/lib/transrate/contig.rb +94 -93
- data/lib/transrate/contig_metrics.rb +1 -2
- data/lib/transrate/read_metrics.rb +13 -7
- data/lib/transrate/version.rb +1 -1
- data/test/helper.rb +1 -31
- data/test/test_bin.rb +99 -0
- data/test/test_bowtie.rb +12 -0
- data/test/test_comp_metrics.rb +161 -104
- data/test/test_contig.rb +62 -6
- data/test/test_contig_metrics.rb +2 -2
- data/test/test_inline.rb +2 -2
- data/test/test_transrater.rb +1 -1
- data/transrate.gemspec +5 -4
- metadata +40 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d2c7c5983fe07d76b6fbb4bf66bea5c93424f23d
|
4
|
+
data.tar.gz: 8c581edb8de7a7975ce0099a5f6bcd5ae6b51e65
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05ee942313dfb447366cbaa2609b8b1ceffe789e73c734791a20f6da25afb884ff2e7fbc05ac3ea8d5523023f842ad572286979691a1552c3aab61ec5b1c7733
|
7
|
+
data.tar.gz: c94ec455063b857ef41a4fc334a84f568cb47efa17a8785635911a02d222622d3948a262733323a9f96cefce750294a385a3315d8d7dc421b3d5028278e5ced6
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/LICENSE
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
## Summary
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
SNAP and CD-HIT-2D are bundled as binaries under their respective licenses
|
6
|
-
as described below.
|
3
|
+
Transrate is released under the MIT license.
|
7
4
|
|
8
5
|
## The MIT License (MIT)
|
9
6
|
|
10
|
-
Copyright (c)
|
7
|
+
Copyright (c) 2014 Richard Smith-Unna & Chris Boursnell
|
11
8
|
|
12
9
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
13
10
|
this software and associated documentation files (the "Software"), to deal in
|
@@ -25,13 +22,3 @@ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
25
22
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
26
23
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
27
24
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
28
|
-
|
29
|
-
## SNAP
|
30
|
-
|
31
|
-
SNAP is distributed as a binary in accordance with its Apache license.
|
32
|
-
The source code for SNAP is available at https://github.com/amplab/snap
|
33
|
-
|
34
|
-
## CD-HIT-2D
|
35
|
-
|
36
|
-
CD-HIT-2D is distributed as a binary in accordance with ith GPLv2 license.
|
37
|
-
The source code for CD-HIT-2D is available at https://code.google.com/p/cdhit/
|
data/README.md
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
<p align="center">
|
2
|
+
<img alt="Transrate - understand your transcriptome assembly" src="https://github.com/Blahah/transrate/raw/master/docs/transrate_logo_full.png">
|
3
|
+
</p>
|
3
4
|
|
4
|
-
|
5
|
+
## Development status
|
5
6
|
|
6
7
|
[][gem]
|
7
8
|
[][travis]
|
@@ -15,138 +16,19 @@ Quality analysis and comparison of transcriptome assemblies.
|
|
15
16
|
[codeclimate]: https://codeclimate.com/github/Blahah/transrate
|
16
17
|
[coveralls]: https://coveralls.io/r/Blahah/transrate
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
1. [Development status](https://github.com/Blahah/transrate#development-status)
|
21
|
-
2. [Transcriptome assembly quality metrics](https://github.com/Blahah/transrate#transcriptome-assembly-quality-metrics)
|
22
|
-
3. [Installation](https://github.com/Blahah/transrate#installation)
|
23
|
-
4. [Usage](https://github.com/Blahah/transrate#usage)
|
24
|
-
- [Command line](https://github.com/Blahah/transrate#command-line)
|
25
|
-
- [example](https://github.com/Blahah/transrate#example)
|
26
|
-
- [As a library](https://github.com/Blahah/transrate#as-a-library)
|
27
|
-
5. [Requirements](https://github.com/Blahah/transrate#requirements)
|
28
|
-
- [Ruby](https://github.com/Blahah/transrate#ruby)
|
29
|
-
- [RubyGems](https://github.com/Blahah/transrate#rubygems)
|
30
|
-
- [Blast+, Bowtie 2](https://github.com/Blahah/transrate#blast+-and-bowtie2)
|
31
|
-
6. [Getting help](https://github.com/Blahah/transrate#getting-help)
|
32
|
-
|
33
|
-
## Development status
|
34
|
-
|
35
|
-
This software is being actively developed. Please be aware that they may be bugs. If you find any, please report them on the [issue tracker](https://github.com/Blahah/transrate/issues).
|
36
|
-
|
37
|
-
## Transcriptome assembly quality metrics
|
38
|
-
|
39
|
-
**transrate** implements a variety of established and new metrics. They are explained in detail [on the wiki](https://github.com/Blahah/transrate/wiki/Transcriptome-assembly-quality-metrics).
|
40
|
-
|
41
|
-
## Installation
|
42
|
-
|
43
|
-
Assuming you've got a recent version of Ruby installed (see below), you can install transrate very easily. Just run at the terminal:
|
44
|
-
|
45
|
-
`gem install transrate`
|
46
|
-
|
47
|
-
Next all the software Transrate depends on needs to be installed. Luckily, transrate is clever enough to do this itself. Simply run
|
48
|
-
|
49
|
-
`transrate --install-deps`
|
50
|
-
|
51
|
-
Transrate will check whether its dependencies are installed, and if not will download and install them for you.
|
52
|
-
|
53
|
-
## Usage
|
54
|
-
|
55
|
-
### Command line
|
56
|
-
|
57
|
-
`transrate --help` will display basic usage instructions.
|
58
|
-
|
59
|
-
```
|
60
|
-
Transrate v0.2.0 by Richard Smith-Unna <rds45@cam.ac.uk>
|
61
|
-
|
62
|
-
DESCRIPTION:
|
63
|
-
Analyse a de-novo transcriptome
|
64
|
-
assembly using three kinds of metrics:
|
65
|
-
|
66
|
-
1. contig-based
|
67
|
-
2. read-mapping (if --left and --right are provided)
|
68
|
-
3. reference-based (if --reference is provided)
|
69
|
-
|
70
|
-
Bug reports and feature requests at:
|
71
|
-
http://github.com/blahah/transrate
|
72
|
-
|
73
|
-
USAGE:
|
74
|
-
transrate <options>
|
75
|
-
|
76
|
-
EXAMPLES:
|
77
|
-
transrate --assembly contigs.fa --reference Athaliana_protein.fa --threads 8
|
78
|
-
|
79
|
-
OPTIONS:
|
80
|
-
--assembly, -a <s>: assembly file(s) in FASTA format, comma-separated
|
81
|
-
--reference, -r <s>: reference proteome file in FASTA format
|
82
|
-
--left, -l <s>: left reads file in FASTQ format
|
83
|
-
--right, -i <s>: right reads file in FASTQ format
|
84
|
-
--insertsize, -n <i>: mean insert size (default: 200)
|
85
|
-
--insertsd, -s <i>: insert size standard deviation (default: 50)
|
86
|
-
--threads, -t <i>: number of threads to use (default: 8)
|
87
|
-
--outfile, -o <s>: filename to use for CSV output (default: transate_results.csv)
|
88
|
-
--loglevel, -g <s>: the amount of information to print. one of [error, info, warn, debug] (default: info)
|
89
|
-
--install-deps, -d: install any missing dependencies
|
90
|
-
--profile, -p: debug option: profile the code as it runs
|
91
|
-
--version, -v: Print version and exit
|
92
|
-
--help, -h: Show this message
|
93
|
-
```
|
94
|
-
|
95
|
-
See the [getting started guide] on the website for more instructions, and see the [command-line options] part of the manual for details.
|
96
|
-
|
97
|
-
#### Example
|
98
|
-
|
99
|
-
```
|
100
|
-
transrate --assembly assembly.fasta \
|
101
|
-
--reference reference.fasta \
|
102
|
-
--left l.fq \
|
103
|
-
--right r.fq \
|
104
|
-
--threads 4
|
105
|
-
```
|
106
|
-
|
107
|
-
### As a library
|
108
|
-
|
109
|
-
```ruby
|
110
|
-
require 'transrate'
|
111
|
-
|
112
|
-
assembly = Transrate::Assembly.new(File.expand_path('assembly.fasta'))
|
113
|
-
reference = Transrate::Assembly.new(File.expand_path('reference.fasta'))
|
114
|
-
|
115
|
-
t = Transrate::Transrater.new(assembly, reference)
|
116
|
-
|
117
|
-
left = File.expand_path('left.fq')
|
118
|
-
right = File.expand_path('right.fq')
|
119
|
-
|
120
|
-
puts t.all_metrics(left, right)
|
121
|
-
puts t.assembly_score
|
122
|
-
```
|
123
|
-
|
124
|
-
## Requirements
|
125
|
-
|
126
|
-
### Ruby
|
127
|
-
|
128
|
-
First, you'll need Ruby v2.0.0 or greater installed. You can check with:
|
129
|
-
|
130
|
-
`ruby --version`
|
131
|
-
|
132
|
-
If you don't have Ruby installed, or you need a higher version, I recommend using [RVM](http://rvm.io/) as your Ruby Version Manager. To install RVM along with the latest Ruby, just run:
|
133
|
-
|
134
|
-
`\curl -L https://get.rvm.io | bash -s stable --ruby`
|
135
|
-
|
136
|
-
### Rubygems
|
137
|
-
|
138
|
-
Your Ruby installation *should* come with RubyGems, the package manager for Ruby. You can check with:
|
139
|
-
|
140
|
-
`gem --version`
|
19
|
+
This software is being actively developed. Please be aware that there may be bugs. If you find any, please report them on the [issue tracker](https://github.com/Blahah/transrate/issues).
|
141
20
|
|
142
|
-
|
21
|
+
## Documentation
|
143
22
|
|
144
|
-
|
23
|
+
**transrate** is documented [on the website](http://hibberdlab.com/transrate).
|
145
24
|
|
146
|
-
|
25
|
+
## Contributing
|
147
26
|
|
148
|
-
|
27
|
+
Interested in helping? Great! We particularly would like help with the following:
|
149
28
|
|
150
|
-
|
29
|
+
- code review
|
30
|
+
- documentation review
|
31
|
+
- adding features
|
32
|
+
- tackling bugs
|
151
33
|
|
152
|
-
|
34
|
+
For any of these, please just pick an appropriate issue [on the tracker](https://github.com/Blahah/transrate/issues) and make a pull request.
|
data/Rakefile
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
require 'rake/testtask'
|
2
|
+
require 'rake/extensiontask'
|
3
|
+
|
4
|
+
Rake::ExtensionTask.new('transrate') do |ext|
|
5
|
+
ext.lib_dir = "lib/transrate"
|
6
|
+
end
|
2
7
|
|
3
8
|
Rake::TestTask.new do |t|
|
4
9
|
t.libs << 'test'
|
@@ -17,7 +22,7 @@ Rake::TestTask.new do |t|
|
|
17
22
|
end
|
18
23
|
|
19
24
|
Rake::TestTask.new do |t|
|
20
|
-
t.name = :
|
25
|
+
t.name = :contig_metrics
|
21
26
|
t.libs << 'test'
|
22
27
|
t.test_files = ['test/test_contig_metrics.rb']
|
23
28
|
end
|
@@ -40,5 +45,17 @@ Rake::TestTask.new do |t|
|
|
40
45
|
t.test_files = ['test/test_transrater.rb']
|
41
46
|
end
|
42
47
|
|
48
|
+
Rake::TestTask.new do |t|
|
49
|
+
t.name = :bin
|
50
|
+
t.libs << 'test'
|
51
|
+
t.test_files = ['test/test_bin.rb']
|
52
|
+
end
|
53
|
+
|
54
|
+
Rake::TestTask.new do |t|
|
55
|
+
t.name = :contig
|
56
|
+
t.libs << 'test'
|
57
|
+
t.test_files = ['test/test_contig.rb']
|
58
|
+
end
|
59
|
+
|
43
60
|
desc "Run tests"
|
44
|
-
task :default => :test
|
61
|
+
task :default => :test
|
data/bin/transrate
CHANGED
@@ -55,8 +55,8 @@ opts = Trollop::options do
|
|
55
55
|
opt :threads, "number of threads to use",
|
56
56
|
:default => 8,
|
57
57
|
:type => Integer
|
58
|
-
opt :outfile, "filename to use for CSV output",
|
59
|
-
:default => '
|
58
|
+
opt :outfile, "prefix filename to use for CSV output",
|
59
|
+
:default => 'transrate'
|
60
60
|
opt :loglevel, "the amount of information to print. " +
|
61
61
|
"one of [error, info, warn, debug]",
|
62
62
|
:default => 'info'
|
@@ -64,16 +64,32 @@ opts = Trollop::options do
|
|
64
64
|
opt :profile, "debug option: profile the code as it runs"
|
65
65
|
end
|
66
66
|
|
67
|
+
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
|
68
|
+
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
67
69
|
if opts.install_deps
|
68
70
|
puts "Checking dependencies"
|
69
|
-
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
|
70
|
-
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
71
71
|
Bindeps.require gem_deps
|
72
72
|
puts "All dependencies installed"
|
73
73
|
exit
|
74
|
+
else
|
75
|
+
missing = Bindeps.missing gem_deps
|
76
|
+
if missing.length > 0
|
77
|
+
puts "Dependencies are missing:"
|
78
|
+
missing.each do |dep|
|
79
|
+
puts " - #{dep}"
|
80
|
+
end
|
81
|
+
puts "To install all missing dependencies, run `transrate --install-deps`"
|
82
|
+
exit(1)
|
83
|
+
end
|
74
84
|
end
|
75
85
|
|
76
|
-
|
86
|
+
if opts.assembly
|
87
|
+
opts.assembly.split(',').each do |assembly_file|
|
88
|
+
unless File.exist?(assembly_file)
|
89
|
+
raise IOError.new "Assembly fasta file does not exist: #{assembly_file}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
else
|
77
93
|
raise ArgumentError.new "Option --assembly must be specified. " +
|
78
94
|
"Try --help for help."
|
79
95
|
end
|
@@ -194,15 +210,38 @@ opts.assembly.split(',').each do |assembly|
|
|
194
210
|
logger.info "No reference provided, skipping comparative diagnostics"
|
195
211
|
end
|
196
212
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
213
|
+
# write contig metrics to file for each contig
|
214
|
+
outfile = "#{opts.outfile}_contigs.csv"
|
215
|
+
logger.info "Writing contig metrics for each contig to #{outfile}"
|
216
|
+
# have option to turn off, default on
|
217
|
+
first=true
|
218
|
+
CSV.open(outfile, 'wb') do |csv|
|
219
|
+
a.each do |name, contig|
|
220
|
+
basic_metrics = {:contig_name => name}.merge(contig.basic_metrics)
|
221
|
+
if opts.reference
|
222
|
+
comp_metrics = contig.comparative_metrics
|
223
|
+
basic_metrics.merge!(comp_metrics)
|
224
|
+
end
|
225
|
+
if opts.left and opts.right
|
226
|
+
read_metrics = contig.read_metrics
|
227
|
+
basic_metrics.merge!(read_metrics)
|
228
|
+
end
|
229
|
+
if first
|
230
|
+
csv << basic_metrics.keys
|
231
|
+
first = false
|
232
|
+
end
|
233
|
+
csv << basic_metrics.values
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
all << contig_results.merge(read_results)
|
238
|
+
.merge(comparative_results)
|
239
|
+
.merge({ :assembly => assembly })
|
201
240
|
|
202
241
|
end
|
203
242
|
|
204
243
|
# write out all results to .csv
|
205
|
-
outfile = opts.
|
244
|
+
outfile = "#{opts.outfile}_assemblies.csv"
|
206
245
|
logger.info "Writing analysis results to #{outfile}"
|
207
246
|
CSV.open(outfile, 'wb') do |file|
|
208
247
|
keys = all[0].keys
|
data/deps/deps.yaml
CHANGED
@@ -43,13 +43,3 @@ bowtie2:
|
|
43
43
|
64bit:
|
44
44
|
linux: http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.3/bowtie2-2.2.3-linux-x86_64.zip
|
45
45
|
macosx: http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.3/bowtie2-2.2.3-macos-x86_64.zip
|
46
|
-
express:
|
47
|
-
binaries:
|
48
|
-
- express
|
49
|
-
version:
|
50
|
-
number: '1.5.1'
|
51
|
-
command: 'express --version'
|
52
|
-
url:
|
53
|
-
64bit:
|
54
|
-
linux: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-linux_x86_64.tgz
|
55
|
-
macosx: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-macosx_x86_64.tgz
|
Binary file
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Build configuration for the transrate C extension.
# mkmf generates the Makefile used to compile Ruby extensions.
require 'mkmf'

# Name handed to mkmf for both the build configuration and the Makefile
extension_name = 'transrate/transrate'

# Append our flags instead of replacing $CFLAGS: mkmf pre-populates it from
# the Ruby build configuration, and overwriting it throws those flags away.
$CFLAGS << ' -Wall -O3' # O for optimise

# The destination
dir_config(extension_name)

# Do the work
create_makefile(extension_name)
|
@@ -0,0 +1,223 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include <stdlib.h>
|
3
|
+
|
4
|
+
// Defining a space for information and references about the module to be
|
5
|
+
// stored internally
|
6
|
+
VALUE Contig = Qnil;
|
7
|
+
VALUE Transrate = Qnil;
|
8
|
+
|
9
|
+
// Prototype for the initialization method - Ruby calls this, not you
|
10
|
+
void Init_transrate();
|
11
|
+
|
12
|
+
// methods are prefixed by 'method_' here
|
13
|
+
//VALUE TestInit(VALUE, VALUE, VALUE, VALUE, VALUE);
|
14
|
+
VALUE method_composition(VALUE, VALUE);
|
15
|
+
VALUE method_base_count(VALUE,VALUE);
|
16
|
+
VALUE method_dibase_count(VALUE,VALUE);
|
17
|
+
VALUE method_kmer_count(VALUE,VALUE,VALUE);
|
18
|
+
VALUE method_longest_orf(VALUE, VALUE);
|
19
|
+
|
20
|
+
int * base_counts;
|
21
|
+
int * dibase_counts;
|
22
|
+
|
23
|
+
// The initialization method for this module
|
24
|
+
void Init_transrate() {
|
25
|
+
Transrate = rb_define_module("Transrate");
|
26
|
+
// VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
|
27
|
+
Contig = rb_define_class_under(Transrate, "Contig", rb_cObject);
|
28
|
+
// rb_define_method(Contig, "initialize", TestInit, 2);
|
29
|
+
rb_define_method(Contig, "composition", method_composition, 1);
|
30
|
+
rb_define_method(Contig, "base_count", method_base_count, 1);
|
31
|
+
rb_define_method(Contig, "dibase_count", method_dibase_count, 1);
|
32
|
+
rb_define_method(Contig, "kmer_count", method_kmer_count, 2);
|
33
|
+
rb_define_method(Contig, "longest_orf", method_longest_orf, 1);
|
34
|
+
}
|
35
|
+
|
36
|
+
// Maps a nucleotide character to its slot in the count tables:
// A=0, C=1, G=2, T=3, and every other character (N, lower case, ...) = 4.
static int composition_base_index(char base) {
  switch (base) {
    case 'A': return 0;
    case 'C': return 1;
    case 'G': return 2;
    case 'T': return 3;
    default:  return 4;
  }
}

// Counts the single bases and the adjacent base pairs of _seq.
//
// Results are stored in the file-scope tables base_counts (5 entries) and
// dibase_counts (25 entries, indexed as previous_base * 5 + current_base) and
// are read back through base_count / dibase_count. Each call overwrites the
// results of the previous one.
//
// _seq must be a String without embedded NUL bytes (StringValueCStr raises
// otherwise). Raises NoMemoryError if the tables cannot be allocated.
// Always returns 0.
VALUE method_composition(VALUE self, VALUE _seq) {
  enum { N_BASES = 5, N_DIBASES = N_BASES * N_BASES };
  long i, len;
  int j, cur, prev = 0;
  const char *seq;

  seq = StringValueCStr(_seq);
  len = RSTRING_LEN(_seq);

  // Allocate the tables once and reuse them afterwards; allocating on every
  // call without freeing leaked both tables for each sequence processed.
  if (base_counts == NULL) {
    base_counts = malloc(N_BASES * sizeof *base_counts);
  }
  if (dibase_counts == NULL) {
    dibase_counts = malloc(N_DIBASES * sizeof *dibase_counts);
  }
  if (base_counts == NULL || dibase_counts == NULL) {
    rb_memerror();
  }

  for (j = 0; j < N_BASES; j++) {
    base_counts[j] = 0;
  }
  for (j = 0; j < N_DIBASES; j++) {
    dibase_counts[j] = 0;
  }

  for (i = 0; i < len; i++) {
    cur = composition_base_index(seq[i]);
    base_counts[cur]++;
    if (i > 0) {
      // prev still holds the index of seq[i-1] from the previous iteration
      dibase_counts[prev * N_BASES + cur]++;
    }
    prev = cur;
  }
  return INT2NUM(0);
}
|
107
|
+
|
108
|
+
// Returns the count stored at position idx of the dibase table.
//
// idx must be an Integer in 0..24. Raises RuntimeError if the table has not
// been allocated yet (dibase_counts is still NULL) and IndexError if idx is
// outside the table; both cases previously read invalid memory.
VALUE method_dibase_count(VALUE self, VALUE idx) {
  enum { N_DIBASES = 25 };
  int i = NUM2INT(idx);

  if (dibase_counts == NULL) {
    rb_raise(rb_eRuntimeError,
             "dibase counts are not available: call composition first");
  }
  if (i < 0 || i >= N_DIBASES) {
    rb_raise(rb_eIndexError, "dibase index %d out of range (0..%d)",
             i, N_DIBASES - 1);
  }
  return INT2NUM(dibase_counts[i]);
}
|
111
|
+
|
112
|
+
// Returns the count stored at position idx of the base table.
//
// idx must be an Integer in 0..4. Raises RuntimeError if the table has not
// been allocated yet (base_counts is still NULL) and IndexError if idx is
// outside the table; both cases previously read invalid memory.
VALUE method_base_count(VALUE self, VALUE idx) {
  enum { N_BASES = 5 };
  int i = NUM2INT(idx);

  if (base_counts == NULL) {
    rb_raise(rb_eRuntimeError,
             "base counts are not available: call composition first");
  }
  if (i < 0 || i >= N_BASES) {
    rb_raise(rb_eIndexError, "base index %d out of range (0..%d)",
             i, N_BASES - 1);
  }
  return INT2NUM(base_counts[i]);
}
|
115
|
+
|
116
|
+
// Returns the number of distinct k-mers of length _k that occur in _s.
//
// Only windows made up entirely of 'A', 'C', 'G' and 'T' are counted; a window
// containing any other character is skipped. Each k-mer is encoded in two
// bits per base and used as an index into a table of 4^k "seen" flags.
//
// _k must be an Integer in 0..15 (ArgumentError otherwise); the upper bound
// keeps the table index within 30 bits. _s must be a String without embedded
// NUL bytes. Raises NoMemoryError if the table cannot be allocated.
VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
  enum { MAX_K = 15 };
  long len, start, i;
  int k, code, valid;
  size_t size, h, distinct;
  unsigned char *seen;
  const char *c_str;

  // Validate/convert the string before asking for its length
  c_str = StringValueCStr(_s);
  len = RSTRING_LEN(_s);
  k = NUM2INT(_k);
  if (k < 0 || k > MAX_K) {
    rb_raise(rb_eArgError, "k must be between 0 and %d (got %d)", MAX_K, k);
  }

  // One flag per possible k-mer, on the heap: a 4^k array on the stack
  // overflows it for moderate k. Flags (rather than short counters) cannot
  // wrap negative when a k-mer occurs more than 32767 times.
  size = (size_t)1 << (2 * k);
  seen = calloc(size, sizeof *seen);
  if (seen == NULL) {
    rb_memerror();
  }

  // Nothing below can raise, so the table is always released.
  for (start = 0; start + k <= len; start++) {
    h = 0;
    valid = 1;
    for (i = start; i < start + k; i++) {
      switch (c_str[i]) {
        case 'A': code = 0; break;
        case 'C': code = 1; break;
        case 'G': code = 2; break;
        case 'T': code = 3; break;
        default:  code = 0; valid = 0; break;
      }
      if (!valid) {
        break; // window contains a non-ACGT character
      }
      h = (h << 2) | (size_t)code;
    }
    if (valid) {
      seen[h] = 1;
    }
  }

  // count how many k-mers were seen at least once
  distinct = 0;
  for (h = 0; h < size; h++) {
    distinct += seen[h];
  }
  free(seen);
  return INT2NUM((int)distinct);
}
|
176
|
+
|
177
|
+
// takes in a string and calculates the longest open reading frame
|
178
|
+
// in any of the 6 frames
|
179
|
+
// an open reading frame is defined as the number of bases between
|
180
|
+
// either the start of the sequence or a stop codon and either the
|
181
|
+
// end of the sequence or a stop codon
|
182
|
+
VALUE method_longest_orf(VALUE self, VALUE _s) {
|
183
|
+
int i,sl,longest=0;
|
184
|
+
int len[6];
|
185
|
+
char * c_str;
|
186
|
+
|
187
|
+
sl = RSTRING_LEN(_s);
|
188
|
+
c_str = StringValueCStr(_s);
|
189
|
+
for (i=0;i<6;i++) {
|
190
|
+
len[i]=0;
|
191
|
+
}
|
192
|
+
for (i=0;i<sl-2;i++) {
|
193
|
+
if (c_str[i]=='T' &&
|
194
|
+
((c_str[i+1]=='A' && c_str[i+2]=='G') ||
|
195
|
+
(c_str[i+1]=='A' && c_str[i+2]=='A') ||
|
196
|
+
(c_str[i+1]=='G' && c_str[i+2]=='A'))) {
|
197
|
+
if (len[i%3] > longest) {
|
198
|
+
longest = len[i%3];
|
199
|
+
}
|
200
|
+
len[i%3]=0;
|
201
|
+
} else {
|
202
|
+
len[i%3]++;
|
203
|
+
}
|
204
|
+
if (c_str[i+2]=='A' &&
|
205
|
+
((c_str[i]=='C' && c_str[i+1]=='T') ||
|
206
|
+
(c_str[i]=='T' && c_str[i+1]=='T') ||
|
207
|
+
(c_str[i]=='T' && c_str[i+1]=='C'))) {
|
208
|
+
if (len[3+i%3] > longest) {
|
209
|
+
longest = len[3+i%3];
|
210
|
+
}
|
211
|
+
len[3+i%3]=0;
|
212
|
+
} else {
|
213
|
+
len[3+i%3]++;
|
214
|
+
}
|
215
|
+
}
|
216
|
+
if (len[i%3] > longest) {
|
217
|
+
longest = len[i%3];
|
218
|
+
}
|
219
|
+
if (len[3+i%3] > longest) {
|
220
|
+
longest = len[3+i%3];
|
221
|
+
}
|
222
|
+
return INT2NUM(longest);
|
223
|
+
}
|