bio-bigbio 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +96 -0
- data/VERSION +1 -1
- data/bio-bigbio.gemspec +4 -4
- data/lib/bigbio/db/fasta/fastawriter.rb +8 -3
- data/lib/bigbio/sequence/predictorf.rb +1 -1
- metadata +17 -17
- data/README.rdoc +0 -28
data/README.md
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# BIGBIO
|
2
|
+
|
3
|
+
BigBio = BIG DATA for Ruby
|
4
|
+
|
5
|
+
BigBio is an initiative to create high-performance libraries for big data
|
6
|
+
computing in biology.
|
7
|
+
|
8
|
+
BigBio may use BioLib C/C++/D functions for increasing performance and
|
9
|
+
reducing memory consumption.
|
10
|
+
|
11
|
+
This is an experimental project. If you wish to contribute, subscribe
|
12
|
+
to the BioRuby and/or BioLib mailing lists.
|
13
|
+
|
14
|
+
# Overview
|
15
|
+
|
16
|
+
* BigBio can translate nucleotide sequences to amino acid
|
17
|
+
sequences using an EMBOSS C function, or BioRuby's translator.
|
18
|
+
* BigBio has an ORF emitter which parses DNA/RNA sequences and emits
|
19
|
+
ORFs between START_STOP or STOP_STOP codons.
|
20
|
+
* BigBio has a FASTA file emitter, which iterates FASTA files and
|
21
|
+
iterates sequences without loading everything in memory.
|
22
|
+
|
23
|
+
# Examples
|
24
|
+
|
25
|
+
## Iterate through a FASTA file
|
26
|
+
|
27
|
+
Read a file without loading the whole thing in memory
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
fasta = FastaReader.new(fn)
|
31
|
+
fasta.each do | rec |
|
32
|
+
print rec.descr,rec.seq
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
36
|
+
Write a FASTA file. Any object can be passed in, as long
|
37
|
+
as it responds to 'descr' and 'seq.to_s', or 'id' and 'seq.to_s'. E.g.
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
class StorageObject
|
41
|
+
attr_accessor :descr, :seq
|
42
|
+
end
|
43
|
+
|
44
|
+
mysequence = StorageObject.new
|
45
|
+
mysequence.descr = 'Test'
|
46
|
+
mysequence.seq = "agtcta"
|
47
|
+
```
|
48
|
+
|
49
|
+
Now we can write a FASTA file:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
fasta = FastaWriter.new(fn)
|
53
|
+
fasta.write(mysequence)
|
54
|
+
```
|
55
|
+
|
56
|
+
## Fetch ORFs from a sequence
|
57
|
+
|
58
|
+
BigBio can parse a sequence for ORFs. Together with the FastaReader
|
59
|
+
little memory gets used.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
predictorf = PredictORF.new(id,descr,"ATCATTAGCAACACCAGCTTCCTCTCTCTCGCTTCAAAGTTCACTACTCGTGGATCTCGT")
|
63
|
+
# get all ORFs between start and stop codons, longer than 30 bps
|
64
|
+
orfs = predictorf.startstop(30)
|
65
|
+
# get all sequences between stop codons
|
66
|
+
seqs = predictorf.stopstop(0)
|
67
|
+
```
|
68
|
+
|
69
|
+
## Rapid DNA/RNA to amino acid translation
|
70
|
+
|
71
|
+
Translate with EMBOSS C library, if linked, otherwise use BioRuby
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
trn_table = Bio::Big::TranslationAdapter.translation_table(1)
|
75
|
+
translate = Nucleotide::Translate.new(trn_table)
|
76
|
+
aa_frames = translate.aa_6_frames("ATCATTAGCAACACCAGCTTCCTCTCTCTCGCTTCAAAGTTCACTACTCGTGGATCTCGT")
|
77
|
+
```
|
78
|
+
|
79
|
+
# Install
|
80
|
+
|
81
|
+
The easy way
|
82
|
+
|
83
|
+
```sh
|
84
|
+
gem install bio-bigbio
|
85
|
+
```
|
86
|
+
|
87
|
+
in your code
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
require 'bigbio'
|
91
|
+
```
|
92
|
+
|
93
|
+
# Copyright
|
94
|
+
|
95
|
+
Copyright (c) 2011-2012 Pjotr Prins. See LICENSE for further details.
|
96
|
+
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.3
|
data/bio-bigbio.gemspec
CHANGED
@@ -5,23 +5,23 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-bigbio"
|
8
|
-
s.version = "0.1.
|
8
|
+
s.version = "0.1.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-02-03"
|
13
13
|
s.description = "Fasta reader, ORF emitter, sequence translation"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["getorf", "nt2aa.rb"]
|
16
16
|
s.extra_rdoc_files = [
|
17
17
|
"LICENSE",
|
18
|
-
"README.
|
18
|
+
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
21
|
"Gemfile",
|
22
22
|
"Gemfile.lock",
|
23
23
|
"LICENSE",
|
24
|
-
"README.
|
24
|
+
"README.md",
|
25
25
|
"Rakefile",
|
26
26
|
"VERSION",
|
27
27
|
"bin/getorf",
|
@@ -7,10 +7,15 @@ class FastaWriter
|
|
7
7
|
@f = File.open(fn,"w")
|
8
8
|
end
|
9
9
|
|
10
|
-
# write a FASTA item
|
10
|
+
# write a FASTA item. An item should respond to descr and seq,
|
11
|
+
# or id and seq
|
11
12
|
def write item
|
12
|
-
|
13
|
-
|
13
|
+
if item.respond_to?(:descr)
|
14
|
+
@f.write ">"+item.descr+"\n"
|
15
|
+
else
|
16
|
+
@f.write ">"+item.id+"\n"
|
17
|
+
end
|
18
|
+
@f.write item.seq.to_s.strip+"\n"
|
14
19
|
end
|
15
20
|
|
16
21
|
def close
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-bigbio
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-02-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement: &
|
16
|
+
requirement: &13483940 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *13483940
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bio-logger
|
27
|
-
requirement: &
|
27
|
+
requirement: &13483220 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.9.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *13483220
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rspec
|
38
|
-
requirement: &
|
38
|
+
requirement: &13482380 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 2.3.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *13482380
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
|
-
requirement: &
|
49
|
+
requirement: &13467960 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.0.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *13467960
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: jeweler
|
60
|
-
requirement: &
|
60
|
+
requirement: &13466160 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.5.2
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *13466160
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rcov
|
71
|
-
requirement: &
|
71
|
+
requirement: &13465220 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *13465220
|
80
80
|
description: Fasta reader, ORF emitter, sequence translation
|
81
81
|
email: pjotr.public01@thebird.nl
|
82
82
|
executables:
|
@@ -85,12 +85,12 @@ executables:
|
|
85
85
|
extensions: []
|
86
86
|
extra_rdoc_files:
|
87
87
|
- LICENSE
|
88
|
-
- README.
|
88
|
+
- README.md
|
89
89
|
files:
|
90
90
|
- Gemfile
|
91
91
|
- Gemfile.lock
|
92
92
|
- LICENSE
|
93
|
-
- README.
|
93
|
+
- README.md
|
94
94
|
- Rakefile
|
95
95
|
- VERSION
|
96
96
|
- bin/getorf
|
@@ -139,7 +139,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
139
139
|
version: '0'
|
140
140
|
segments:
|
141
141
|
- 0
|
142
|
-
hash: -
|
142
|
+
hash: -2925433248995829761
|
143
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
144
|
none: false
|
145
145
|
requirements:
|
data/README.rdoc
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
= BIGBIO
|
2
|
-
|
3
|
-
BigBio = BIG DATA for Ruby
|
4
|
-
|
5
|
-
BigBio is an initiative to a create high performance libraries for big data
|
6
|
-
computing in biology.
|
7
|
-
|
8
|
-
BigBio may use BioLib C/C++/D functions for increasing performance and
|
9
|
-
reducing memory consumption.
|
10
|
-
|
11
|
-
This is an experimental project. If you wish to contribute subscribe
|
12
|
-
to the BioRuby and/or BioLib mailing lists.
|
13
|
-
|
14
|
-
== Functionality
|
15
|
-
|
16
|
-
* BigBio can translate nucleotide sequences to amino acid
|
17
|
-
sequences using an EMBOSS C function, or BioRuby's translator.
|
18
|
-
|
19
|
-
* BigBio has an ORF emitter which parses DNA/RNA sequences and emits
|
20
|
-
ORFs between START_STOP or STOP_STOP codons.
|
21
|
-
|
22
|
-
* BigBio has a FASTA file emitter, with iterates FASTA files and
|
23
|
-
iterates sequences without loading everything in memory.
|
24
|
-
|
25
|
-
== Copyright
|
26
|
-
|
27
|
-
Copyright (c) 2011-2012 Pjotr Prins. See LICENSE for further details.
|
28
|
-
|