bio-bigbio 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,96 @@
1
+ # BIGBIO
2
+
3
+ BigBio = BIG DATA for Ruby
4
+
5
+ BigBio is an initiative to create high performance libraries for big data
6
+ computing in biology.
7
+
8
+ BigBio may use BioLib C/C++/D functions for increasing performance and
9
+ reducing memory consumption.
10
+
11
+ This is an experimental project. If you wish to contribute, subscribe
12
+ to the BioRuby and/or BioLib mailing lists.
13
+
14
+ # Overview
15
+
16
+ * BigBio can translate nucleotide sequences to amino acid
17
+ sequences using an EMBOSS C function, or BioRuby's translator.
18
+ * BigBio has an ORF emitter which parses DNA/RNA sequences and emits
19
+ ORFs between START_STOP or STOP_STOP codons.
20
+ * BigBio has a FASTA file emitter, which iterates FASTA files and
21
+ iterates sequences without loading everything in memory.
22
+
23
+ # Examples
24
+
25
+ ## Iterate through a FASTA file
26
+
27
+ Read a file without loading the whole thing in memory
28
+
29
+ ```ruby
30
+ fasta = FastaReader.new(fn)
31
+ fasta.each do | rec |
32
+ print rec.descr,rec.seq
33
+ end
34
+ ```
35
+
36
+ Write a FASTA file. Any object can be passed in, as long
37
+ as it responds to 'descr' and 'seq.to_s', or 'id' and 'seq.to_s'. E.g.
38
+
39
+ ```ruby
40
+ class StorageObject
41
+ attr_accessor :descr, :seq
42
+ end
43
+
44
+ mysequence = StorageObject.new
45
+ mysequence.descr = 'Test'
46
+ mysequence.seq = "agtcta"
47
+ ```
48
+
49
+ now we can write a FASTA file
50
+
51
+ ```ruby
52
+ fasta = FastaWriter.new(fn)
53
+ fasta.write(mysequence)
54
+ ```
55
+
56
+ ## Fetch ORFs from a sequence
57
+
58
+ BigBio can parse a sequence for ORFs. Together with the FastaReader
59
+ little memory gets used
60
+
61
+ ```ruby
62
+ predictorf = PredictORF.new(id,descr,"ATCATTAGCAACACCAGCTTCCTCTCTCTCGCTTCAAAGTTCACTACTCGTGGATCTCGT")
63
+ # get all ORFs between start and stop codons, longer than 30 bps
64
+ orfs = predictorf.startstop(30)
65
+ # get all sequences between stop codons
66
+ seqs = predictorf.stopstop(0)
67
+ ```
68
+
69
+ ## Rapid DNA/RNA to amino acid translation
70
+
71
+ Translate with EMBOSS C library, if linked, otherwise use BioRuby
72
+
73
+ ```ruby
74
+ trn_table = Bio::Big::TranslationAdapter.translation_table(1)
75
+ translate = Nucleotide::Translate.new(trn_table)
76
+ aa_frames = translate.aa_6_frames("ATCATTAGCAACACCAGCTTCCTCTCTCTCGCTTCAAAGTTCACTACTCGTGGATCTCGT")
77
+ ```
78
+
79
+ # Install
80
+
81
+ The easy way
82
+
83
+ ```sh
84
+ gem install bio-bigbio
85
+ ```
86
+
87
+ in your code
88
+
89
+ ```ruby
90
+ require 'bigbio'
91
+ ```
92
+
93
+ # Copyright
94
+
95
+ Copyright (c) 2011-2012 Pjotr Prins. See LICENSE for further details.
96
+
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
data/bio-bigbio.gemspec CHANGED
@@ -5,23 +5,23 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-bigbio"
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2012-01-30"
12
+ s.date = "2012-02-03"
13
13
  s.description = "Fasta reader, ORF emitter, sequence translation"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["getorf", "nt2aa.rb"]
16
16
  s.extra_rdoc_files = [
17
17
  "LICENSE",
18
- "README.rdoc"
18
+ "README.md"
19
19
  ]
20
20
  s.files = [
21
21
  "Gemfile",
22
22
  "Gemfile.lock",
23
23
  "LICENSE",
24
- "README.rdoc",
24
+ "README.md",
25
25
  "Rakefile",
26
26
  "VERSION",
27
27
  "bin/getorf",
@@ -7,10 +7,15 @@ class FastaWriter
7
7
  @f = File.open(fn,"w")
8
8
  end
9
9
 
10
- # write a FASTA item
10
+ # write a FASTA item. An item should respond to descr and seq,
11
+ # or id and seq
11
12
  def write item
12
- @f.write ">"+item.id+' '+item.descr+"\n"
13
- @f.write item.seq.strip+"\n"
13
+ if item.respond_to?(:descr)
14
+ @f.write ">"+item.descr+"\n"
15
+ else
16
+ @f.write ">"+item.id+"\n"
17
+ end
18
+ @f.write item.seq.to_s.strip+"\n"
14
19
  end
15
20
 
16
21
  def close
@@ -81,7 +81,7 @@ class PredictORF
81
81
 
82
82
  include Bio::Big::FrameCodonHelpers
83
83
 
84
- def initialize id, descr, seq, trn_table
84
+ def initialize id, descr, seq, trn_table = nil
85
85
  @id = id
86
86
  @descr = descr
87
87
  @seq = seq.gsub(/\s/,'')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-bigbio
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-30 00:00:00.000000000Z
12
+ date: 2012-02-03 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
16
- requirement: &27003400 !ruby/object:Gem::Requirement
16
+ requirement: &13483940 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.4.1
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *27003400
24
+ version_requirements: *13483940
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bio-logger
27
- requirement: &27002560 !ruby/object:Gem::Requirement
27
+ requirement: &13483220 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.9.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *27002560
35
+ version_requirements: *13483220
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &27001520 !ruby/object:Gem::Requirement
38
+ requirement: &13482380 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 2.3.0
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *27001520
46
+ version_requirements: *13482380
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
- requirement: &26974980 !ruby/object:Gem::Requirement
49
+ requirement: &13467960 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.0.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *26974980
57
+ version_requirements: *13467960
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: jeweler
60
- requirement: &26974040 !ruby/object:Gem::Requirement
60
+ requirement: &13466160 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.5.2
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *26974040
68
+ version_requirements: *13466160
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rcov
71
- requirement: &26972580 !ruby/object:Gem::Requirement
71
+ requirement: &13465220 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *26972580
79
+ version_requirements: *13465220
80
80
  description: Fasta reader, ORF emitter, sequence translation
81
81
  email: pjotr.public01@thebird.nl
82
82
  executables:
@@ -85,12 +85,12 @@ executables:
85
85
  extensions: []
86
86
  extra_rdoc_files:
87
87
  - LICENSE
88
- - README.rdoc
88
+ - README.md
89
89
  files:
90
90
  - Gemfile
91
91
  - Gemfile.lock
92
92
  - LICENSE
93
- - README.rdoc
93
+ - README.md
94
94
  - Rakefile
95
95
  - VERSION
96
96
  - bin/getorf
@@ -139,7 +139,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
139
139
  version: '0'
140
140
  segments:
141
141
  - 0
142
- hash: -1176702177880796219
142
+ hash: -2925433248995829761
143
143
  required_rubygems_version: !ruby/object:Gem::Requirement
144
144
  none: false
145
145
  requirements:
data/README.rdoc DELETED
@@ -1,28 +0,0 @@
1
- = BIGBIO
2
-
3
- BigBio = BIG DATA for Ruby
4
-
5
- BigBio is an initiative to a create high performance libraries for big data
6
- computing in biology.
7
-
8
- BigBio may use BioLib C/C++/D functions for increasing performance and
9
- reducing memory consumption.
10
-
11
- This is an experimental project. If you wish to contribute subscribe
12
- to the BioRuby and/or BioLib mailing lists.
13
-
14
- == Functionality
15
-
16
- * BigBio can translate nucleotide sequences to amino acid
17
- sequences using an EMBOSS C function, or BioRuby's translator.
18
-
19
- * BigBio has an ORF emitter which parses DNA/RNA sequences and emits
20
- ORFs between START_STOP or STOP_STOP codons.
21
-
22
- * BigBio has a FASTA file emitter, with iterates FASTA files and
23
- iterates sequences without loading everything in memory.
24
-
25
- == Copyright
26
-
27
- Copyright (c) 2011-2012 Pjotr Prins. See LICENSE for further details.
28
-