ms-fasta 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/History ADDED
@@ -0,0 +1,4 @@
1
+ == 0.1.0 / 2009-03-24
2
+
3
+ Initial release with tasks to load entries
4
+ from a FASTA file.
data/MIT-LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2009, Regents of the University of Colorado.
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ = {Ms-Fasta}[http://mspire.rubyforge.org/projects/ms-fasta]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library for working with {FASTA}[http://en.wikipedia.org/wiki/Fasta_format] files.
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/bahuvrihi/ms-fasta/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Fasta is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-fasta
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2009, Regents of the University of Colorado.
20
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
21
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
22
+ License:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,21 @@
1
+ require 'external'
2
+ require 'ms/fasta/entry'
3
+
4
module Ms
  module Fasta
    # An ExternalArchive whose entries are the records of a FASTA file.
    # Indexing is delegated to ExternalArchive#reindex_by_sep and each raw
    # record string is turned into a Fasta::Entry on access.
    class Archive < ExternalArchive

      # Rebuilds the index so that each indexed item is one FASTA entry.
      # Records are split on a newline followed by '>'.  The block, if any,
      # is forwarded untouched to reindex_by_sep.
      #
      # NOTE(review): :sep_length => 1 with :entry_follows_sep => true
      # presumably keeps the '>' attached to the entry that follows it;
      # confirm against the ExternalArchive documentation.
      def reindex(&block)
        reindex_by_sep(
          nil,
          :sep_regexp => /\n>/,
          :sep_length => 1,
          :entry_follows_sep => true,
          &block
        )
      end

      # Converts the raw record text str into a Fasta::Entry via Entry.parse.
      def str_to_entry(str)
        Entry.parse(str)
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
module Ms
  module Fasta

    # Entry represents a FASTA formatted entry.
    #
    #   entry = Entry.parse %q{
    #   >gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
    #   LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
    #   EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
    #   LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
    #   GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
    #   IENY
    #   }.strip
    #
    #   entry.header[0,30]        # => 'gi|5524211|gb|AAD44166.1| cyto'
    #   entry.sequence[0,30]      # => 'LCLYTHIGRNIYYGSYLYSETWNTGIMLLL'
    #
    class Entry
      class << self

        # Parses the entry string into a Fasta::Entry.  The entry string must
        # be well-formatted, ie begin with '>'; a RuntimeError is raised
        # otherwise.
        #
        # The header is everything between the '>' and the first line break
        # (LF or CRLF, the line break itself is not part of the header).  The
        # sequence is the remainder of the string with all line breaks
        # removed.  An entry consisting of a header line only (no line break
        # at all) parses to an empty sequence.
        def parse(str)
          unless str[0, 1] == '>'
            raise "input should begin with '>'"
          end

          # Split once, on the first line break only; later line breaks
          # belong to the sequence and are stripped below.  Either part may
          # be missing (">" alone, or a header with no sequence lines).
          header, body = str[1..-1].split(/\r?\n/, 2)
          Entry.new(header || "", (body || "").gsub(/\r?\n/, ""))
        end
      end

      # The header for self
      attr_accessor :header

      # The sequence of self
      attr_accessor :sequence

      # Initializes a new Entry with the specified header (without the
      # leading '>') and sequence (without line breaks).
      def initialize(header="", sequence="")
        @header = header
        @sequence = sequence
      end

      # Returns an array holding the header line followed by the sequence
      # formatted into lines of line_length or less.  The '>' delimiter is
      # added to the header line.  Raises an ArgumentError unless line_length
      # is greater than zero.
      def lines(line_length=70)
        raise ArgumentError, "line length must be greater than 0" unless line_length > 0

        formatted = [">#{header}"]
        # step yields nothing for an empty sequence (limit -1 < start 0)
        0.step(sequence.length - 1, line_length) do |offset|
          formatted << sequence[offset, line_length]
        end

        formatted
      end

      # Formats and dumps self to the target, followed by a trailing newline,
      # and returns the target.  The target may be anything that responds to
      # <<.  Use the options to modify the output:
      #
      # line_length:: the length of each output line (default 70)
      def dump(target="", options={})
        line_length = options.fetch(:line_length, 70)
        target << lines(line_length).join("\n")
        target << "\n"
        target
      end

      # Returns self formatted as a string
      def to_s
        dump
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'ms/fasta/archive'
2
+
3
module Ms
  module Load
    # :startdoc::manifest loads entries in a fasta file
    #
    # Loads entries from a fasta file.  Entries are returned as an array and
    # by default as Ms::Fasta::Entry objects.
    #
    class Fasta < Tap::Task

      config :range, 0..10, &c.range # the range of entries to select
      config :fasta, false, &c.switch # returns entries as fasta strings

      # Opens fasta_file as an Ms::Fasta::Archive and returns the entries
      # selected by the range config.  When the fasta config is set, each
      # selected entry is replaced in place by its string form.
      def process(fasta_file)
        Ms::Fasta::Archive.open(fasta_file) do |archive|
          selected = archive[range]

          # Converting to entries only to convert back to strings is
          # wasteful; ExternalArchive needs a way to read a selection of
          # strings without conversion to entries.
          # watch (http://bahuvrihi.lighthouseapp.com/projects/10590-external/tickets/7-for-strings)
          if fasta
            selected.collect! {|entry| entry.to_s }
          end

          selected
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'ms/fasta/archive'
2
+
3
module Ms
  module Load
    # :startdoc::manifest selects n random fasta entries
    #
    # Selects random fasta entries from a fasta file.  Entries are returned as
    # an array, and by default as Ms::Fasta::Entry objects.
    #
    class RandomFasta < Tap::Task

      config :n, 1, &c.integer # the number of fasta to select
      config :fasta, false, &c.switch # returns entries as fasta strings
      config :distinct, true, &c.switch # requires entries to be unique by sequence

      # Opens fasta_file as an Ms::Fasta::Archive and returns an array of n
      # randomly picked entries (as strings when the fasta config is set).
      #
      # When distinct is set, no two returned entries share a sequence.
      # When it is not set, entries are drawn with replacement and the same
      # entry may appear more than once.
      #
      # Raises a RuntimeError when the request cannot be satisfied: the
      # archive is empty, or distinct is set and the archive holds fewer
      # than n distinct sequences.
      def process(fasta_file)
        entries = []

        log :index, fasta_file unless File.exist?("#{fasta_file}.index")
        Ms::Fasta::Archive.open(fasta_file) do |archive|
          total_entries = archive.length
          log :select, "#{n} entries"

          # rand(0) returns a Float, which is not a valid entry index, so an
          # empty archive has to be rejected before any selection happens.
          if n > 0 && total_entries == 0
            raise "cannot select #{n} entries: #{fasta_file} contains no entries"
          end

          tried_indices = {}   # indices already drawn (distinct mode only)
          seen_sequences = {}  # sequences already selected (distinct mode only)

          while entries.length < n
            index = rand(total_entries)

            if distinct
              # A previously drawn index can only yield an already-selected
              # or already-rejected sequence, so skip it without reading.
              unless tried_indices.has_key?(index)
                tried_indices[index] = true
                entry = archive[index]

                unless seen_sequences.has_key?(entry.sequence)
                  seen_sequences[entry.sequence] = true
                  entries << entry
                end
              end

              # Every entry has been examined: no further distinct sequence
              # exists, so looping on would never terminate.
              if entries.length < n && tried_indices.length == total_entries
                raise "cannot select #{n} distinct entries: #{fasta_file} contains only #{entries.length} distinct sequences"
              end
            else
              entries << archive[index]
            end
          end
        end

        entries.collect! do |entry|
          entry.to_s
        end if fasta

        entries
      end
    end
  end
end
data/tap.yml ADDED
File without changes
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-fasta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-24 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.12.4
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: external
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.0
34
+ version:
35
+ description:
36
+ email: simon.a.chiang@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - History
43
+ - README
44
+ - MIT-LICENSE
45
+ files:
46
+ - lib/ms/fasta/archive.rb
47
+ - lib/ms/fasta/entry.rb
48
+ - lib/ms/load/fasta.rb
49
+ - lib/ms/load/random_fasta.rb
50
+ - tap.yml
51
+ - History
52
+ - README
53
+ - MIT-LICENSE
54
+ has_rdoc: true
55
+ homepage: http://mspire.rubyforge.org/projects/ms-fasta
56
+ post_install_message:
57
+ rdoc_options:
58
+ - --main
59
+ - README
60
+ - -S
61
+ - -N
62
+ - --title
63
+ - Ms-Fasta
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: "0"
77
+ version:
78
+ requirements: []
79
+
80
+ rubyforge_project: mspire
81
+ rubygems_version: 1.3.1
82
+ signing_key:
83
+ specification_version: 2
84
+ summary: ms-fasta
85
+ test_files: []
86
+