ms-fasta 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History ADDED
@@ -0,0 +1,4 @@
1
+ == 0.1.0 / 2009-03-24
2
+
3
+ Initial release with tasks to load entries
4
+ from a FASTA file.
data/MIT-LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2009, Regents of the University of Colorado.
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ = {Ms-Fasta}[http://mspire.rubyforge.org/projects/ms-fasta]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library for working with {FASTA}[http://en.wikipedia.org/wiki/Fasta_format] files.
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/bahuvrihi/ms-fasta/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Fasta is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-fasta
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2009, Regents of the University of Colorado.
20
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
21
+ Support:: CU Denver School of Medicine Dean's Academic Enrichment Fund
22
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,21 @@
1
+ require 'external'
2
+ require 'ms/fasta/entry'
3
+
4
+ module Ms
5
+ module Fasta
6
# Archive exposes a FASTA file through the ExternalArchive interface.
class Archive < ExternalArchive

  # Reindexes self so that each index corresponds to one FASTA entry in io.
  #
  # The work is delegated to reindex_by_sep, using a separator regexp that
  # matches a newline immediately followed by '>' (the marker that begins
  # a FASTA header line).  Any block given is forwarded unchanged.
  def reindex(&block)
    reindex_by_sep(
      nil,
      :sep_regexp => /\n>/,
      :sep_length => 1,
      :entry_follows_sep => true,
      &block
    )
  end

  # Converts the entry string str into a Fasta::Entry by parsing it.
  def str_to_entry(str)
    Entry.parse(str)
  end
end
20
+ end
21
+ end
@@ -0,0 +1,75 @@
1
+ module Ms
2
+ module Fasta
3
+
4
# Entry represents a FASTA formatted entry: a header line (introduced by
# '>') followed by the sequence, which may span several lines.
#
#   entry = Entry.parse %q{
#   >gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
#   LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
#   EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
#   LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
#   GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
#   IENY
#   }.strip
#
#   entry.header[0,30]        # => 'gi|5524211|gb|AAD44166.1| cyto'
#   entry.sequence[0,30]      # => 'LCLYTHIGRNIYYGSYLYSETWNTGIMLLL'
#
class Entry
  class << self

    # Parses the entry string into a Fasta::Entry.  The entry string must
    # be well-formatted, ie begin with '>'; a RuntimeError is raised
    # otherwise.
    #
    # The header is the text between the '>' and the first line break and
    # the sequence is everything after it with all line breaks removed.
    # Both "\n" and "\r\n" line endings are accepted, and an entry that
    # consists of a header alone (no line break at all) parses to an entry
    # with an empty sequence.
    def parse(str)
      unless str.start_with?(">")
        raise "input should begin with '>'"
      end

      seq_begin = str.index("\n")

      # No newline means there is no sequence; everything after '>' is header.
      return Entry.new(str[1..-1].chomp("\r"), "") if seq_begin.nil?

      # chomp("\r") keeps the carriage return of a CRLF line ending out of
      # the header, matching the treatment of the sequence lines below.
      header = str[1, seq_begin - 1].chomp("\r")
      sequence = str[seq_begin..-1].gsub(/\r?\n/, "")

      Entry.new(header, sequence)
    end
  end

  # The header for self (without the leading '>')
  attr_accessor :header

  # The sequence of self (without line breaks)
  attr_accessor :sequence

  # Initializes a new entry with the specified header and sequence.
  def initialize(header="", sequence="")
    @header = header
    @sequence = sequence
  end

  # Returns the header and sequence formatted into an array of lines of
  # line_length or less.  The '>' delimiter is added to the header line,
  # which is never wrapped.  Raises an ArgumentError unless line_length is
  # greater than 0.
  def lines(line_length=70)
    raise ArgumentError, "line length must be greater than 0" unless line_length > 0

    formatted = [">#{header}"]

    # Step through the sequence one line_length-sized slice at a time; an
    # empty sequence yields no slices, leaving only the header line.
    0.step(sequence.length - 1, line_length) do |offset|
      formatted << sequence[offset, line_length]
    end

    formatted
  end

  # Formats and dumps self to the target, which must respond to <<.
  # Returns the target.  Use the options to modify the output:
  #
  # line_length:: the length of each output line (default 70)
  def dump(target="", options={})
    line_length = options.fetch(:line_length, 70)
    target << lines(line_length).join("\n")
    target << "\n"
    target
  end

  # Returns self formatted as a string
  def to_s
    dump
  end
end
74
+ end
75
+ end
@@ -0,0 +1,29 @@
1
+ require 'ms/fasta/archive'
2
+
3
+ module Ms
4
+ module Load
5
+ # :startdoc::manifest loads entries in a fasta file
6
+ #
7
+ # Loads entries from a fasta file. Entries are returned as an array and
8
+ # by default as Ms::Fasta::Entry objects.
9
+ #
10
class Fasta < Tap::Task

  config :range, 0..10, &c.range # the range of entries to select
  config :fasta, false, &c.switch # returns entries as fasta strings

  # Opens fasta_file as an Ms::Fasta::Archive and returns the entries
  # selected by the range config.  When the fasta config is set, each
  # selected entry is replaced in place by its to_s result before the
  # selection is returned.
  def process(fasta_file)
    Ms::Fasta::Archive.open(fasta_file) do |archive|
      selected = archive[range]
      next selected unless fasta

      # totally wasteful... ExternalArchive needs
      # a way to read a selection of string without
      # conversion to entries.
      # watch (http://bahuvrihi.lighthouseapp.com/projects/10590-external/tickets/7-for-strings)
      selected.collect! do |item|
        item.to_s
      end

      selected
    end
  end
end
28
+ end
29
+ end
@@ -0,0 +1,41 @@
1
+ require 'ms/fasta/archive'
2
+
3
+ module Ms
4
+ module Load
5
+ # :startdoc::manifest selects n random fasta entries
6
+ #
7
+ # Selects random fasta entries from a fasta file. Entries are returned as
8
+ # an array, and by default as Ms::Fasta::Entry objects.
9
+ #
10
class RandomFasta < Tap::Task

  config :n, 1, &c.integer # the number of fasta to select
  config :fasta, false, &c.switch # returns entries as fasta strings
  config :distinct, true, &c.switch # requires entries to be unique by sequence

  # Opens fasta_file as an Ms::Fasta::Archive and returns an array of n
  # randomly selected entries.  When the distinct config is set, no two
  # returned entries share a sequence; otherwise the same entry may be
  # picked more than once.  When the fasta config is set, the entries are
  # converted to strings with to_s before being returned.
  #
  # Raises an ArgumentError when the request cannot be satisfied: the
  # archive is empty while n is positive, or distinct is set and the
  # archive holds fewer than n unique sequences.
  def process(fasta_file)
    entries = []

    log :index, fasta_file unless File.exist?("#{fasta_file}.index")
    Ms::Fasta::Archive.open(fasta_file) do |archive|
      total_entries = archive.length
      log :select, "#{n} entries"

      # rand(0) returns a Float rather than an index, so an empty archive
      # must be rejected before any selection is attempted.
      if n > 0 && total_entries == 0
        raise ArgumentError, "cannot select #{n} entries from an empty archive: #{fasta_file}"
      end

      tried_indices = {}   # indices already drawn (distinct mode only)
      seen_sequences = {}  # sequences already selected (distinct mode only)

      # pick entries, filtering by sequence
      while entries.length < n
        # Once every index has been drawn there is nothing new left to
        # find; fail instead of looping forever.
        if distinct && tried_indices.length == total_entries
          raise ArgumentError, "cannot select #{n} distinct entries: only #{entries.length} unique sequences in #{fasta_file}"
        end

        index = rand(total_entries)

        if distinct
          # Re-drawing an index can only yield an already-handled
          # sequence, so skip it without reading the archive again.
          next if tried_indices[index]
          tried_indices[index] = true
        end

        entry = archive[index]

        if distinct
          next if seen_sequences[entry.sequence]
          seen_sequences[entry.sequence] = true
        end

        entries << entry
      end
    end

    entries.collect! {|entry| entry.to_s } if fasta
    entries
  end
end
40
+ end
41
+ end
data/tap.yml ADDED
File without changes
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-fasta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-24 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.12.4
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: external
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.0
34
+ version:
35
+ description:
36
+ email: simon.a.chiang@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - History
43
+ - README
44
+ - MIT-LICENSE
45
+ files:
46
+ - lib/ms/fasta/archive.rb
47
+ - lib/ms/fasta/entry.rb
48
+ - lib/ms/load/fasta.rb
49
+ - lib/ms/load/random_fasta.rb
50
+ - tap.yml
51
+ - History
52
+ - README
53
+ - MIT-LICENSE
54
+ has_rdoc: true
55
+ homepage: http://mspire.rubyforge.org/projects/ms-fasta
56
+ post_install_message:
57
+ rdoc_options:
58
+ - --main
59
+ - README
60
+ - -S
61
+ - -N
62
+ - --title
63
+ - Ms-Fasta
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: "0"
77
+ version:
78
+ requirements: []
79
+
80
+ rubyforge_project: mspire
81
+ rubygems_version: 1.3.1
82
+ signing_key:
83
+ specification_version: 2
84
+ summary: ms-fasta
85
+ test_files: []
86
+