ms-fasta 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +4 -0
- data/MIT-LICENSE +22 -0
- data/README +22 -0
- data/lib/ms/fasta/archive.rb +21 -0
- data/lib/ms/fasta/entry.rb +75 -0
- data/lib/ms/load/fasta.rb +29 -0
- data/lib/ms/load/random_fasta.rb +41 -0
- data/tap.yml +0 -0
- metadata +86 -0
data/History
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009, Regents of the University of Colorado.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
= {Ms-Fasta}[http://mspire.rubyforge.org/projects/ms-fasta]
|
2
|
+
|
3
|
+
An {Mspire}[http://mspire.rubyforge.org] library for working with {FASTA}[http://en.wikipedia.org/wiki/Fasta_format] files.
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
|
8
|
+
* Github[http://github.com/bahuvrihi/ms-fasta/tree/master]
|
9
|
+
* {Google Group}[http://groups.google.com/group/mspire-forum]
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
Ms-Fasta is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
|
14
|
+
|
15
|
+
% gem install ms-fasta
|
16
|
+
|
17
|
+
== Info
|
18
|
+
|
19
|
+
Copyright (c) 2009, Regents of the University of Colorado.
|
20
|
+
Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
|
21
|
+
Support:: CU Denver School of Medicine Dean's Academic Enrichment Fund
|
22
|
+
Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'external'
|
2
|
+
require 'ms/fasta/entry'
|
3
|
+
|
4
|
+
module Ms
  module Fasta
    # An ExternalArchive whose entries are the records of a FASTA file.
    # Each record is handed back as a Fasta::Entry (see str_to_entry).
    class Archive < ExternalArchive

      # Rebuilds the index of self so that there is one index entry per
      # FASTA record in io.  Records are located by the "\n>" separator;
      # the options tell reindex_by_sep that only the leading newline (one
      # character) belongs to the separator and that the entry follows it.
      # Any block given is forwarded to reindex_by_sep.
      def reindex(&block)
        reindex_by_sep(
          nil,
          :sep_regexp => /\n>/,
          :sep_length => 1,
          :entry_follows_sep => true,
          &block
        )
      end

      # Converts the raw record text str into a Fasta::Entry by delegating
      # to Entry.parse.
      def str_to_entry(str)
        Entry.parse(str)
      end

    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Ms
  module Fasta

    # Entry represents a FASTA formatted entry: a one-line header
    # (introduced by '>') followed by a sequence that may span many lines.
    #
    #   entry = Entry.parse %q{
    #   >gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
    #   LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
    #   EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
    #   LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
    #   GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
    #   IENY
    #   }.strip
    #
    #   entry.header[0,30]     # => 'gi|5524211|gb|AAD44166.1| cyto'
    #   entry.sequence[0,30]   # => 'LCLYTHIGRNIYYGSYLYSETWNTGIMLLL'
    #
    class Entry
      class << self

        # Parses the entry string into a Fasta::Entry.  The entry string must
        # be well-formatted, i.e. begin with '>'; a RuntimeError is raised
        # otherwise.
        #
        # The header is everything between the '>' and the first line break.
        # The sequence is the remainder with all line breaks removed.  Both
        # "\n" and "\r\n" line endings are accepted, and an entry consisting
        # of a header only (no line break at all) yields an empty sequence.
        def parse(str)
          raise "input should begin with '>'" unless str[0, 1] == '>'

          # Split once, at the first line break: header | rest-of-entry.
          # Either part may be missing (">" alone, or a header-only entry),
          # in which case split returns fewer than two elements.
          header, body = str[1..-1].split(/\r?\n/, 2)
          Entry.new(header.to_s, body.to_s.gsub(/\r?\n/, ""))
        end
      end

      # The header for self (without the leading '>')
      attr_accessor :header

      # The sequence of self (a single string, no line breaks)
      attr_accessor :sequence

      def initialize(header="", sequence="")
        @header = header
        @sequence = sequence
      end

      # Returns the header and sequence formatted into lines of line_length
      # or less, as an array of strings.  The '>' delimiter is added to the
      # header line; the header line itself is never wrapped.  An empty
      # sequence produces just the header line.
      #
      # Raises ArgumentError unless line_length is greater than 0.
      def lines(line_length=70)
        raise ArgumentError, "line length must be greater than 0" unless line_length > 0

        result = [">#{header}"]
        # Walk the sequence in line_length sized strides; when the sequence
        # is empty the upper bound is -1 and the block never runs.
        0.step(sequence.length - 1, line_length) do |offset|
          result << sequence[offset, line_length]
        end
        result
      end

      # Formats and dumps self to the target, which only needs to respond
      # to <<.  A trailing newline is always written.  Returns the target.
      # Use the options to modify the output:
      #
      # line_length:: the length of each output line (default 70)
      def dump(target="", options={})
        line_length = options.fetch(:line_length, 70)
        target << lines(line_length).join("\n")
        target << "\n"
        target
      end

      # Returns self formatted as a string
      def to_s
        dump
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'ms/fasta/archive'
|
2
|
+
|
3
|
+
module Ms
  module Load
    # :startdoc::manifest loads entries in a fasta file
    #
    # Loads entries from a fasta file.  Entries are returned as an array and
    # by default as Ms::Fasta::Entry objects.
    #
    class Fasta < Tap::Task

      config :range, 0..10, &c.range # the range of entries to select
      config :fasta, false, &c.switch # returns entries as fasta strings

      # Opens fasta_file as an Ms::Fasta::Archive and selects the entries at
      # the configured range.  When the fasta config is set, each selected
      # entry is replaced in place by its to_s form.  The selection is the
      # value of the block passed to Archive.open.
      def process(fasta_file)
        Ms::Fasta::Archive.open(fasta_file) do |archive|
          selected = archive[range]

          # Wasteful: the archive converts each string to an entry and it is
          # converted straight back here.  ExternalArchive needs a way to
          # read a selection of strings without conversion to entries.
          # watch (http://bahuvrihi.lighthouseapp.com/projects/10590-external/tickets/7-for-strings)
          if fasta
            selected.map! { |item| item.to_s }
          end

          selected
        end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'ms/fasta/archive'
|
2
|
+
|
3
|
+
module Ms
  module Load
    # :startdoc::manifest selects n random fasta entries
    #
    # Selects random fasta entries from a fasta file.  Entries are returned as
    # an array, and by default as Ms::Fasta::Entry objects.
    #
    class RandomFasta < Tap::Task

      config :n, 1, &c.integer # the number of fasta to select
      config :fasta, false, &c.switch # returns entries as fasta strings
      config :distinct, true, &c.switch # requires entries to be unique by sequence

      # Picks n entries at random from fasta_file and returns them as an
      # array (as strings when the fasta config is set).
      #
      # When distinct is set, no two returned entries share a sequence.
      # When it is not set, the same entry may be picked more than once.
      #
      # Raises a RuntimeError instead of looping forever when the request
      # cannot be satisfied: the file has no entries, or distinct is set and
      # every entry has been tried without finding n different sequences.
      def process(fasta_file)
        entries = []

        log :index, fasta_file unless File.exist?("#{fasta_file}.index")
        Ms::Fasta::Archive.open(fasta_file) do |archive|
          total_entries = archive.length
          log :select, "#{n} entries"

          if n > 0 && total_entries == 0
            raise "cannot select #{n} entries: no entries in #{fasta_file}"
          end

          tried = {}            # indices already drawn (distinct mode only)
          seen_sequences = {}   # sequences already selected (distinct mode only)

          # pick entries, filtering by sequence
          while entries.length < n
            # Once every index has been drawn there is nothing new left to
            # find, so further sampling could never terminate.
            if distinct && tried.length == total_entries
              raise "cannot select #{n} distinct entries: only #{entries.length} distinct sequences in #{fasta_file}"
            end

            index = rand(total_entries)
            entry = archive[index]

            if distinct
              tried[index] = true
              next if seen_sequences.has_key?(entry.sequence)
              seen_sequences[entry.sequence] = true
            end

            entries << entry
          end
        end

        entries.collect! do |entry|
          entry.to_s
        end if fasta

        entries
      end
    end
  end
end
|
data/tap.yml
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ms-fasta
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Simon Chiang
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-24 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: tap
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.12.4
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: external
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.3.0
|
34
|
+
version:
|
35
|
+
description:
|
36
|
+
email: simon.a.chiang@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- History
|
43
|
+
- README
|
44
|
+
- MIT-LICENSE
|
45
|
+
files:
|
46
|
+
- lib/ms/fasta/archive.rb
|
47
|
+
- lib/ms/fasta/entry.rb
|
48
|
+
- lib/ms/load/fasta.rb
|
49
|
+
- lib/ms/load/random_fasta.rb
|
50
|
+
- tap.yml
|
51
|
+
- History
|
52
|
+
- README
|
53
|
+
- MIT-LICENSE
|
54
|
+
has_rdoc: true
|
55
|
+
homepage: http://mspire.rubyforge.org/projects/ms-fasta
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options:
|
58
|
+
- --main
|
59
|
+
- README
|
60
|
+
- -S
|
61
|
+
- -N
|
62
|
+
- --title
|
63
|
+
- Ms-Fasta
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project: mspire
|
81
|
+
rubygems_version: 1.3.1
|
82
|
+
signing_key:
|
83
|
+
specification_version: 2
|
84
|
+
summary: ms-fasta
|
85
|
+
test_files: []
|
86
|
+
|