ms-fasta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +4 -0
- data/MIT-LICENSE +22 -0
- data/README +22 -0
- data/lib/ms/fasta/archive.rb +21 -0
- data/lib/ms/fasta/entry.rb +75 -0
- data/lib/ms/load/fasta.rb +29 -0
- data/lib/ms/load/random_fasta.rb +41 -0
- data/tap.yml +0 -0
- metadata +86 -0
data/History
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009, Regents of the University of Colorado.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
= {Ms-Fasta}[http://mspire.rubyforge.org/projects/ms-fasta]
|
2
|
+
|
3
|
+
An {Mspire}[http://mspire.rubyforge.org] library for working with {FASTA}[http://en.wikipedia.org/wiki/Fasta_format] files.
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
|
8
|
+
* Github[http://github.com/bahuvrihi/ms-fasta/tree/master]
|
9
|
+
* {Google Group}[http://groups.google.com/group/mspire-forum]
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
Ms-Fasta is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
|
14
|
+
|
15
|
+
% gem install ms-fasta
|
16
|
+
|
17
|
+
== Info
|
18
|
+
|
19
|
+
Copyright (c) 2009, Regents of the University of Colorado.
|
20
|
+
Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
|
21
|
+
Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
22
|
+
Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'external'
|
2
|
+
require 'ms/fasta/entry'
|
3
|
+
|
4
|
+
module Ms
|
5
|
+
module Fasta
|
6
|
+
# Provides access to a FASTA file as an ExternalArchive.
#
# Archive only supplies the two FASTA-specific pieces: how the file is split
# into entries (reindex) and how the text of one entry is turned into an
# object (str_to_entry).  Everything else comes from ExternalArchive.
class Archive < ExternalArchive

  # Reindexes self so that each index position refers to one FASTA entry
  # in io.  Entries are located by the separator pattern "newline followed
  # by '>'"; the block, if given, is passed straight to reindex_by_sep.
  #
  # NOTE(review): the exact meaning of :sep_length and :entry_follows_sep
  # is defined by the External library -- presumably they keep the '>' with
  # the entry that follows the separator; confirm against reindex_by_sep.
  def reindex(&block)
    reindex_by_sep(
      nil,
      :sep_regexp => /\n>/,
      :sep_length => 1,
      :entry_follows_sep => true,
      &block
    )
  end

  # Converts str, the raw text of one entry, into a Fasta::Entry by
  # delegating to Entry.parse.
  def str_to_entry(str)
    Entry.parse(str)
  end
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Ms
|
2
|
+
module Fasta
|
3
|
+
|
4
|
+
# Entry represents a FASTA formatted entry: a header (the text following
# the leading '>') and a sequence (the remaining lines, joined together).
#
#   entry = Entry.parse %q{
#   >gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
#   LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
#   EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
#   LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
#   GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
#   IENY
#   }.strip
#
#   entry.header[0,30]        # => 'gi|5524211|gb|AAD44166.1| cyto'
#   entry.sequence[0,30]      # => 'LCLYTHIGRNIYYGSYLYSETWNTGIMLLL'
#
class Entry
  class << self

    # Parses the entry string into a Fasta::Entry.  The entry string must
    # be well-formatted, ie begin with '>'; a RuntimeError is raised
    # otherwise.
    #
    # The header is everything between the '>' and the first line break;
    # the sequence is the rest of the string with all line breaks (LF or
    # CRLF) removed.  An entry consisting of a header line only (no line
    # break at all) parses to an empty sequence.
    def parse(str)
      # str[0, 1] yields a one-character String on every Ruby version,
      # unlike str[0] which is an Integer on 1.8.
      unless str[0, 1] == ">"
        raise "input should begin with '>'"
      end

      # Without a line break the whole remainder is the header; falling
      # back to str.length makes the sequence slice below empty instead
      # of failing on nil arithmetic.
      seq_begin = str.index("\n") || str.length

      # For CRLF input the "\r" precedes the "\n" located above and would
      # otherwise be left dangling on the header.
      header = str[1, seq_begin - 1].chomp("\r")
      sequence = str[seq_begin, str.length - seq_begin].gsub(/\r?\n/, "")

      Entry.new(header, sequence)
    end
  end

  # The header for self
  attr_accessor :header

  # The sequence of self
  attr_accessor :sequence

  def initialize(header="", sequence="")
    @header = header
    @sequence = sequence
  end

  # Returns the header and sequence formatted into lines of line_length
  # or less.  The '>' delimiter is added to the header line, which is
  # never wrapped.  Raises an ArgumentError unless line_length is greater
  # than zero.
  def lines(line_length=70)
    raise ArgumentError, "line length must be greater than 0" unless line_length > 0

    lines = [">#{header}"]
    current = 0
    while current < sequence.length
      lines << sequence[current, line_length]
      current += line_length
    end

    lines
  end

  # Formats and dumps self to the target, which only needs to respond to
  # '<<'.  The formatted lines are joined by newlines and followed by a
  # trailing newline.  Returns target.  Use the options to modify the
  # output:
  #
  # line_length:: the length of each output line (default 70)
  def dump(target="", options={})
    line_length = options.fetch(:line_length, 70)
    target << self.lines(line_length).join("\n")
    target << "\n"
    target
  end

  # Returns self formatted as a string
  def to_s
    dump
  end
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'ms/fasta/archive'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Load
|
5
|
+
# :startdoc::manifest loads entries in a fasta file
#
# Loads entries from a fasta file. Entries are returned as an array and
# by default as Ms::Fasta::Entry objects.
#
class Fasta < Tap::Task

  config :range, 0..10, &c.range          # the range of entries to select
  config :fasta, false, &c.switch         # returns entries as fasta strings

  # Opens fasta_file as an Ms::Fasta::Archive and selects the entries at
  # the configured range.  When the fasta config is set, each selected
  # entry is replaced in place by its string form (entry.to_s).  The
  # selection is the value of the block handed to Archive.open.
  def process(fasta_file)
    Ms::Fasta::Archive.open(fasta_file) do |archive|
      selected = archive[range]

      # Wasteful: every entry is parsed only to be serialized again.
      # ExternalArchive would need a way to read a selection as raw
      # strings, without conversion to entries.  See
      # (http://bahuvrihi.lighthouseapp.com/projects/10590-external/tickets/7-for-strings)
      if fasta
        selected.map! { |entry| entry.to_s }
      end

      selected
    end
  end
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'ms/fasta/archive'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Load
|
5
|
+
# :startdoc::manifest selects n random fasta entries
#
# Selects random fasta entries from a fasta file. Entries are returned as
# an array, and by default as Ms::Fasta::Entry objects.
#
class RandomFasta < Tap::Task

  config :n, 1, &c.integer                # the number of fasta to select
  config :fasta, false, &c.switch         # returns entries as fasta strings
  config :distinct, true, &c.switch       # requires entries to be unique by sequence

  # Picks n entries at random from fasta_file and returns them as an
  # array; when the fasta config is set the entries are converted to
  # strings with to_s.
  #
  # Entries are drawn with replacement.  When distinct is set, a drawn
  # entry is discarded if an entry with the same sequence was already
  # selected.
  #
  # Raises a RuntimeError when the request cannot be satisfied: the
  # archive is empty while n is positive, or distinct is set and the
  # archive holds fewer distinct sequences than n.  Without these checks
  # the selection loop would never finish.
  def process(fasta_file)
    entries = []

    log :index, fasta_file unless File.exist?("#{fasta_file}.index")
    Ms::Fasta::Archive.open(fasta_file) do |archive|
      total_entries = archive.length
      log :select, "#{n} entries"

      # rand(0) returns a Float rather than an index, so an empty archive
      # has to be rejected up front.
      if n > 0 && total_entries == 0
        raise "cannot select #{n} entries from an empty archive: #{fasta_file}"
      end

      selected_sequences = {}   # sequences already in entries (distinct only)
      visited = {}              # indices drawn so far (distinct only)

      # pick entries, filtering by sequence
      while entries.length < n
        index = rand(total_entries)
        entry = archive[index]

        if distinct
          visited[index] = true

          if selected_sequences.has_key?(entry.sequence)
            # Once every index has been drawn, no unseen sequence is left
            # and further draws can only repeat.
            if visited.length == total_entries
              raise "only #{entries.length} distinct sequences available, cannot select #{n}: #{fasta_file}"
            end
            next
          end

          selected_sequences[entry.sequence] = true
        end

        entries << entry
      end
    end

    entries.collect! do |entry|
      entry.to_s
    end if fasta

    entries
  end
end
|
40
|
+
end
|
41
|
+
end
|
data/tap.yml
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ms-fasta
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Simon Chiang
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-24 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: tap
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.12.4
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: external
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.3.0
|
34
|
+
version:
|
35
|
+
description:
|
36
|
+
email: simon.a.chiang@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- History
|
43
|
+
- README
|
44
|
+
- MIT-LICENSE
|
45
|
+
files:
|
46
|
+
- lib/ms/fasta/archive.rb
|
47
|
+
- lib/ms/fasta/entry.rb
|
48
|
+
- lib/ms/load/fasta.rb
|
49
|
+
- lib/ms/load/random_fasta.rb
|
50
|
+
- tap.yml
|
51
|
+
- History
|
52
|
+
- README
|
53
|
+
- MIT-LICENSE
|
54
|
+
has_rdoc: true
|
55
|
+
homepage: http://mspire.rubyforge.org/projects/ms-fasta
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options:
|
58
|
+
- --main
|
59
|
+
- README
|
60
|
+
- -S
|
61
|
+
- -N
|
62
|
+
- --title
|
63
|
+
- Ms-Fasta
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project: mspire
|
81
|
+
rubygems_version: 1.3.1
|
82
|
+
signing_key:
|
83
|
+
specification_version: 2
|
84
|
+
summary: ms-fasta
|
85
|
+
test_files: []
|
86
|
+
|