ms-core 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/VERSION +1 -1
- data/lib/ms/fasta.rb +59 -0
- data/spec/ms/fasta_spec.rb +96 -0
- metadata +29 -11
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -2,6 +2,7 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
bacon (1.1.0)
|
5
|
+
bio (1.4.1)
|
5
6
|
git (1.2.5)
|
6
7
|
jeweler (1.5.2)
|
7
8
|
bundler (~> 1.0.0)
|
@@ -16,6 +17,7 @@ PLATFORMS
|
|
16
17
|
ruby
|
17
18
|
|
18
19
|
DEPENDENCIES
|
20
|
+
bio (>= 1.4.1)
|
19
21
|
bundler (~> 1.0.0)
|
20
22
|
jeweler (~> 1.5.2)
|
21
23
|
rcov
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.10
|
1
|
+
0.0.11
|
data/lib/ms/fasta.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'bio'
|
2
|
+
|
3
|
+
class Bio::FlatFile
|
4
|
+
include Enumerable
|
5
|
+
end
|
6
|
+
|
7
|
+
class Bio::FastaFormat
|
8
|
+
alias_method :header, :definition
|
9
|
+
alias_method :sequence, :seq
|
10
|
+
end
|
11
|
+
|
12
|
+
module Ms
|
13
|
+
# A convenience module for working with fasta formatted sequence databases.
|
14
|
+
# The file that defines this module also mixes Enumerable into
|
15
|
+
# Bio::FlatFile so you can do things like this:
|
16
|
+
#
|
17
|
+
# accessions = Ms::Fasta.open("file.fasta") do |fasta|
|
18
|
+
# fasta.map(&:accession)
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# A few aliases are added to Bio::FastaFormat
|
22
|
+
#
|
23
|
+
# entry.header == entry.definition
|
24
|
+
# entry.sequence == entry.seq
|
25
|
+
#
|
26
|
+
# Ms::Fasta.new accepts either an IO object or a String (a fasta formatted
|
27
|
+
# string itself)
|
28
|
+
#
|
29
|
+
# # taking an io object:
|
30
|
+
# File.open("file.fasta") do |io|
|
31
|
+
# fasta = Ms::Fasta.new(io)
|
32
|
+
# ... do something with it
|
33
|
+
# end
|
34
|
+
# # taking a string
|
35
|
+
# string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
|
36
|
+
# fasta = Ms::Fasta.new(string)
|
37
|
+
# (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
|
38
|
+
module Fasta
|
39
|
+
|
40
|
+
# opens the flatfile and yields a Bio::FlatFile object
|
41
|
+
def self.open(file, &block)
|
42
|
+
Bio::FlatFile.open(Bio::FastaFormat, file, &block)
|
43
|
+
end
|
44
|
+
|
45
|
+
# yields each Bio::FastaFormat object in turn
|
46
|
+
def self.foreach(file, &block)
|
47
|
+
Bio::FlatFile.open(Bio::FastaFormat, file) do |fasta|
|
48
|
+
fasta.each(&block)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# takes an IO object or a string that is the fasta data itself
|
53
|
+
def self.new(io)
|
54
|
+
io = StringIO.new(io) if io.is_a?(String)
|
55
|
+
Bio::FlatFile.new(Bio::FastaFormat, io)
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
end
|
data/spec/ms/fasta_spec.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/fasta'
|
4
|
+
|
5
|
+
describe 'basic fasta operations' do
|
6
|
+
before do
|
7
|
+
@headers = [">gi|5524211 [hello]", ">another B", ">again C"]
|
8
|
+
@entries = ["LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV\nGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX\nIENY", "ABCDEF\nGHIJK", "ABCD"]
|
9
|
+
@sequences = @entries.map {|v| v.gsub("\n", '') }
|
10
|
+
@data = {}
|
11
|
+
@data['newlines'] = @headers.zip(@entries).map do |header, data|
|
12
|
+
header + "\n" + data
|
13
|
+
end.join("\n")
|
14
|
+
@data['carriage_returns_and_newlines'] = @data['newlines'].gsub("\n", "\r\n")
|
15
|
+
file_key_to_filename_pairs = @data.map do |k,v|
|
16
|
+
file_key = k + '_file'
|
17
|
+
filename = k + '.tmp'
|
18
|
+
File.open(filename, 'w') {|out| out.print v }
|
19
|
+
[file_key, filename]
|
20
|
+
end
|
21
|
+
file_key_to_filename_pairs.each {|k,v| @data[k] = v }
|
22
|
+
end
|
23
|
+
|
24
|
+
after do
|
25
|
+
@data.select {|k,v| k =~ /_file$/ }.each do |k,filename|
|
26
|
+
index = filename.sub('.tmp', '.index')
|
27
|
+
[filename, index].each do |fn|
|
28
|
+
File.unlink(fn) if File.exist? fn
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def fasta_correct?(fasta)
|
34
|
+
entries = fasta.map
|
35
|
+
@headers.size.times.zip(entries) do |i,entry|
|
36
|
+
header, sequence, entry = @headers[i], @sequences[i], entry
|
37
|
+
entry.header.isnt nil
|
38
|
+
entry.sequence.isnt nil
|
39
|
+
entry.header.is header[1..-1]
|
40
|
+
entry.sequence.is sequence
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'can read a file' do
|
45
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
46
|
+
Ms::Fasta.open(@data[file]) do |fasta|
|
47
|
+
fasta_correct? fasta
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'can read an IO object' do
|
53
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
54
|
+
File.open(@data[file]) do |io|
|
55
|
+
fasta = Ms::Fasta.new(io)
|
56
|
+
fasta_correct? fasta
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'can read a string' do
|
62
|
+
%w(newlines carriage_returns_and_newlines).each do |key|
|
63
|
+
fasta = Ms::Fasta.new @data[key]
|
64
|
+
fasta_correct? fasta
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'iterates entries with foreach' do
|
69
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
70
|
+
Ms::Fasta.foreach(@data[file]) do |entry|
|
71
|
+
entry.isa Bio::FastaFormat
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'runs the documentation' do
|
77
|
+
fasta_file = @data['newlines_file']
|
78
|
+
ids = Ms::Fasta.open(fasta_file) do |fasta|
|
79
|
+
fasta.map(&:entry_id)
|
80
|
+
end
|
81
|
+
ids.is_a?(Array)
|
82
|
+
ids.enums %w(gi|5524211 another again)
|
83
|
+
|
84
|
+
# this code is already tested above
|
85
|
+
# File.open(fasta_file) do |io|
|
86
|
+
# fasta = Ms::Fasta.new(io)
|
87
|
+
# end
|
88
|
+
|
89
|
+
# taking a string
|
90
|
+
string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
|
91
|
+
fasta = Ms::Fasta.new(string)
|
92
|
+
(simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
|
93
|
+
simple.first.header.include?("simple").is true
|
94
|
+
not_simple.first.header.include?("simple").is false
|
95
|
+
end
|
96
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 10
|
9
|
-
version: 0.0.10
|
8
|
+
- 11
|
9
|
+
version: 0.0.11
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John T. Prince
|
@@ -19,8 +19,23 @@ date: 2011-02-24 00:00:00 -07:00
|
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name: spec-more
|
22
|
+
name: bio
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 4
|
31
|
+
- 1
|
32
|
+
version: 1.4.1
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: spec-more
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
24
39
|
none: false
|
25
40
|
requirements:
|
26
41
|
- - ">="
|
@@ -30,10 +45,10 @@ dependencies:
|
|
30
45
|
version: "0"
|
31
46
|
type: :development
|
32
47
|
prerelease: false
|
33
|
-
version_requirements: *id001
|
48
|
+
version_requirements: *id002
|
34
49
|
- !ruby/object:Gem::Dependency
|
35
50
|
name: bundler
|
36
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
37
52
|
none: false
|
38
53
|
requirements:
|
39
54
|
- - ~>
|
@@ -45,10 +60,10 @@ dependencies:
|
|
45
60
|
version: 1.0.0
|
46
61
|
type: :development
|
47
62
|
prerelease: false
|
48
|
-
version_requirements: *id002
|
63
|
+
version_requirements: *id003
|
49
64
|
- !ruby/object:Gem::Dependency
|
50
65
|
name: jeweler
|
51
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
67
|
none: false
|
53
68
|
requirements:
|
54
69
|
- - ~>
|
@@ -60,10 +75,10 @@ dependencies:
|
|
60
75
|
version: 1.5.2
|
61
76
|
type: :development
|
62
77
|
prerelease: false
|
63
|
-
version_requirements: *id003
|
78
|
+
version_requirements: *id004
|
64
79
|
- !ruby/object:Gem::Dependency
|
65
80
|
name: rcov
|
66
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
82
|
none: false
|
68
83
|
requirements:
|
69
84
|
- - ">="
|
@@ -73,7 +88,7 @@ dependencies:
|
|
73
88
|
version: "0"
|
74
89
|
type: :development
|
75
90
|
prerelease: false
|
76
|
-
version_requirements: *id004
|
91
|
+
version_requirements: *id005
|
77
92
|
description: basic, shared functionality for mspire libraries
|
78
93
|
email: jtprince@gmail.com
|
79
94
|
executables: []
|
@@ -101,6 +116,7 @@ files:
|
|
101
116
|
- lib/ms/data/lazy_string.rb
|
102
117
|
- lib/ms/data/simple.rb
|
103
118
|
- lib/ms/data/transposed.rb
|
119
|
+
- lib/ms/fasta.rb
|
104
120
|
- lib/ms/format/format_error.rb
|
105
121
|
- lib/ms/id/peptide.rb
|
106
122
|
- lib/ms/id/protein.rb
|
@@ -112,6 +128,7 @@ files:
|
|
112
128
|
- lib/openany.rb
|
113
129
|
- spec/ms/calc_spec.rb
|
114
130
|
- spec/ms/data_spec.rb
|
131
|
+
- spec/ms/fasta_spec.rb
|
115
132
|
- spec/ms/mass/aa_spec.rb
|
116
133
|
- spec/ms/support/binary_search_spec.rb
|
117
134
|
- spec/spec_helper.rb
|
@@ -129,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
146
|
requirements:
|
130
147
|
- - ">="
|
131
148
|
- !ruby/object:Gem::Version
|
132
|
-
hash: -
|
149
|
+
hash: -4553226932043159578
|
133
150
|
segments:
|
134
151
|
- 0
|
135
152
|
version: "0"
|
@@ -151,6 +168,7 @@ summary: basic, shared functionality for mspire libraries
|
|
151
168
|
test_files:
|
152
169
|
- spec/ms/calc_spec.rb
|
153
170
|
- spec/ms/data_spec.rb
|
171
|
+
- spec/ms/fasta_spec.rb
|
154
172
|
- spec/ms/mass/aa_spec.rb
|
155
173
|
- spec/ms/support/binary_search_spec.rb
|
156
174
|
- spec/spec_helper.rb
|