ms-core 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/VERSION +1 -1
- data/lib/ms/fasta.rb +59 -0
- data/spec/ms/fasta_spec.rb +96 -0
- metadata +29 -11
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -2,6 +2,7 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
bacon (1.1.0)
|
5
|
+
bio (1.4.1)
|
5
6
|
git (1.2.5)
|
6
7
|
jeweler (1.5.2)
|
7
8
|
bundler (~> 1.0.0)
|
@@ -16,6 +17,7 @@ PLATFORMS
|
|
16
17
|
ruby
|
17
18
|
|
18
19
|
DEPENDENCIES
|
20
|
+
bio (>= 1.4.1)
|
19
21
|
bundler (~> 1.0.0)
|
20
22
|
jeweler (~> 1.5.2)
|
21
23
|
rcov
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.10
|
1
|
+
0.0.11
|
data/lib/ms/fasta.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'bio'
|
2
|
+
|
3
|
+
class Bio::FlatFile
|
4
|
+
include Enumerable
|
5
|
+
end
|
6
|
+
|
7
|
+
class Bio::FastaFormat
|
8
|
+
alias_method :header, :definition
|
9
|
+
alias_method :sequence, :seq
|
10
|
+
end
|
11
|
+
|
12
|
+
module Ms
|
13
|
+
# A convenience class for working with fasta formatted sequence databases.
|
14
|
+
# the file which includes this class also includes Enumerable with
|
15
|
+
# Bio::FlatFile so you can do things like this:
|
16
|
+
#
|
17
|
+
# accessions = Ms::Fasta.open("file.fasta") do |fasta|
|
18
|
+
# fasta.map(&:accession)
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# A few aliases are added to Bio::FastaFormat
|
22
|
+
#
|
23
|
+
# entry.header == entry.definition
|
24
|
+
# entry.sequence == entry.seq
|
25
|
+
#
|
26
|
+
# Ms::Fasta.new accepts both an IO object or a String (a fasta formatted
|
27
|
+
# string itself)
|
28
|
+
#
|
29
|
+
# # taking an io object:
|
30
|
+
# File.open("file.fasta") do |io|
|
31
|
+
# fasta = Ms::Fasta.new(io)
|
32
|
+
# ... do something with it
|
33
|
+
# end
|
34
|
+
# # taking a string
|
35
|
+
# string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
|
36
|
+
# fasta = Ms::Fasta.new(string)
|
37
|
+
# (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
|
38
|
+
module Fasta
|
39
|
+
|
40
|
+
# opens the flatfile and yields a Bio::FlatFile object
|
41
|
+
def self.open(file, &block)
|
42
|
+
Bio::FlatFile.open(Bio::FastaFormat, file, &block)
|
43
|
+
end
|
44
|
+
|
45
|
+
# yields each Bio::FastaFormat object in turn
|
46
|
+
def self.foreach(file, &block)
|
47
|
+
Bio::FlatFile.open(Bio::FastaFormat, file) do |fasta|
|
48
|
+
fasta.each(&block)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# takes an IO object or a string that is the fasta data itself
|
53
|
+
def self.new(io)
|
54
|
+
io = StringIO.new(io) if io.is_a?(String)
|
55
|
+
Bio::FlatFile.new(Bio::FastaFormat, io)
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
end
|
data/spec/ms/fasta_spec.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/fasta'
|
4
|
+
|
5
|
+
describe 'basic fasta operations' do
|
6
|
+
before do
|
7
|
+
@headers = [">gi|5524211 [hello]", ">another B", ">again C"]
|
8
|
+
@entries = ["LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV\nGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX\nIENY", "ABCDEF\nGHIJK", "ABCD"]
|
9
|
+
@sequences = @entries.map {|v| v.gsub("\n", '') }
|
10
|
+
@data = {}
|
11
|
+
@data['newlines'] = @headers.zip(@entries).map do |header, data|
|
12
|
+
header + "\n" + data
|
13
|
+
end.join("\n")
|
14
|
+
@data['carriage_returns_and_newlines'] = @data['newlines'].gsub("\n", "\r\n")
|
15
|
+
file_key_to_filename_pairs = @data.map do |k,v|
|
16
|
+
file_key = k + '_file'
|
17
|
+
filename = k + '.tmp'
|
18
|
+
File.open(filename, 'w') {|out| out.print v }
|
19
|
+
[file_key, filename]
|
20
|
+
end
|
21
|
+
file_key_to_filename_pairs.each {|k,v| @data[k] = v }
|
22
|
+
end
|
23
|
+
|
24
|
+
after do
|
25
|
+
@data.select {|k,v| k =~ /_file$/ }.each do |k,filename|
|
26
|
+
index = filename.sub('.tmp', '.index')
|
27
|
+
[filename, index].each do |fn|
|
28
|
+
File.unlink(fn) if File.exist? fn
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def fasta_correct?(fasta)
|
34
|
+
entries = fasta.map
|
35
|
+
@headers.size.times.zip(entries) do |i,entry|
|
36
|
+
header, sequence, entry = @headers[i], @sequences[i], entry
|
37
|
+
entry.header.isnt nil
|
38
|
+
entry.sequence.isnt nil
|
39
|
+
entry.header.is header[1..-1]
|
40
|
+
entry.sequence.is sequence
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'can read a file' do
|
45
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
46
|
+
Ms::Fasta.open(@data[file]) do |fasta|
|
47
|
+
fasta_correct? fasta
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'can read an IO object' do
|
53
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
54
|
+
File.open(@data[file]) do |io|
|
55
|
+
fasta = Ms::Fasta.new(io)
|
56
|
+
fasta_correct? fasta
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'can read a string' do
|
62
|
+
%w(newlines carriage_returns_and_newlines).each do |key|
|
63
|
+
fasta = Ms::Fasta.new @data[key]
|
64
|
+
fasta_correct? fasta
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'iterates entries with foreach' do
|
69
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
70
|
+
Ms::Fasta.foreach(@data[file]) do |entry|
|
71
|
+
entry.isa Bio::FastaFormat
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'runs the documentation' do
|
77
|
+
fasta_file = @data['newlines_file']
|
78
|
+
ids = Ms::Fasta.open(fasta_file) do |fasta|
|
79
|
+
fasta.map(&:entry_id)
|
80
|
+
end
|
81
|
+
ids.is_a?(Array)
|
82
|
+
ids.enums %w(gi|5524211 another again)
|
83
|
+
|
84
|
+
# this code is already tested above
|
85
|
+
# File.open(fasta_file) do |io|
|
86
|
+
# fasta = Ms::Fasta.new(io)
|
87
|
+
# end
|
88
|
+
|
89
|
+
# taking a string
|
90
|
+
string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
|
91
|
+
fasta = Ms::Fasta.new(string)
|
92
|
+
(simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
|
93
|
+
simple.first.header.include?("simple").is true
|
94
|
+
not_simple.first.header.include?("simple").is false
|
95
|
+
end
|
96
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 10
|
9
|
-
version: 0.0.10
|
8
|
+
- 11
|
9
|
+
version: 0.0.11
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John T. Prince
|
@@ -19,8 +19,23 @@ date: 2011-02-24 00:00:00 -07:00
|
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name: spec-more
|
22
|
+
name: bio
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 4
|
31
|
+
- 1
|
32
|
+
version: 1.4.1
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: spec-more
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
24
39
|
none: false
|
25
40
|
requirements:
|
26
41
|
- - ">="
|
@@ -30,10 +45,10 @@ dependencies:
|
|
30
45
|
version: "0"
|
31
46
|
type: :development
|
32
47
|
prerelease: false
|
33
|
-
version_requirements: *id001
|
48
|
+
version_requirements: *id002
|
34
49
|
- !ruby/object:Gem::Dependency
|
35
50
|
name: bundler
|
36
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
37
52
|
none: false
|
38
53
|
requirements:
|
39
54
|
- - ~>
|
@@ -45,10 +60,10 @@ dependencies:
|
|
45
60
|
version: 1.0.0
|
46
61
|
type: :development
|
47
62
|
prerelease: false
|
48
|
-
version_requirements: *id002
|
63
|
+
version_requirements: *id003
|
49
64
|
- !ruby/object:Gem::Dependency
|
50
65
|
name: jeweler
|
51
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
67
|
none: false
|
53
68
|
requirements:
|
54
69
|
- - ~>
|
@@ -60,10 +75,10 @@ dependencies:
|
|
60
75
|
version: 1.5.2
|
61
76
|
type: :development
|
62
77
|
prerelease: false
|
63
|
-
version_requirements: *id003
|
78
|
+
version_requirements: *id004
|
64
79
|
- !ruby/object:Gem::Dependency
|
65
80
|
name: rcov
|
66
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
82
|
none: false
|
68
83
|
requirements:
|
69
84
|
- - ">="
|
@@ -73,7 +88,7 @@ dependencies:
|
|
73
88
|
version: "0"
|
74
89
|
type: :development
|
75
90
|
prerelease: false
|
76
|
-
version_requirements: *id004
|
91
|
+
version_requirements: *id005
|
77
92
|
description: basic, shared functionality for mspire libraries
|
78
93
|
email: jtprince@gmail.com
|
79
94
|
executables: []
|
@@ -101,6 +116,7 @@ files:
|
|
101
116
|
- lib/ms/data/lazy_string.rb
|
102
117
|
- lib/ms/data/simple.rb
|
103
118
|
- lib/ms/data/transposed.rb
|
119
|
+
- lib/ms/fasta.rb
|
104
120
|
- lib/ms/format/format_error.rb
|
105
121
|
- lib/ms/id/peptide.rb
|
106
122
|
- lib/ms/id/protein.rb
|
@@ -112,6 +128,7 @@ files:
|
|
112
128
|
- lib/openany.rb
|
113
129
|
- spec/ms/calc_spec.rb
|
114
130
|
- spec/ms/data_spec.rb
|
131
|
+
- spec/ms/fasta_spec.rb
|
115
132
|
- spec/ms/mass/aa_spec.rb
|
116
133
|
- spec/ms/support/binary_search_spec.rb
|
117
134
|
- spec/spec_helper.rb
|
@@ -129,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
146
|
requirements:
|
130
147
|
- - ">="
|
131
148
|
- !ruby/object:Gem::Version
|
132
|
-
hash: -
|
149
|
+
hash: -4553226932043159578
|
133
150
|
segments:
|
134
151
|
- 0
|
135
152
|
version: "0"
|
@@ -151,6 +168,7 @@ summary: basic, shared functionality for mspire libraries
|
|
151
168
|
test_files:
|
152
169
|
- spec/ms/calc_spec.rb
|
153
170
|
- spec/ms/data_spec.rb
|
171
|
+
- spec/ms/fasta_spec.rb
|
154
172
|
- spec/ms/mass/aa_spec.rb
|
155
173
|
- spec/ms/support/binary_search_spec.rb
|
156
174
|
- spec/spec_helper.rb
|