ms-core 0.0.10 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -2,6 +2,7 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
+ gem 'bio', '>= 1.4.1'
5
6
 
6
7
  # Add dependencies to develop your gem here.
7
8
  # Include everything needed to run rake, tests, features, etc.
data/Gemfile.lock CHANGED
@@ -2,6 +2,7 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  bacon (1.1.0)
5
+ bio (1.4.1)
5
6
  git (1.2.5)
6
7
  jeweler (1.5.2)
7
8
  bundler (~> 1.0.0)
@@ -16,6 +17,7 @@ PLATFORMS
16
17
  ruby
17
18
 
18
19
  DEPENDENCIES
20
+ bio (>= 1.4.1)
19
21
  bundler (~> 1.0.0)
20
22
  jeweler (~> 1.5.2)
21
23
  rcov
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
data/lib/ms/fasta.rb ADDED
@@ -0,0 +1,59 @@
1
+ require 'bio'
2
+
3
+ class Bio::FlatFile
4
+ include Enumerable
5
+ end
6
+
7
+ class Bio::FastaFormat
8
+ alias_method :header, :definition
9
+ alias_method :sequence, :seq
10
+ end
11
+
12
+ module Ms
13
+ # A convenience module for working with FASTA-formatted sequence databases.
14
+ # Requiring this file also mixes Enumerable into Bio::FlatFile, so you can
15
+ # do things like this:
16
+ #
17
+ # accessions = Ms::Fasta.open("file.fasta") do |fasta|
18
+ # fasta.map(&:accession)
19
+ # end
20
+ #
21
+ # A few aliases are added to Bio::FastaFormat
22
+ #
23
+ # entry.header == entry.definition
24
+ # entry.sequence == entry.seq
25
+ #
26
+ # Ms::Fasta.new accepts either an IO object or a String (the fasta formatted
27
+ # data itself)
28
+ #
29
+ # # taking an io object:
30
+ # File.open("file.fasta") do |io|
31
+ # fasta = Ms::Fasta.new(io)
32
+ # ... do something with it
33
+ # end
34
+ # # taking a string
35
+ # string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
36
+ # fasta = Ms::Fasta.new(string)
37
+ # (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
38
+ module Fasta
39
+
40
+ # opens the flatfile and yields a Bio::FlatFile object
41
+ def self.open(file, &block)
42
+ Bio::FlatFile.open(Bio::FastaFormat, file, &block)
43
+ end
44
+
45
+ # yields each Bio::FastaFormat object in turn
46
+ def self.foreach(file, &block)
47
+ Bio::FlatFile.open(Bio::FastaFormat, file) do |fasta|
48
+ fasta.each(&block)
49
+ end
50
+ end
51
+
52
+ # takes an IO object or a string that is the fasta data itself
53
+ def self.new(io)
54
+ io = StringIO.new(io) if io.is_a?(String)
55
+ Bio::FlatFile.new(Bio::FastaFormat, io)
56
+ end
57
+
58
+ end
59
+ end
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/fasta'
4
+
5
+ describe 'basic fasta operations' do
6
+ before do
7
+ @headers = [">gi|5524211 [hello]", ">another B", ">again C"]
8
+ @entries = ["LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV\nGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX\nIENY", "ABCDEF\nGHIJK", "ABCD"]
9
+ @sequences = @entries.map {|v| v.gsub("\n", '') }
10
+ @data = {}
11
+ @data['newlines'] = @headers.zip(@entries).map do |header, data|
12
+ header + "\n" + data
13
+ end.join("\n")
14
+ @data['carriage_returns_and_newlines'] = @data['newlines'].gsub("\n", "\r\n")
15
+ file_key_to_filename_pairs = @data.map do |k,v|
16
+ file_key = k + '_file'
17
+ filename = k + '.tmp'
18
+ File.open(filename, 'w') {|out| out.print v }
19
+ [file_key, filename]
20
+ end
21
+ file_key_to_filename_pairs.each {|k,v| @data[k] = v }
22
+ end
23
+
24
+ after do
25
+ @data.select {|k,v| k =~ /_file$/ }.each do |k,filename|
26
+ index = filename.sub('.tmp', '.index')
27
+ [filename, index].each do |fn|
28
+ File.unlink(fn) if File.exist? fn
29
+ end
30
+ end
31
+ end
32
+
33
+ def fasta_correct?(fasta)
34
+ entries = fasta.map
35
+ @headers.size.times.zip(entries) do |i,entry|
36
+ header, sequence, entry = @headers[i], @sequences[i], entry
37
+ entry.header.isnt nil
38
+ entry.sequence.isnt nil
39
+ entry.header.is header[1..-1]
40
+ entry.sequence.is sequence
41
+ end
42
+ end
43
+
44
+ it 'can read a file' do
45
+ %w(newlines_file carriage_returns_and_newlines_file).each do |file|
46
+ Ms::Fasta.open(@data[file]) do |fasta|
47
+ fasta_correct? fasta
48
+ end
49
+ end
50
+ end
51
+
52
+ it 'can read an IO object' do
53
+ %w(newlines_file carriage_returns_and_newlines_file).each do |file|
54
+ File.open(@data[file]) do |io|
55
+ fasta = Ms::Fasta.new(io)
56
+ fasta_correct? fasta
57
+ end
58
+ end
59
+ end
60
+
61
+ it 'can read a string' do
62
+ %w(newlines carriage_returns_and_newlines).each do |key|
63
+ fasta = Ms::Fasta.new @data[key]
64
+ fasta_correct? fasta
65
+ end
66
+ end
67
+
68
+ it 'iterates entries with foreach' do
69
+ %w(newlines_file carriage_returns_and_newlines_file).each do |file|
70
+ Ms::Fasta.foreach(@data[file]) do |entry|
71
+ entry.isa Bio::FastaFormat
72
+ end
73
+ end
74
+ end
75
+
76
+ it 'runs the documentation' do
77
+ fasta_file = @data['newlines_file']
78
+ ids = Ms::Fasta.open(fasta_file) do |fasta|
79
+ fasta.map(&:entry_id)
80
+ end
81
+ ids.is_a?(Array)
82
+ ids.enums %w(gi|5524211 another again)
83
+
84
+ # this code is already tested above
85
+ # File.open(fasta_file) do |io|
86
+ # fasta = Ms::Fasta.new(io)
87
+ # end
88
+
89
+ # taking a string
90
+ string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
91
+ fasta = Ms::Fasta.new(string)
92
+ (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
93
+ simple.first.header.include?("simple").is true
94
+ not_simple.first.header.include?("simple").is false
95
+ end
96
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 10
9
- version: 0.0.10
8
+ - 11
9
+ version: 0.0.11
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -19,8 +19,23 @@ date: 2011-02-24 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: spec-more
22
+ name: bio
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 1
30
+ - 4
31
+ - 1
32
+ version: 1.4.1
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: spec-more
38
+ requirement: &id002 !ruby/object:Gem::Requirement
24
39
  none: false
25
40
  requirements:
26
41
  - - ">="
@@ -30,10 +45,10 @@ dependencies:
30
45
  version: "0"
31
46
  type: :development
32
47
  prerelease: false
33
- version_requirements: *id001
48
+ version_requirements: *id002
34
49
  - !ruby/object:Gem::Dependency
35
50
  name: bundler
36
- requirement: &id002 !ruby/object:Gem::Requirement
51
+ requirement: &id003 !ruby/object:Gem::Requirement
37
52
  none: false
38
53
  requirements:
39
54
  - - ~>
@@ -45,10 +60,10 @@ dependencies:
45
60
  version: 1.0.0
46
61
  type: :development
47
62
  prerelease: false
48
- version_requirements: *id002
63
+ version_requirements: *id003
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: jeweler
51
- requirement: &id003 !ruby/object:Gem::Requirement
66
+ requirement: &id004 !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ~>
@@ -60,10 +75,10 @@ dependencies:
60
75
  version: 1.5.2
61
76
  type: :development
62
77
  prerelease: false
63
- version_requirements: *id003
78
+ version_requirements: *id004
64
79
  - !ruby/object:Gem::Dependency
65
80
  name: rcov
66
- requirement: &id004 !ruby/object:Gem::Requirement
81
+ requirement: &id005 !ruby/object:Gem::Requirement
67
82
  none: false
68
83
  requirements:
69
84
  - - ">="
@@ -73,7 +88,7 @@ dependencies:
73
88
  version: "0"
74
89
  type: :development
75
90
  prerelease: false
76
- version_requirements: *id004
91
+ version_requirements: *id005
77
92
  description: basic, shared functionality for mspire libraries
78
93
  email: jtprince@gmail.com
79
94
  executables: []
@@ -101,6 +116,7 @@ files:
101
116
  - lib/ms/data/lazy_string.rb
102
117
  - lib/ms/data/simple.rb
103
118
  - lib/ms/data/transposed.rb
119
+ - lib/ms/fasta.rb
104
120
  - lib/ms/format/format_error.rb
105
121
  - lib/ms/id/peptide.rb
106
122
  - lib/ms/id/protein.rb
@@ -112,6 +128,7 @@ files:
112
128
  - lib/openany.rb
113
129
  - spec/ms/calc_spec.rb
114
130
  - spec/ms/data_spec.rb
131
+ - spec/ms/fasta_spec.rb
115
132
  - spec/ms/mass/aa_spec.rb
116
133
  - spec/ms/support/binary_search_spec.rb
117
134
  - spec/spec_helper.rb
@@ -129,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
129
146
  requirements:
130
147
  - - ">="
131
148
  - !ruby/object:Gem::Version
132
- hash: -2752488455848385314
149
+ hash: -4553226932043159578
133
150
  segments:
134
151
  - 0
135
152
  version: "0"
@@ -151,6 +168,7 @@ summary: basic, shared functionality for mspire libraries
151
168
  test_files:
152
169
  - spec/ms/calc_spec.rb
153
170
  - spec/ms/data_spec.rb
171
+ - spec/ms/fasta_spec.rb
154
172
  - spec/ms/mass/aa_spec.rb
155
173
  - spec/ms/support/binary_search_spec.rb
156
174
  - spec/spec_helper.rb