ms-core 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -2,6 +2,7 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
+ gem 'bio', '>= 1.4.1'
5
6
 
6
7
  # Add dependencies to develop your gem here.
7
8
  # Include everything needed to run rake, tests, features, etc.
data/Gemfile.lock CHANGED
@@ -2,6 +2,7 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  bacon (1.1.0)
5
+ bio (1.4.1)
5
6
  git (1.2.5)
6
7
  jeweler (1.5.2)
7
8
  bundler (~> 1.0.0)
@@ -16,6 +17,7 @@ PLATFORMS
16
17
  ruby
17
18
 
18
19
  DEPENDENCIES
20
+ bio (>= 1.4.1)
19
21
  bundler (~> 1.0.0)
20
22
  jeweler (~> 1.5.2)
21
23
  rcov
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
data/lib/ms/fasta.rb ADDED
@@ -0,0 +1,59 @@
1
+ require 'bio'
2
+
3
+ class Bio::FlatFile
4
+ include Enumerable # reopens Bio::FlatFile so map, partition, etc. work on top of the #each that Ms::Fasta.foreach calls
5
+ end
6
+
7
+ class Bio::FastaFormat
8
+ alias_method :header, :definition # entry.header is a second name for entry.definition
9
+ alias_method :sequence, :seq # entry.sequence is a second name for entry.seq
10
+ end
11
+
12
+ module Ms
13
+ # A convenience module for working with fasta formatted sequence databases.
14
+ # The file that defines this module also mixes Enumerable into
15
+ # Bio::FlatFile so you can do things like this:
16
+ #
17
+ # accessions = Ms::Fasta.open("file.fasta") do |fasta|
18
+ # fasta.map(&:accession)
19
+ # end
20
+ #
21
+ # A few aliases are added to Bio::FastaFormat:
22
+ #
23
+ # entry.header == entry.definition
24
+ # entry.sequence == entry.seq
25
+ #
26
+ # Ms::Fasta.new accepts either an IO object or a String (a fasta formatted
27
+ # string itself):
28
+ #
29
+ # # taking an io object:
30
+ # File.open("file.fasta") do |io|
31
+ # fasta = Ms::Fasta.new(io)
32
+ # ... do something with it
33
+ # end
34
+ # # taking a string
35
+ # string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
36
+ # fasta = Ms::Fasta.new(string)
37
+ # (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
38
+ module Fasta
39
+
40
+ # Opens the flatfile with Bio::FastaFormat as the entry class and forwards the block to Bio::FlatFile.open (which yields a Bio::FlatFile object)
41
+ def self.open(file, &block)
42
+ Bio::FlatFile.open(Bio::FastaFormat, file, &block)
43
+ end
44
+
45
+ # Opens the flatfile and yields each Bio::FastaFormat object in turn to the given block
46
+ def self.foreach(file, &block)
47
+ Bio::FlatFile.open(Bio::FastaFormat, file) do |fasta|
48
+ fasta.each(&block)
49
+ end
50
+ end
51
+
52
+ # Takes an IO object, or a String that is the fasta data itself (a String is wrapped in a StringIO first); returns the Bio::FlatFile built on it, not an Ms::Fasta instance
53
+ def self.new(io)
54
+ io = StringIO.new(io) if io.is_a?(String) # NOTE(review): this file only requires 'bio' -- confirm that 'stringio' is loaded by it
55
+ Bio::FlatFile.new(Bio::FastaFormat, io)
56
+ end
57
+
58
+ end
59
+ end
data/spec/ms/fasta_spec.rb ADDED
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/fasta'
4
+
5
+ describe 'basic fasta operations' do
6
+ before do
7
+ @headers = [">gi|5524211 [hello]", ">another B", ">again C"]
8
+ @entries = ["LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV\nGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX\nIENY", "ABCDEF\nGHIJK", "ABCD"]
9
+ @sequences = @entries.map {|v| v.gsub("\n", '') } # expected sequences: the entries with their line breaks removed
10
+ @data = {}
11
+ @data['newlines'] = @headers.zip(@entries).map do |header, data|
12
+ header + "\n" + data
13
+ end.join("\n")
14
+ @data['carriage_returns_and_newlines'] = @data['newlines'].gsub("\n", "\r\n") # same fasta text with \r\n line endings
15
+ file_key_to_filename_pairs = @data.map do |k,v| # write each fasta string to "<key>.tmp"
16
+ file_key = k + '_file'
17
+ filename = k + '.tmp'
18
+ File.open(filename, 'w') {|out| out.print v }
19
+ [file_key, filename]
20
+ end
21
+ file_key_to_filename_pairs.each {|k,v| @data[k] = v } # record each filename under "<key>_file"
22
+ end
23
+
24
+ after do
25
+ @data.select {|k,v| k =~ /_file$/ }.each do |k,filename|
26
+ index = filename.sub('.tmp', '.index') # a sibling .index file is removed too, if one exists
27
+ [filename, index].each do |fn|
28
+ File.unlink(fn) if File.exist? fn
29
+ end
30
+ end
31
+ end
32
+
33
+ def fasta_correct?(fasta) # checks each entry's header and sequence against @headers and @sequences
34
+ entries = fasta.map # map without a block; NOTE(review): presumably an Enumerator over the entries -- confirm on the targeted Ruby
35
+ @headers.size.times.zip(entries) do |i,entry|
36
+ header, sequence, entry = @headers[i], @sequences[i], entry # NOTE(review): assigning entry to itself is redundant
37
+ entry.header.isnt nil
38
+ entry.sequence.isnt nil
39
+ entry.header.is header[1..-1] # header[1..-1] drops the leading ">"
40
+ entry.sequence.is sequence
41
+ end
42
+ end
43
+
44
+ it 'can read a file' do
45
+ %w(newlines_file carriage_returns_and_newlines_file).each do |file|
46
+ Ms::Fasta.open(@data[file]) do |fasta|
47
+ fasta_correct? fasta
48
+ end
49
+ end
50
+ end
51
+
52
+ it 'can read an IO object' do
53
+ %w(newlines_file carriage_returns_and_newlines_file).each do |file|
54
+ File.open(@data[file]) do |io|
55
+ fasta = Ms::Fasta.new(io)
56
+ fasta_correct? fasta
57
+ end
58
+ end
59
+ end
60
+
61
+ it 'can read a string' do
62
+ %w(newlines carriage_returns_and_newlines).each do |key|
63
+ fasta = Ms::Fasta.new @data[key]
64
+ fasta_correct? fasta
65
+ end
66
+ end
67
+
68
+ it 'iterates entries with foreach' do
69
+ %w(newlines_file carriage_returns_and_newlines_file).each do |file|
70
+ Ms::Fasta.foreach(@data[file]) do |entry|
71
+ entry.isa Bio::FastaFormat
72
+ end
73
+ end
74
+ end
75
+
76
+ it 'runs the documentation' do
77
+ fasta_file = @data['newlines_file']
78
+ ids = Ms::Fasta.open(fasta_file) do |fasta|
79
+ fasta.map(&:entry_id)
80
+ end
81
+ ids.is_a?(Array) # NOTE(review): the result is discarded, so this asserts nothing; `ids.isa Array` (the form used in the foreach spec) was probably intended
82
+ ids.enums %w(gi|5524211 another again)
83
+
84
+ # this code is already tested above
85
+ # File.open(fasta_file) do |io|
86
+ # fasta = Ms::Fasta.new(io)
87
+ # end
88
+
89
+ # taking a string
90
+ string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
91
+ fasta = Ms::Fasta.new(string)
92
+ (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
93
+ simple.first.header.include?("simple").is true
94
+ not_simple.first.header.include?("simple").is false
95
+ end
96
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 10
9
- version: 0.0.10
8
+ - 11
9
+ version: 0.0.11
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -19,8 +19,23 @@ date: 2011-02-24 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: spec-more
22
+ name: bio
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 1
30
+ - 4
31
+ - 1
32
+ version: 1.4.1
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: spec-more
38
+ requirement: &id002 !ruby/object:Gem::Requirement
24
39
  none: false
25
40
  requirements:
26
41
  - - ">="
@@ -30,10 +45,10 @@ dependencies:
30
45
  version: "0"
31
46
  type: :development
32
47
  prerelease: false
33
- version_requirements: *id001
48
+ version_requirements: *id002
34
49
  - !ruby/object:Gem::Dependency
35
50
  name: bundler
36
- requirement: &id002 !ruby/object:Gem::Requirement
51
+ requirement: &id003 !ruby/object:Gem::Requirement
37
52
  none: false
38
53
  requirements:
39
54
  - - ~>
@@ -45,10 +60,10 @@ dependencies:
45
60
  version: 1.0.0
46
61
  type: :development
47
62
  prerelease: false
48
- version_requirements: *id002
63
+ version_requirements: *id003
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: jeweler
51
- requirement: &id003 !ruby/object:Gem::Requirement
66
+ requirement: &id004 !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ~>
@@ -60,10 +75,10 @@ dependencies:
60
75
  version: 1.5.2
61
76
  type: :development
62
77
  prerelease: false
63
- version_requirements: *id003
78
+ version_requirements: *id004
64
79
  - !ruby/object:Gem::Dependency
65
80
  name: rcov
66
- requirement: &id004 !ruby/object:Gem::Requirement
81
+ requirement: &id005 !ruby/object:Gem::Requirement
67
82
  none: false
68
83
  requirements:
69
84
  - - ">="
@@ -73,7 +88,7 @@ dependencies:
73
88
  version: "0"
74
89
  type: :development
75
90
  prerelease: false
76
- version_requirements: *id004
91
+ version_requirements: *id005
77
92
  description: basic, shared functionality for mspire libraries
78
93
  email: jtprince@gmail.com
79
94
  executables: []
@@ -101,6 +116,7 @@ files:
101
116
  - lib/ms/data/lazy_string.rb
102
117
  - lib/ms/data/simple.rb
103
118
  - lib/ms/data/transposed.rb
119
+ - lib/ms/fasta.rb
104
120
  - lib/ms/format/format_error.rb
105
121
  - lib/ms/id/peptide.rb
106
122
  - lib/ms/id/protein.rb
@@ -112,6 +128,7 @@ files:
112
128
  - lib/openany.rb
113
129
  - spec/ms/calc_spec.rb
114
130
  - spec/ms/data_spec.rb
131
+ - spec/ms/fasta_spec.rb
115
132
  - spec/ms/mass/aa_spec.rb
116
133
  - spec/ms/support/binary_search_spec.rb
117
134
  - spec/spec_helper.rb
@@ -129,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
129
146
  requirements:
130
147
  - - ">="
131
148
  - !ruby/object:Gem::Version
132
- hash: -2752488455848385314
149
+ hash: -4553226932043159578
133
150
  segments:
134
151
  - 0
135
152
  version: "0"
@@ -151,6 +168,7 @@ summary: basic, shared functionality for mspire libraries
151
168
  test_files:
152
169
  - spec/ms/calc_spec.rb
153
170
  - spec/ms/data_spec.rb
171
+ - spec/ms/fasta_spec.rb
154
172
  - spec/ms/mass/aa_spec.rb
155
173
  - spec/ms/support/binary_search_spec.rb
156
174
  - spec/spec_helper.rb