mspire-sequest 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/.autotest +30 -0
  2. data/.gitmodules +9 -0
  3. data/History +79 -0
  4. data/LICENSE +22 -0
  5. data/README.rdoc +85 -0
  6. data/Rakefile +52 -0
  7. data/VERSION +1 -0
  8. data/bin/srf_to_pepxml.rb +7 -0
  9. data/bin/srf_to_search.rb +7 -0
  10. data/bin/srf_to_sqt.rb +8 -0
  11. data/lib/mspire/sequest/params.rb +331 -0
  12. data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
  13. data/lib/mspire/sequest/pepxml/params.rb +32 -0
  14. data/lib/mspire/sequest/sqt.rb +393 -0
  15. data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
  16. data/lib/mspire/sequest/srf/pepxml.rb +333 -0
  17. data/lib/mspire/sequest/srf/search.rb +158 -0
  18. data/lib/mspire/sequest/srf/sqt.rb +218 -0
  19. data/lib/mspire/sequest/srf.rb +715 -0
  20. data/lib/mspire/sequest.rb +6 -0
  21. data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
  22. data/spec/mspire/sequest/params_spec.rb +135 -0
  23. data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
  24. data/spec/mspire/sequest/pepxml_spec.rb +311 -0
  25. data/spec/mspire/sequest/sqt_spec.rb +51 -0
  26. data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
  27. data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
  28. data/spec/mspire/sequest/srf/search_spec.rb +131 -0
  29. data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
  30. data/spec/mspire/sequest/srf_spec.rb +113 -0
  31. data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
  32. data/spec/spec_helper.rb +22 -0
  33. data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  34. data/spec/testfiles/bioworks31.params +77 -0
  35. data/spec/testfiles/bioworks32.params +62 -0
  36. data/spec/testfiles/bioworks33.params +63 -0
  37. data/spec/testfiles/corrupted_900.srf +0 -0
  38. data/spec/testfiles/small.sqt +87 -0
  39. data/spec/testfiles/small2.sqt +176 -0
  40. metadata +185 -0
@@ -0,0 +1,6 @@
1
+
2
module Mspire
  module Sequest
    # Version string: the first line of the VERSION file located two
    # directories above this file, with its line ending removed.
    VERSION = File.open(File.dirname(__FILE__) + '/../../VERSION') do |version_io|
      first_line = version_io.gets
      first_line.chomp
    end
  end
end
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/ruby
2
+
3
# Converts IPI-formatted FASTA header lines into an NCBI-style layout.
#
# For every <file>.fasta given on the command line a sibling file
# <file>_NCBI.fasta is written. Lines starting with '>' are rewritten with
# ipi_header_to_ncbi; every other line is copied through unchanged.

# Rewrites a single IPI-style header line.
#
# The first whitespace-delimited token after '>' is split on '|' into codes;
# each code is split on ':' and emitted as "key|value|". The remaining tokens
# are kept as the description, re-joined with single spaces.
#
# @param line [String] a header line starting with '>' (line ending optional)
# @return [String] the rewritten header, without a trailing newline
def ipi_header_to_ncbi(line)
  codes, *description = line[1..-1].split(" ")
  # A bare ">" header has no code token; previously this raised
  # NoMethodError on nil. Pass such a line through untouched instead.
  return line.chomp if codes.nil?

  code_section = codes.split('|').map do |code|
    key, val = code.split(':')
    "#{key}|#{val}|"
  end.join
  ">#{code_section} #{description.join(" ")}"
end

if ARGV.empty?
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read an IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read.)"
else
  # Nothing follows this branch, so the script still ends right after the
  # usage text is printed; no explicit exit is required.
  ARGV.each do |file|
    base = file.chomp(File.extname(file))
    outfile = base + '_NCBI' + ".fasta"
    File.open(outfile, 'w') do |out|
      IO.foreach(file) do |line|
        if line =~ /^>/
          out.puts ipi_header_to_ncbi(line)
        else
          out.print line
        end
      end
    end
  end
end
29
+
@@ -0,0 +1,135 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/sequest/params'
4
+
5
# Reads +filename+ and returns a Hash of every "name = value" parameter found.
#
# Only lines that begin with a lowercase letter are considered. The value is
# everything after '=' up to (not including) the first ';', with trailing
# whitespace removed. Keys and values are Strings.
def simple_parse(filename)
  raw = File.open(filename) do |handle|
    # this makes it work with ruby 1.9:
    handle.set_encoding("ASCII-8BIT") if handle.respond_to?(:set_encoding)
    handle.read
  end

  candidate_lines = raw.split(/\r?\n/).select { |text| text =~ /^[a-z]/ }

  params = {}
  candidate_lines.each do |text|
    found = /([^\s]+)\s*=\s*([^;]+)\s*;?/.match(text)
    next unless found
    params[found[1].dup] = found[2].rstrip
  end
  params
end
20
+
21
# Shared examples run against one params file.
#
# params_file    - path passed to Mspire::Sequest::Params.new
# api_hash       - method name => expected value, checked by the
#                  'provides consistent API ...' example
# backwards_hash - method name => expected value, checked by the
#                  'provides some backwards compatibility' example
shared_examples_for 'sequest params' do |params_file, api_hash, backwards_hash|

  subject { Mspire::Sequest::Params.new(params_file) }

  it 'has a method for every parameter in the file' do
    simple_parse(params_file).each_pair do |name, expected|
      subject.send(name.to_sym).should == expected
    end
  end

  it 'returns zero length string for params with no information' do
    subject.second_database_name.should == ""
    subject.sequence_header_filter.should == ""
  end

  it 'returns nil for params that do not exist and have no translation' do
    subject.google_plex.should == nil
  end

  it 'provides consistent API between versions for important info' do
    # the checks run inside capture_stderr; the captured text is not inspected
    capture_stderr do
      api_hash.each_pair { |meth, expected| subject.send(meth).should == expected }
    end
  end

  it 'provides some backwards compatibility' do
    backwards_hash.each_pair { |meth, expected| subject.send(meth).should == expected }
  end

end
56
+
57
# Runs the shared 'sequest params' examples against bioworks31.params.
describe 'sequest params v 3.1' do

  params_path = TESTFILES + '/bioworks31.params'

  # values expected from the accessors checked by the shared API example
  expected_api = {
    :version             => '3.1',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\ecoli_K12.fasta",
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '1',
  }

  # values expected by the shared backwards-compatibility example
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '0.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
77
+
78
# Runs the shared 'sequest params' examples against bioworks32.params.
describe 'sequest params v 3.2' do

  params_path = TESTFILES + '/bioworks32.params'

  # values expected from the accessors checked by the shared API example
  expected_api = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '2',
  }

  # values expected by the shared backwards-compatibility example
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
97
+
98
# Runs the shared 'sequest params' examples against bioworks33.params.
describe 'sequest params v 3.3' do

  params_path = TESTFILES + '/bioworks33.params'

  # values expected from the accessors checked by the shared API example
  expected_api = {
    :version             => '3.3',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\yeast.fasta",
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => "monoisotopic",
    :fragment_mass_type  => "monoisotopic",
    :min_number_termini  => '2',
  }

  # values expected by the shared backwards-compatibility example
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
116
+
117
# Runs the shared 'sequest params' examples against the params fragment
# stored in 7MIX_STD_110802_1.sequest_params_fragment.srf.
describe 'sequest params v 3.2 from srf' do

  params_path = TESTFILES + '/7MIX_STD_110802_1.sequest_params_fragment.srf'

  # values expected from the accessors checked by the shared API example
  expected_api = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '2',
  }

  # values expected by the shared backwards-compatibility example
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
135
+
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/sequest/params'
4
+ require 'mspire/sequest/pepxml/modifications'
5
+
6
# Examples for Mspire::Sequest::Pepxml::Modifications built from
# bioworks32.params plus a SEQUEST-style modification string.
describe 'Mspire::Sequest::Pepxml::Modifications' do
  before do
    @params = Mspire::Sequest::Params.new(TESTFILES + "/bioworks32.params")
    # The params object here is completely unnecessary for this test, except
    # that it sets up the mass table
    @obj = Mspire::Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
  end

  it 'creates a mod_symbols_hash' do
    expected_symbols = {
      [:C, 12.0]    => "^",
      [:S, 80.0]    => "@",
      [:M, 29.0]    => "#",
      [:M, 15.9]    => "*",
      [:ct, 12.33]  => "[",
      [:nt, 14.2]   => "]",
    }
    @obj.mod_symbols_hash.should == expected_symbols
    ## need more here
  end

  it 'creates a ModificationInfo object given a special peptide sequence' do
    mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
    @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
    @params.term_diff_search_options = "14.20000 12.33000"
    mod = Mspire::Sequest::Pepxml::Modifications.new(@params, mod_string)

    ## no mods
    mod.modification_info("PEPTIDE").should be_nil

    modinfo = mod.modification_info("]M*EC^S@IDM#M*EMSCM[")

    xml_string = modinfo.to_xml
    [
      /<mod_aminoacid_mass /,
      /mod_nterm_mass=/,
      /mod_cterm_mass=/,
      /modified_peptide=/,
    ].each do |expected_fragment|
      xml_string.should match( expected_fragment )
    end

    modinfo.mod_aminoacid_masses.size.should == 5
    # positions are verified, masses are just frozen
    frozen = [[1, 147.09606], [3, 115.1429], [4, 167.0772999], [7, 160.19606], [8, 147.09606]]
    frozen.zip(modinfo.mod_aminoacid_masses) do |(pos, mass), obj|
      obj.position.should == pos
      obj.mass.should be_within(0.0001).of(mass)
    end

    # These values are just frozen and not independently verified yet
    modinfo.mod_nterm_mass.should be_within(0.0001).of(146.4033)
    modinfo.mod_cterm_mass.should be_within(0.0001).of(160.5334)
  end

end
50
+
@@ -0,0 +1,311 @@
1
+
2
+ =begin
3
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
4
+
5
+ require 'spec_id'
6
+ require 'spec_id/sequest/pepxml'
7
+ #require 'mspire/mzxml'
8
+
9
+
10
+ NODELETE = false
11
+
12
+ describe Sequest::PepXML, " created from small bioworks.xml" do
13
+
14
+ spec_large do
15
+ before(:all) do
16
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
17
+
18
+ tf_params = Tfiles + "/bioworks32.params"
19
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
20
+ out_path = Tfiles
21
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
22
+ end
23
+
24
+ it 'gets some spectrum queries' do
25
+ @pepxml_objs.each do |obj|
26
+ (obj.spectrum_queries.size > 2).should be_true
27
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
28
+ end
29
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+
36
+ describe Sequest::PepXML, " created from large bioworks.xml" do
37
+ # assert_equal_by_pairs (really any old array)
38
+ def assert_equal_pairs(obj, arrs)
39
+ arrs.each do |arr|
40
+ #if obj.send(arr[1]) != arr[0]
41
+ # puts "HELLO"
42
+ # puts "OBJ answer"
43
+ # p obj.send(arr[1])
44
+ # puts "ar0"
45
+ # p arr[0]
46
+ # puts "ar1"
47
+ # p arr[1]
48
+ #end
49
+ if arr[0].is_a? Float
50
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
51
+ else
52
+ obj.send(arr[1]).should == arr[0]
53
+ end
54
+ end
55
+ end
56
+
57
+ # swap the first two elements of each pair first
58
+ def assert_equal_pairs_swapped(obj, arrs)
59
+ arrs.each do |arr|
60
+ arr[0], arr[1] = arr[1], arr[0]
61
+ end
62
+ assert_equal_pairs(obj, arrs)
63
+ end
64
+
65
+ spec_large do
66
+ before(:all) do
67
+ st = Time.new
68
+ params = Tfiles + "/opd1/sequest.3.2.params"
69
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
70
+ mzxml_path = Tfiles_l + "/opd1"
71
+ out_path = Tfiles
72
+ @pepxml_version = 18
73
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
74
+ puts "- takes #{Time.new - st} secs"
75
+ end
76
+
77
+ it 'extracts MSMSPipelineAnalysis' do
78
+ ######## HMMMMM...
79
+ Sequest::PepXML.pepxml_version.should == @pepxml_version
80
+
81
+ # MSMSPipelineAnalysis
82
+ po = @pepxml_objs.first
83
+ msms_pipeline = po.msms_pipeline_analysis
84
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
85
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
86
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
87
+ msms_pipeline.summary_xml.should == '000.xml'
88
+ end
89
+
90
+ it 'extracts MSmSRunSummary' do
91
+ # MSMSRunSummary
92
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
93
+ rs.base_name.should =~ /\/000/
94
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
95
+ end
96
+
97
+ it 'extracts SampleEnzyme' do
98
+ # SampleEnzyme
99
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
100
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
101
+ end
102
+
103
+ it 'extracts SearchSummary' do
104
+ # SearchSummary
105
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
106
+ ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
107
+ ss.base_name.should =~ /\/000/
108
+ ss.peptide_mass_tol.should =~ /1\.500/
109
+ assert_equal_pairs_swapped(ss, [ # normal attributes
110
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
111
+
112
+ # enzymatic_search_constraint
113
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
114
+
115
+ # parameters
116
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
117
+ ])
118
+
119
+ end
120
+ it 'extracts SearchDatabase' do
121
+ # SearchDatabase
122
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
123
+ sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
124
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
125
+ end
126
+
127
+ it 'returns SpectrumQueries' do
128
+ # SpectrumQueries
129
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
130
+ spec = sq.first
131
+ assert_equal_pairs_swapped(spec, [
132
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
133
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
134
+ [:precursor_neutral_mass, 1074.666926], # mine
135
+ [:assumed_charge, 1], [:index, "1"],
136
+ ])
137
+ sh = spec.search_results.first.search_hits.first
138
+ assert_equal_pairs_swapped(sh, [
139
+ # normal attributes
140
+ [:hit_rank, 1],
141
+ [:peptide, "SIYFRNFK"],
142
+ [:peptide_prev_aa, "R"],
143
+ [:peptide_next_aa, "G"],
144
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
145
+ [:num_tot_proteins, 1],
146
+ [:num_matched_ions, 4],
147
+ [:tot_num_ions, 14],
148
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
149
+ [:calc_neutral_pep_mass, 1074.23261], # mine
150
+ #[:massdiff, "+0.400000"], # out2summary
151
+ [:massdiff, 0.434316000000081], # mine
152
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
153
+
154
+ # search_score
155
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
156
+ ])
157
+
158
+ spec = sq[1]
159
+ assert_equal_pairs_swapped(spec, [
160
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
161
+ [:precursor_neutral_mass, 663.206111], # mine
162
+ [:assumed_charge, 1], [:index, "2"],
163
+ ])
164
+
165
+ sh = spec.search_results.first.search_hits.first
166
+ assert_equal_pairs_swapped(sh, [
167
+ # normal attributes
168
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
169
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
170
+ #[:massdiff, "-0.600000"], # out2summary
171
+ [:massdiff, -0.556499000000031], # mine
172
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
173
+ [:calc_neutral_pep_mass, 663.76261], # mine
174
+
175
+ # search_score
176
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
177
+ ])
178
+
179
+ spec = sq[9]
180
+ assert_equal_pairs_swapped(spec, [
181
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
182
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
183
+ [:precursor_neutral_mass, 691.150992], # mine
184
+ ])
185
+
186
+ sh = spec.search_results.first.search_hits.first
187
+ assert_equal_pairs_swapped(sh, [
188
+ # normal attributes
189
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
190
+
191
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
192
+ [:num_missed_cleavages, 1],
193
+ [:is_rejected, 0],
194
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
195
+ [:calc_neutral_pep_mass, 691.82261], # mine
196
+ #[:massdiff, "-0.700000"], # out2summary
197
+ [:massdiff, -0.67161800000008], # mine
198
+
199
+ # search_score
200
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
201
+ ])
202
+ end
203
+
204
+ it 'can generate correct pepxml file' do
205
+
206
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
207
+ string = @pepxml_objs.first.to_pepxml
208
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
209
+ base_name_re = /base_name=".*?files\//o
210
+ date_re = /date=".*?"/
211
+ string.split("\n").each_with_index do |line,i|
212
+ if i > 99 ; break end
213
+ ans, exp =
214
+ if i == 1
215
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
216
+ elsif i == 2
217
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
218
+ elsif i == 6
219
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
220
+ else
221
+ [line, ans_lines[i]]
222
+ end
223
+
224
+ #ans.split('').zip(exp.split('')) do |l,a|
225
+ # if l != a
226
+ # puts line
227
+ # puts ans_lines[i]
228
+ # puts l
229
+ # puts a
230
+ # end
231
+ #end
232
+ if ans != exp
233
+ puts ans
234
+ puts exp
235
+ end
236
+ ans.should == exp
237
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
238
+ end
239
+ end
240
+ end
241
+ end
242
+
243
+
244
+ describe 'bioworks file with modifications transformed into pepxml' do
245
+
246
+ spec_large do
247
+ before(:all) do
248
+ modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
249
+ modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
250
+ @srgfile = modfiles_sequest_dir + 'tmp.srg'
251
+ @out_path = modfiles_sequest_dir + 'pepxml'
252
+ modfiles = %w(020 040).map do |file|
253
+ modfiles_sequest_dir + file + ".srf"
254
+ end
255
+ objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
256
+ @out_files = %w(020 040).map do |file|
257
+ @out_path + '/' + file + '.xml'
258
+ end
259
+ end
260
+
261
+ after(:all) do
262
+ File.unlink(@srgfile) unless NODELETE
263
+ FileUtils.rm_r(@out_path)
264
+ #@out_files.each do |fn|
265
+ # File.unlink(fn) unless NODELETE
266
+ #end
267
+ end
268
+
269
+ # splits string on ' ' and matches the line found by find_line_regexp in
270
+ # lines
271
+ def match_modline_pieces(lines, find_line_regexp, string)
272
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
273
+ lines.each do |line|
274
+ if line =~ find_line_regexp
275
+ pieces.each do |piece|
276
+ line.should =~ piece
277
+ end
278
+ end
279
+ end
280
+ end
281
+
282
+ it 'gets modifications right in real run' do
283
+ @out_files.each do |fn|
284
+ fn.exist_as_a_file?.should be_true
285
+ beginning = IO.read(fn)
286
+ lines = beginning.split("\n")
287
+ [
288
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
289
+
290
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
291
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
292
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
293
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
294
+ ].each do |a,b|
295
+ match_modline_pieces(lines, a, b)
296
+ end
297
+ [
298
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
299
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
300
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
301
+ '</modification_info>',
302
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
303
+ ].each do |line|
304
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
305
+ end
306
+ end
307
+ end
308
+ end
309
+ end
310
+
311
+ =end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/sequest/sqt_spec_helper'
4
+ require 'mspire/sequest/sqt'
5
+
6
# Examples that read small.sqt and compare it with the HeaderHash and
# TestSpectra fixtures.
describe 'reading a small sqt file' do

  before(:each) do
    @sqt = Mspire::Sequest::Sqt.new(TESTFILES + '/small.sqt')
  end

  it 'can access header entries like a hash' do
    header = @sqt.header
    HeaderHash.each_pair { |key, expected| header[key].should == expected }
  end

  it 'can access header entries with methods' do
    header = @sqt.header
    # for example:
    header.database.should == HeaderHash['Database']
    # all working:
    HeaderHash.each_pair do |key, expected|
      attribute = Mspire::Sequest::Sqt::Header::KeysToAtts[key]
      header.send(attribute).should == expected
    end
  end

  it 'has spectra, matches, and loci' do
    seventeenth = @sqt.spectra[16]
    reply = {
      :first                   => @sqt.spectra.first,
      :last                    => @sqt.spectra.last,
      :seventeenth             => seventeenth,
      :first_match_17          => seventeenth.matches.first,
      :last_match_17           => seventeenth.matches.last,
      :last_match_17_last_loci => seventeenth.matches.last.loci.last,
    }
    [:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci].each do |key|
      actual = reply[key]
      TestSpectra[key].each do |meth, expected|
        if expected.is_a? Float
          actual.send(meth).should be_within(0.0000000001).of(expected)
        elsif key != :last_match_17_last_loci
          # non-Float attributes of the loci entry are not compared
          actual.send(meth).should == expected
        end
      end
    end
    @sqt.spectra[16].matches.first.loci.size.should == 1
    @sqt.spectra[16].matches.last.loci.size.should == 1
  end

end
51
+
@@ -0,0 +1,34 @@
1
+
2
+
3
# [header name, expected value] pairs, in the order they are inserted into
# HeaderHash. Values are Strings, or Arrays of Strings for the
# 'Comment', 'DynamicMod' and 'StaticMod' entries.
header_doublets = [
  ['SQTGenerator', 'mspire'],
  ['SQTGeneratorVersion', '0.3.1'],
  ['Database', 'C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta'],
  ['FragmentMasses', 'AVG'],
  ['PrecursorMasses', 'AVG'],
  ['StartTime', ''],
  ['Alg-MSModel', 'LCQ Deca XP'],
  ['DBLocusCount', '4237'],
  ['Alg-FragMassTol', '1.0000'],
  ['Alg-PreMassTol', '25.0000'],
  ['Alg-IonSeries', '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0'],
  ['Alg-PreMassUnits', 'ppm'],
  ['Alg-Enzyme', 'Trypsin(KR/P) (2)'],

  ['Comment', ['ultra small file created for testing', 'Created from Bioworks .srf file']],
  ['DynamicMod', ['M*=+15.99940', 'STY#=+79.97990']],
  ['StaticMod', []],
]

# Expected header entries keyed by header name.
HeaderHash = {}
header_doublets.each do |name, value|
  HeaderHash[name] = value
end
25
+
26
# Expected attribute values, keyed by the label sqt_spec.rb uses for each
# object it inspects; each inner hash maps a method name to the value that
# method is expected to return.
TestSpectra = {
  :first => {
    :first_scan => 2, :last_scan => 2, :charge => 1,
    :time_to_process => 0.0, :node => "TESLA",
    :mh => 390.92919921875, :total_intensity => 2653.90307617188,
    :lowest_sp => 0.0, :num_matched_peptides => 0, :matches => [],
  },
  :last => {
    :first_scan => 27, :last_scan => 27, :charge => 1,
    :time_to_process => 0.0, :node => "TESLA",
    :mh => 393.008056640625, :total_intensity => 2896.16967773438,
    :lowest_sp => 0.0, :num_matched_peptides => 0, :matches => [],
  },
  :seventeenth => {
    :first_scan => 23, :last_scan => 23, :charge => 1,
    :time_to_process => 0.0, :node => "TESLA",
    :mh => 1022.10571289062, :total_intensity => 3637.86059570312,
    :lowest_sp => 0.0, :num_matched_peptides => 41,
  },
  :first_match_17 => {
    :rxcorr => 1, :rsp => 5, :mh => 1022.11662242, :deltacn_orig => 0.0,
    :xcorr => 0.725152492523193, :sp => 73.9527359008789,
    :ions_matched => 6, :ions_total => 24,
    :sequence => "-.MGT#TTM*GVK.L", :manual_validation_status => "U",
    :first_scan => 23, :last_scan => 23, :charge => 1,
    :deltacn => 0.0672458708286285, :aaseq => 'MGTTTMGVK',
  },
  :last_match_17 => {
    :rxcorr => 10, :rsp => 16, :mh => 1022.09807242, :deltacn_orig => 0.398330867290497,
    :xcorr => 0.436301857233047, :sp => 49.735767364502,
    :ions_matched => 5, :ions_total => 21,
    :sequence => "-.MRT#TSFAK.V", :manual_validation_status => "U",
    :first_scan => 23, :last_scan => 23, :charge => 1,
    :deltacn => 1.1, :aaseq => 'MRTTSFAK',
  },
  :last_match_17_last_loci => {
    :reference   => 'gi|16129390|ref|NP_415948.1|',
    :first_entry => 'gi|16129390|ref|NP_415948.1|',
    :locus       => 'gi|16129390|ref|NP_415948.1|',
    :description => 'Fake description',
  },
}
34
+