mspire-sequest 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/.autotest +30 -0
  2. data/.gitmodules +9 -0
  3. data/History +79 -0
  4. data/LICENSE +22 -0
  5. data/README.rdoc +85 -0
  6. data/Rakefile +52 -0
  7. data/VERSION +1 -0
  8. data/bin/srf_to_pepxml.rb +7 -0
  9. data/bin/srf_to_search.rb +7 -0
  10. data/bin/srf_to_sqt.rb +8 -0
  11. data/lib/mspire/sequest/params.rb +331 -0
  12. data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
  13. data/lib/mspire/sequest/pepxml/params.rb +32 -0
  14. data/lib/mspire/sequest/sqt.rb +393 -0
  15. data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
  16. data/lib/mspire/sequest/srf/pepxml.rb +333 -0
  17. data/lib/mspire/sequest/srf/search.rb +158 -0
  18. data/lib/mspire/sequest/srf/sqt.rb +218 -0
  19. data/lib/mspire/sequest/srf.rb +715 -0
  20. data/lib/mspire/sequest.rb +6 -0
  21. data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
  22. data/spec/mspire/sequest/params_spec.rb +135 -0
  23. data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
  24. data/spec/mspire/sequest/pepxml_spec.rb +311 -0
  25. data/spec/mspire/sequest/sqt_spec.rb +51 -0
  26. data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
  27. data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
  28. data/spec/mspire/sequest/srf/search_spec.rb +131 -0
  29. data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
  30. data/spec/mspire/sequest/srf_spec.rb +113 -0
  31. data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
  32. data/spec/spec_helper.rb +22 -0
  33. data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  34. data/spec/testfiles/bioworks31.params +77 -0
  35. data/spec/testfiles/bioworks32.params +62 -0
  36. data/spec/testfiles/bioworks33.params +63 -0
  37. data/spec/testfiles/corrupted_900.srf +0 -0
  38. data/spec/testfiles/small.sqt +87 -0
  39. data/spec/testfiles/small2.sqt +176 -0
  40. metadata +185 -0
@@ -0,0 +1,6 @@
1
+
2
module Mspire
  module Sequest
    # Library version: the first line (newline removed) of the VERSION file
    # located two directories above this source file.
    version_file = File.dirname(__FILE__) + '/../../VERSION'
    VERSION = File.open(version_file) do |handle|
      handle.gets.chomp
    end
  end
end
@@ -0,0 +1,29 @@
1
#!/usr/bin/ruby

# Converts one IPI style fasta header line into an NCBI style header.
#
# line - a header line starting with '>' (a trailing newline is fine).
#
# The first whitespace-delimited token is split on '|'; every "KEY:VALUE"
# code in it becomes "KEY|VALUE|".  The remaining tokens are re-joined with
# single spaces and kept as the description.
#
# Returns the converted header line without a trailing newline.
def ipi_header_to_ncbi(line)
  codes, *description = line[1..-1].split(" ")
  # a bare ">" header has no codes (codes is nil): emit an empty code section
  # instead of raising NoMethodError
  code_section = codes.to_s.split('|').map do |code|
    # a limit of 2 keeps any further ':' characters inside the value
    key, val = code.split(':', 2)
    "#{key}|#{val}|"
  end.join
  ">#{code_section} #{description.join(" ")}"
end

if ARGV.empty?
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read.)"
else
  ARGV.each do |file|
    # <file>.fasta -> <file>_NCBI.fasta (whatever the original extension was)
    base = file.chomp(File.extname(file))
    outfile = base + '_NCBI' + ".fasta"
    File.open(outfile, 'w') do |out|
      IO.foreach(file) do |line|
        if line =~ /^>/
          out.puts ipi_header_to_ncbi(line)
        else
          # sequence lines are copied through untouched
          out.print line
        end
      end
    end
  end
end
29
+
@@ -0,0 +1,135 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/sequest/params'
4
+
5
# Minimal stand-alone params parser used to cross-check the library.
#
# Reads +filename+ with binary encoding, keeps only the lines that begin
# with a lowercase letter and returns a hash of all params, mapping each
# "name = value" name to its value (cut at the first ';' and stripped of
# trailing whitespace).
def simple_parse(filename)
  contents = File.open(filename) do |handle|
    # this makes it work with ruby 1.9:
    handle.set_encoding("ASCII-8BIT") if handle.respond_to?(:set_encoding)
    handle.read
  end
  params = {}
  contents.split(/\r?\n/).each do |line|
    next unless line =~ /^[a-z]/
    found = /([^\s]+)\s*=\s*([^;]+)\s*;?/.match(line)
    next unless found
    params[found[1].dup] = found[2].rstrip
  end
  params
end
20
+
21
# Shared behaviour checked for every params source.
#
# params_file    - path handed to Mspire::Sequest::Params.new
# api_hash       - method name => expected value pairs used by the
#                  'consistent API' example
# backwards_hash - method name => expected value pairs used by the
#                  'backwards compatibility' example
shared_examples_for 'sequest params' do |params_file, api_hash, backwards_hash|

  subject { Mspire::Sequest::Params.new(params_file) }

  it 'has a method for every parameter in the file' do
    simple_parse(params_file).each do |name, value|
      subject.send(name.to_sym).should == value
    end
  end

  it 'returns zero length string for params with no information' do
    subject.second_database_name.should == ""
    subject.sequence_header_filter.should == ""
  end

  it 'returns nil for params that do not exist and have no translation' do
    subject.google_plex.should == nil
  end

  it 'provides consistent API between versions for important info' do
    # whatever capture_stderr returns is not inspected, so it is not kept
    # (capture_stderr is not defined in this file)
    capture_stderr do
      api_hash.each do |meth, expected|
        subject.send(meth).should == expected
      end
    end
  end

  it 'provides some backwards compatibility' do
    backwards_hash.each do |meth, expected|
      subject.send(meth).should == expected
    end
  end

end
56
+
57
# One example group per params source.  Each row holds:
#   [group description,
#    test file name (appended to TESTFILES),
#    expected values for the version-independent API,
#    expected values for the backwards-compatible accessors]
[
  [
    'sequest params v 3.1',
    '/bioworks31.params',
    {
      :version => '3.1',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\ecoli_K12.fasta",
      :enzyme_specificity => [1, 'KR', ''],
      :precursor_mass_type => "average",
      :fragment_mass_type => "average",
      :min_number_termini => '1',
    },
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '0.0000',
    },
  ],
  [
    'sequest params v 3.2',
    '/bioworks32.params',
    {
      :version => '3.2',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
      :enzyme_specificity => [1, 'KR', 'P'],
      :precursor_mass_type => "average",
      :fragment_mass_type => "average",
      :min_number_termini => '2',
    },
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '1.0000',
    },
  ],
  [
    'sequest params v 3.3',
    '/bioworks33.params',
    {
      :version => '3.3',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\yeast.fasta",
      :enzyme_specificity => [1, 'KR', ''],
      :precursor_mass_type => "monoisotopic",
      :fragment_mass_type => "monoisotopic",
      :min_number_termini => '2',
    },
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '1.0000',
    },
  ],
  [
    'sequest params v 3.2 from srf',
    '/7MIX_STD_110802_1.sequest_params_fragment.srf',
    {
      :version => '3.2',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
      :enzyme_specificity => [1, 'KR', 'P'],
      :precursor_mass_type => "average",
      :fragment_mass_type => "average",
      :min_number_termini => '2',
    },
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '1.0000',
    },
  ],
].each do |description, testfile, api_hash, backwards_hash|
  describe description do
    it_behaves_like 'sequest params', TESTFILES + testfile, api_hash, backwards_hash
  end
end
135
+
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/sequest/params'
4
+ require 'mspire/sequest/pepxml/modifications'
5
+
6
describe 'Mspire::Sequest::Pepxml::Modifications' do
  before do
    # Per the original author's note, the params object is unnecessary for
    # this test except that it sets up the mass table.
    @params = Mspire::Sequest::Params.new(TESTFILES + "/bioworks32.params")
    @obj = Mspire::Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
  end

  it 'creates a mod_symbols_hash' do
    expected = {
      [:M, 15.9] => "*",
      [:M, 29.0] => "#",
      [:S, 80.0] => "@",
      [:C, 12.0] => "^",
      [:ct, 12.33] => "[",
      [:nt, 14.2] => "]",
    }
    @obj.mod_symbols_hash.should == expected
    ## need more here
  end

  it 'creates a ModificationInfo object given a special peptide sequence' do
    @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
    @params.term_diff_search_options = "14.20000 12.33000"
    mod = Mspire::Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")

    ## no mods
    mod.modification_info("PEPTIDE").should be_nil

    modinfo = mod.modification_info("]M*EC^S@IDM#M*EMSCM[")

    xml_string = modinfo.to_xml
    [
      /<mod_aminoacid_mass /,
      /mod_nterm_mass=/,
      /mod_cterm_mass=/,
      /modified_peptide=/,
    ].each do |pattern|
      xml_string.should match( pattern )
    end

    modinfo.mod_aminoacid_masses.size.should == 5
    # positions are verified, masses are just frozen
    frozen = [
      [1, 147.09606],
      [3, 115.1429],
      [4, 167.0772999],
      [7, 160.19606],
      [8, 147.09606],
    ]
    frozen.zip(modinfo.mod_aminoacid_masses) do |(pos, mass), aa_mass|
      aa_mass.position.should == pos
      aa_mass.mass.should be_within(0.0001).of(mass)
    end
    # These values are just frozen and not independently verified yet
    modinfo.mod_nterm_mass.should be_within(0.0001).of(146.4033)
    modinfo.mod_cterm_mass.should be_within(0.0001).of(160.5334)
  end

end
50
+
@@ -0,0 +1,311 @@
1
+
2
+ =begin
3
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
4
+
5
+ require 'spec_id'
6
+ require 'spec_id/sequest/pepxml'
7
+ #require 'mspire/mzxml'
8
+
9
+
10
+ NODELETE = false
11
+
12
+ describe Sequest::PepXML, " created from small bioworks.xml" do
13
+
14
+ spec_large do
15
+ before(:all) do
16
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
17
+
18
+ tf_params = Tfiles + "/bioworks32.params"
19
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
20
+ out_path = Tfiles
21
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
22
+ end
23
+
24
+ it 'gets some spectrum queries' do
25
+ @pepxml_objs.each do |obj|
26
+ (obj.spectrum_queries.size > 2).should be_true
27
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
28
+ end
29
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+
36
+ describe Sequest::PepXML, " created from large bioworks.xml" do
37
+ # assert_equal_by_pairs (really any old array)
38
+ def assert_equal_pairs(obj, arrs)
39
+ arrs.each do |arr|
40
+ #if obj.send(arr[1]) != arr[0]
41
+ # puts "HELLO"
42
+ # puts "OBJ answer"
43
+ # p obj.send(arr[1])
44
+ # puts "ar0"
45
+ # p arr[0]
46
+ # puts "ar1"
47
+ # p arr[1]
48
+ #end
49
+ if arr[0].is_a? Float
50
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
51
+ else
52
+ obj.send(arr[1]).should == arr[0]
53
+ end
54
+ end
55
+ end
56
+
57
+ #swap the first to guys first
58
+ def assert_equal_pairs_swapped(obj, arrs)
59
+ arrs.each do |arr|
60
+ arr[0], arr[1] = arr[1], arr[0]
61
+ end
62
+ assert_equal_pairs(obj, arrs)
63
+ end
64
+
65
+ spec_large do
66
+ before(:all) do
67
+ st = Time.new
68
+ params = Tfiles + "/opd1/sequest.3.2.params"
69
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
70
+ mzxml_path = Tfiles_l + "/opd1"
71
+ out_path = Tfiles
72
+ @pepxml_version = 18
73
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
74
+ puts "- takes #{Time.new - st} secs"
75
+ end
76
+
77
+ it 'extracts MSMSPipelineAnalysis' do
78
+ ######## HMMMMM...
79
+ Sequest::PepXML.pepxml_version.should == @pepxml_version
80
+
81
+ # MSMSPipelineAnalysis
82
+ po = @pepxml_objs.first
83
+ msms_pipeline = po.msms_pipeline_analysis
84
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
85
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
86
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
87
+ msms_pipeline.summary_xml.should == '000.xml'
88
+ end
89
+
90
+ it 'extracts MSmSRunSummary' do
91
+ # MSMSRunSummary
92
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
93
+ rs.base_name.should =~ /\/000/
94
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
95
+ end
96
+
97
+ it 'extracts SampleEnzyme' do
98
+ # SampleEnzyme
99
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
100
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
101
+ end
102
+
103
+ it 'extracts SearchSummary' do
104
+ # SearchSummary
105
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
106
+ ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
107
+ ss.base_name.should =~ /\/000/
108
+ ss.peptide_mass_tol.should =~ /1\.500/
109
+ assert_equal_pairs_swapped(ss, [ # normal attributes
110
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
111
+
112
+ # enzymatic_search_constraint
113
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
114
+
115
+ # parameters
116
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
117
+ ])
118
+
119
+ end
120
+ it 'extracts SearchDatabase' do
121
+ # SearchDatabase
122
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
123
+ sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
124
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
125
+ end
126
+
127
+ it 'returns SpectrumQueries' do
128
+ # SpectrumQueries
129
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
130
+ spec = sq.first
131
+ assert_equal_pairs_swapped(spec, [
132
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
133
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
134
+ [:precursor_neutral_mass, 1074.666926], # mine
135
+ [:assumed_charge, 1], [:index, "1"],
136
+ ])
137
+ sh = spec.search_results.first.search_hits.first
138
+ assert_equal_pairs_swapped(sh, [
139
+ # normal attributes
140
+ [:hit_rank, 1],
141
+ [:peptide, "SIYFRNFK"],
142
+ [:peptide_prev_aa, "R"],
143
+ [:peptide_next_aa, "G"],
144
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
145
+ [:num_tot_proteins, 1],
146
+ [:num_matched_ions, 4],
147
+ [:tot_num_ions, 14],
148
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
149
+ [:calc_neutral_pep_mass, 1074.23261], # mine
150
+ #[:massdiff, "+0.400000"], # out2summary
151
+ [:massdiff, 0.434316000000081], # mine
152
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
153
+
154
+ # search_score
155
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
156
+ ])
157
+
158
+ spec = sq[1]
159
+ assert_equal_pairs_swapped(spec, [
160
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
161
+ [:precursor_neutral_mass, 663.206111], # mine
162
+ [:assumed_charge, 1], [:index, "2"],
163
+ ])
164
+
165
+ sh = spec.search_results.first.search_hits.first
166
+ assert_equal_pairs_swapped(sh, [
167
+ # normal attributes
168
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
169
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
170
+ #[:massdiff, "-0.600000"], # out2summary
171
+ [:massdiff, -0.556499000000031], # mine
172
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
173
+ [:calc_neutral_pep_mass, 663.76261], # mine
174
+
175
+ # search_score
176
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
177
+ ])
178
+
179
+ spec = sq[9]
180
+ assert_equal_pairs_swapped(spec, [
181
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
182
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
183
+ [:precursor_neutral_mass, 691.150992], # mine
184
+ ])
185
+
186
+ sh = spec.search_results.first.search_hits.first
187
+ assert_equal_pairs_swapped(sh, [
188
+ # normal attributes
189
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
190
+
191
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
192
+ [:num_missed_cleavages, 1],
193
+ [:is_rejected, 0],
194
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
195
+ [:calc_neutral_pep_mass, 691.82261], # mine
196
+ #[:massdiff, "-0.700000"], # out2summary
197
+ [:massdiff, -0.67161800000008], # mine
198
+
199
+ # search_score
200
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
201
+ ])
202
+ end
203
+
204
+ it 'can generate correct pepxml file' do
205
+
206
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
207
+ string = @pepxml_objs.first.to_pepxml
208
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
209
+ base_name_re = /base_name=".*?files\//o
210
+ date_re = /date=".*?"/
211
+ string.split("\n").each_with_index do |line,i|
212
+ if i > 99 ; break end
213
+ ans, exp =
214
+ if i == 1
215
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
216
+ elsif i == 2
217
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
218
+ elsif i == 6
219
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
220
+ else
221
+ [line, ans_lines[i]]
222
+ end
223
+
224
+ #ans.split('').zip(exp.split('')) do |l,a|
225
+ # if l != a
226
+ # puts line
227
+ # puts ans_lines[i]
228
+ # puts l
229
+ # puts a
230
+ # end
231
+ #end
232
+ if ans != exp
233
+ puts ans
234
+ puts exp
235
+ end
236
+ ans.should == exp
237
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
238
+ end
239
+ end
240
+ end
241
+ end
242
+
243
+
244
+ describe 'bioworks file with modifications transformed into pepxml' do
245
+
246
+ spec_large do
247
+ before(:all) do
248
+ modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
249
+ modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
250
+ @srgfile = modfiles_sequest_dir + 'tmp.srg'
251
+ @out_path = modfiles_sequest_dir + 'pepxml'
252
+ modfiles = %w(020 040).map do |file|
253
+ modfiles_sequest_dir + file + ".srf"
254
+ end
255
+ objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
256
+ @out_files = %w(020 040).map do |file|
257
+ @out_path + '/' + file + '.xml'
258
+ end
259
+ end
260
+
261
+ after(:all) do
262
+ File.unlink(@srgfile) unless NODELETE
263
+ FileUtils.rm_r(@out_path)
264
+ #@out_files.each do |fn|
265
+ # File.unlink(fn) unless NODELETE
266
+ #end
267
+ end
268
+
269
+ # splits string on ' 'and matches the line found by find_line_regexp in
270
+ # lines
271
+ def match_modline_pieces(lines, find_line_regexp, string)
272
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
273
+ lines.each do |line|
274
+ if line =~ find_line_regexp
275
+ pieces.each do |piece|
276
+ line.should =~ piece
277
+ end
278
+ end
279
+ end
280
+ end
281
+
282
+ it 'gets modifications right in real run' do
283
+ @out_files.each do |fn|
284
+ fn.exist_as_a_file?.should be_true
285
+ beginning = IO.read(fn)
286
+ lines = beginning.split("\n")
287
+ [
288
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
289
+
290
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
291
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
292
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
293
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
294
+ ].each do |a,b|
295
+ match_modline_pieces(lines, a, b)
296
+ end
297
+ [
298
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
299
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
300
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
301
+ '</modification_info>',
302
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
303
+ ].each do |line|
304
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
305
+ end
306
+ end
307
+ end
308
+ end
309
+ end
310
+
311
+ =end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/sequest/sqt_spec_helper'
4
+ require 'mspire/sequest/sqt'
5
+
6
describe 'reading a small sqt file' do

  before(:each) do
    @sqt = Mspire::Sequest::Sqt.new(TESTFILES + '/small.sqt')
  end

  it 'can access header entries like a hash' do
    parsed = @sqt.header
    HeaderHash.each do |name, expected|
      parsed[name].should == expected
    end
  end

  it 'can access header entries with methods' do
    parsed = @sqt.header
    # for example:
    parsed.database.should == HeaderHash['Database']
    # all working:
    attribute_for = Mspire::Sequest::Sqt::Header::KeysToAtts
    HeaderHash.each do |name, expected|
      parsed.send(attribute_for[name]).should == expected
    end
  end

  it 'has spectra, matches, and loci' do
    svt = @sqt.spectra[16]
    reply = {
      :first => @sqt.spectra.first,
      :last => @sqt.spectra.last,
      :seventeenth => svt,
      :first_match_17 => svt.matches.first,
      :last_match_17 => svt.matches.last,
      :last_match_17_last_loci => svt.matches.last.loci.last,
    }
    checked = [:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci]
    checked.each do |key|
      actual = reply[key]
      TestSpectra[key].each do |attribute, expected|
        if expected.is_a?(Float)
          actual.send(attribute).should be_within(0.0000000001).of(expected)
        elsif key != :last_match_17_last_loci
          # non-float attributes of the last locus are skipped, not compared
          actual.send(attribute).should == expected
        end
      end
    end
    @sqt.spectra[16].matches.first.loci.size.should == 1
    @sqt.spectra[16].matches.last.loci.size.should == 1
  end

end
51
+
@@ -0,0 +1,34 @@
1
+
2
+
3
# Expected header key => value pairs used by the sqt spec.
# Keys whose value is an array ('Comment', 'DynamicMod', 'StaticMod') hold a
# list of values.
HeaderHash = {
  'SQTGenerator' => 'mspire',
  'SQTGeneratorVersion' => '0.3.1',
  'Database' => 'C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta',
  'FragmentMasses' => 'AVG',
  'PrecursorMasses' => 'AVG',
  'StartTime' => '',
  'Alg-MSModel' => 'LCQ Deca XP',
  'DBLocusCount' => '4237',
  'Alg-FragMassTol' => '1.0000',
  'Alg-PreMassTol' => '25.0000',
  'Alg-IonSeries' => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
  'Alg-PreMassUnits' => 'ppm',
  'Alg-Enzyme' => 'Trypsin(KR/P) (2)',

  'Comment' => ['ultra small file created for testing', 'Created from Bioworks .srf file'],
  'DynamicMod' => ['M*=+15.99940', 'STY#=+79.97990'],
  'StaticMod' => [],
}

# Expected attribute values, keyed by a label for the object being checked
# and then by the method name to call on that object.
TestSpectra = {
  :first => {
    :first_scan => 2,
    :last_scan => 2,
    :charge => 1,
    :time_to_process => 0.0,
    :node => 'TESLA',
    :mh => 390.92919921875,
    :total_intensity => 2653.90307617188,
    :lowest_sp => 0.0,
    :num_matched_peptides => 0,
    :matches => [],
  },
  :last => {
    :first_scan => 27,
    :last_scan => 27,
    :charge => 1,
    :time_to_process => 0.0,
    :node => 'TESLA',
    :mh => 393.008056640625,
    :total_intensity => 2896.16967773438,
    :lowest_sp => 0.0,
    :num_matched_peptides => 0,
    :matches => [],
  },
  :seventeenth => {
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :time_to_process => 0.0,
    :node => 'TESLA',
    :mh => 1022.10571289062,
    :total_intensity => 3637.86059570312,
    :lowest_sp => 0.0,
    :num_matched_peptides => 41,
  },
  :first_match_17 => {
    :rxcorr => 1,
    :rsp => 5,
    :mh => 1022.11662242,
    :deltacn_orig => 0.0,
    :xcorr => 0.725152492523193,
    :sp => 73.9527359008789,
    :ions_matched => 6,
    :ions_total => 24,
    :sequence => '-.MGT#TTM*GVK.L',
    :manual_validation_status => 'U',
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :deltacn => 0.0672458708286285,
    :aaseq => 'MGTTTMGVK',
  },
  :last_match_17 => {
    :rxcorr => 10,
    :rsp => 16,
    :mh => 1022.09807242,
    :deltacn_orig => 0.398330867290497,
    :xcorr => 0.436301857233047,
    :sp => 49.735767364502,
    :ions_matched => 5,
    :ions_total => 21,
    :sequence => '-.MRT#TSFAK.V',
    :manual_validation_status => 'U',
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :deltacn => 1.1,
    :aaseq => 'MRTTSFAK',
  },
  :last_match_17_last_loci => {
    :reference => 'gi|16129390|ref|NP_415948.1|',
    :first_entry => 'gi|16129390|ref|NP_415948.1|',
    :locus => 'gi|16129390|ref|NP_415948.1|',
    :description => 'Fake description',
  },
}
34
+