mspire 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. data/Rakefile +5 -2
  2. data/bin/bioworks_to_pepxml.rb +84 -40
  3. data/bin/fasta_shaker.rb +100 -0
  4. data/bin/filter_spec_id.rb +185 -23
  5. data/bin/gi2annot.rb +2 -110
  6. data/bin/id_class_anal.rb +31 -21
  7. data/bin/id_precision.rb +12 -8
  8. data/bin/{false_positive_rate.rb → precision.rb} +1 -1
  9. data/bin/protein_summary.rb +55 -62
  10. data/changelog.txt +34 -0
  11. data/lib/align.rb +0 -1
  12. data/lib/fasta.rb +88 -24
  13. data/lib/gi.rb +114 -0
  14. data/lib/roc.rb +64 -58
  15. data/lib/spec_id/aa_freqs.rb +166 -0
  16. data/lib/spec_id/bioworks.rb +5 -1
  17. data/lib/spec_id/precision.rb +427 -0
  18. data/lib/spec_id/proph.rb +2 -2
  19. data/lib/spec_id/sequest.rb +810 -113
  20. data/lib/spec_id/srf.rb +486 -0
  21. data/lib/spec_id.rb +107 -23
  22. data/release_notes.txt +11 -0
  23. data/script/estimate_fpr_by_cysteine.rb +226 -0
  24. data/script/filter-peps.rb +3 -3
  25. data/script/find_cysteine_background.rb +137 -0
  26. data/script/gen_database_searching.rb +11 -7
  27. data/script/genuine_tps_and_probs.rb +136 -0
  28. data/script/top_hit_per_scan.rb +5 -2
  29. data/test/tc_aa_freqs.rb +59 -0
  30. data/test/tc_bioworks.rb +6 -1
  31. data/test/tc_bioworks_to_pepxml.rb +25 -18
  32. data/test/tc_fasta.rb +81 -3
  33. data/test/tc_fasta_shaker.rb +147 -0
  34. data/test/tc_gi.rb +20 -0
  35. data/test/tc_id_class_anal.rb +9 -12
  36. data/test/tc_id_precision.rb +12 -11
  37. data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
  38. data/test/tc_protein_summary.rb +31 -22
  39. data/test/tc_roc.rb +95 -50
  40. data/test/tc_sequest.rb +212 -145
  41. data/test/tc_spec.rb +10 -5
  42. data/test/tc_spec_id.rb +0 -2
  43. data/test/tc_spec_id_xml.rb +36 -0
  44. data/test/tc_srf.rb +216 -0
  45. metadata +35 -21
  46. data/lib/spec_id/false_positive_rate.rb +0 -476
  47. data/test/tc_gi2annot.rb +0 -12
data/test/tc_sequest.rb CHANGED
@@ -12,17 +12,22 @@ class SequestTest < Test::Unit::TestCase
12
12
  def initialize(arg)
13
13
  super(arg)
14
14
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
15
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
15
16
  @tf_params = @tfiles + "bioworks32.params"
16
- @tf_mzxml_path = @tfiles + "yeast_gly_mzXML"
17
+ @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
17
18
  @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
18
19
  end
19
20
 
20
- def Xtest_set_from_bioworks
21
- out_path = '.'
22
- pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(@tf_params, @tf_bioworks_xml, @tf_mzxml_path, out_path)
23
- pepxml_objs.each do |obj|
24
- assert(obj.spectrum_queries.size > 2)
25
- assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
21
+ def test_set_from_bioworks
22
+ if File.exist? @tfiles_l
23
+ out_path = '.'
24
+ pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(@tf_params, @tf_bioworks_xml, @tf_mzxml_path, out_path)
25
+ pepxml_objs.each do |obj|
26
+ assert(obj.spectrum_queries.size > 2)
27
+ assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
28
+ end
29
+ else
30
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
26
31
  end
27
32
  end
28
33
 
@@ -41,147 +46,152 @@ class SequestTest < Test::Unit::TestCase
41
46
  aep(obj, arrs)
42
47
  end
43
48
 
44
- def test_set_from_bioworks ## new one for opd1
45
- st = Time.new
46
- params = @tfiles + "opd1/sequest.3.2.params"
47
- bioworks_xml = @tfiles + "opd1/bioworks.000.oldparams.xml"
48
- mzxml_path = @tfiles + "opd1"
49
- out_path = @tfiles
50
- pepxml_version = 18
51
- pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params, bioworks_xml, mzxml_path, out_path, pepxml_version, "trypsin")
52
- puts "TOOK #{Time.new - st}secs"
53
- po = pepxml_objs.first
54
- assert_equal(pepxml_version, SpecID::Sequest::PepXML.pepxml_version)
55
-
56
- # MSMSPipelineAnalysis
57
- pipe = po.msms_pipeline_analysis
58
- aep(pipe, [
59
- ['http://regis-web.systemsbiology.net/pepXML', :xmlns],
60
- ['http://www.w3.org/2001/XMLSchema-instance', :xmlns_xsi],
61
- ['http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd', :xsi_schema_location],
62
- ['000.xml', :summary_xml],
63
- ])
64
-
65
- # MSMSRunSummary
66
- rs = pipe.msms_run_summary
67
- assert_match(/test\/tfiles\/000/, rs.base_name)
68
- aep(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
69
-
70
- # SampleEnzyme
71
- se = rs.sample_enzyme
72
- aep(se, [ ['trypsin', :name], ['KR', :cut], ['P', :no_cut], ['C', :sense], ])
73
-
74
- # SearchSummary
75
- ss = rs.search_summary
76
- assert_match(/test\/tfiles\/000/, ss.base_name)
77
- assert_match(/1\.500/, ss.peptide_mass_tol)
78
- aeps(ss, [ # normal attributes
79
- [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
80
-
81
- # enzymatic_search_constraint
82
- [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
83
-
84
- # parameters
85
- [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
86
- ])
87
-
88
- # SearchDatabase
89
- sd = ss.search_database
90
- aeps(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
91
-
92
- # SpectrumQueries
93
- sq = rs.spectrum_queries
94
- spec = sq.first
95
- aeps(spec, [
96
- [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
97
- #[:precursor_neutral_mass, "1074.5920"], # out2summary
98
- [:precursor_neutral_mass, "1074.666926"], # mine
99
- [:assumed_charge, "1"], [:index, "1"],
100
- ])
101
- sh = spec.search_results.first.search_hits.first
102
- aeps(sh, [
103
- # normal attributes
104
- [:hit_rank, "1"],
105
- [:peptide, "SIYFRNFK"],
106
- [:peptide_prev_aa, "R"],
107
- [:peptide_next_aa, "G"],
108
- [:protein, "gi|16130084|ref|NP_416651.1|"],
109
- [:num_tot_proteins, "1"],
110
- [:num_matched_ions, "4"],
111
- [:tot_num_ions, "14"],
112
- #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
113
- [:calc_neutral_pep_mass, "1074.23261"], # mine
114
- #[:massdiff, "+0.400000"], # out2summary
115
- [:massdiff, "+0.434316000000081"], # mine
116
- [:num_tol_term, "2"], [:num_missed_cleavages, "1"], [:is_rejected, "0"],
117
-
118
- # search_score
119
- [:xcorr, "0.400"], [:deltacn, "0.023"], [:deltacnstar, "0"], [:spscore, "78.8"], [:sprank, "1"],
120
- ])
121
-
122
- spec = sq[1]
123
- aeps(spec, [
124
- [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
125
- [:precursor_neutral_mass, "663.206111"], # mine
126
- [:assumed_charge, "1"], [:index, "2"],
127
- ])
128
-
129
- sh = spec.search_results.first.search_hits.first
130
- aeps(sh, [
131
- # normal attributes
132
- [:hit_rank, "1"], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "10"],
133
- [:num_tol_term, "2"], [:num_missed_cleavages, "0"], [:is_rejected, "0"],
134
- #[:massdiff, "-0.600000"], # out2summary
135
- [:massdiff, "-0.556499000000031"], # mine
136
- #[:calc_neutral_pep_mass, "663.7920"], # out2summary
137
- [:calc_neutral_pep_mass, "663.76261"], # mine
138
-
139
- # search_score
140
- [:xcorr, "0.965"], [:deltacn, "0.132"], [:deltacnstar, "0"], [:spscore, "81.1"], [:sprank, "1"],
141
- ])
142
-
143
- spec = sq[9]
144
- aeps(spec, [
145
- [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, "2"], [:index, "10"],
146
- #[:precursor_neutral_mass, "691.0920"], # out2summary
147
- [:precursor_neutral_mass, "691.150992"], # mine
148
- ])
149
-
150
- sh = spec.search_results.first.search_hits.first
151
- aeps(sh, [
152
- # normal attributes
153
- [:hit_rank, "1"], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "8"], [:num_tol_term, "2"],
154
-
155
- #[:num_missed_cleavages, "0"], # out2summary misses this!
156
- [:num_missed_cleavages, "1"],
157
- [:is_rejected, "0"],
158
- #[:calc_neutral_pep_mass, "691.7920"], # out2summary
159
- [:calc_neutral_pep_mass, "691.82261"], # mine
160
- #[:massdiff, "-0.700000"], # out2summary
161
- [:massdiff, "-0.67161800000008"], # mine
162
-
163
- # search_score
164
- [:xcorr, "0.903"], [:deltacn, "0.333"], [:deltacnstar, "0"], [:spscore, "172.8"], [:sprank, "1"],
165
- ])
166
-
167
- ## IF ARE OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
168
- string = po.to_pepxml
169
-
170
- ans_lines = IO.read(@tfiles + "opd1/000.my_answer.100lines.xml").split("\n")
171
- string.split("\n").each_with_index do |line,i|
172
- base_name_re = /base_name=".*?\/test/o
173
- if i > 99 ; break end
174
- if i == 1
175
- assert_equal(ans_lines[i].sub(/date=".*?"/,''), line.sub(/date=".*?"/,''))
176
- elsif i == 2
177
- assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
178
- else
179
- assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
180
- #assert_equal(ans_lines[i], line)
49
+ ## turn this off if you are doing lots of tests
50
+ def Xtest_set_from_bioworks ## new one for opd1
51
+ if File.exist? @tfiles_l
52
+ st = Time.new
53
+ params = @tfiles + "opd1/sequest.3.2.params"
54
+ bioworks_xml = @tfiles_l + "opd1/bioworks.000.oldparams.xml"
55
+ mzxml_path = @tfiles + "opd1"
56
+ out_path = @tfiles
57
+ pepxml_version = 18
58
+ pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params, bioworks_xml, mzxml_path, out_path, pepxml_version, "trypsin")
59
+ puts "TOOK #{Time.new - st}secs"
60
+ po = pepxml_objs.first
61
+ assert_equal(pepxml_version, SpecID::Sequest::PepXML.pepxml_version)
62
+
63
+ # MSMSPipelineAnalysis
64
+ pipe = po.msms_pipeline_analysis
65
+ aep(pipe, [
66
+ ['http://regis-web.systemsbiology.net/pepXML', :xmlns],
67
+ ['http://www.w3.org/2001/XMLSchema-instance', :xmlns_xsi],
68
+ ['http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd', :xsi_schema_location],
69
+ ['000.xml', :summary_xml],
70
+ ])
71
+
72
+ # MSMSRunSummary
73
+ rs = pipe.msms_run_summary
74
+ assert_match(/test\/tfiles\/000/, rs.base_name)
75
+ aep(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
76
+
77
+ # SampleEnzyme
78
+ se = rs.sample_enzyme
79
+ aep(se, [ ['trypsin', :name], ['KR', :cut], ['P', :no_cut], ['C', :sense], ])
80
+
81
+ # SearchSummary
82
+ ss = rs.search_summary
83
+ assert_match(/test\/tfiles\/000/, ss.base_name)
84
+ assert_match(/1\.500/, ss.peptide_mass_tol)
85
+ aeps(ss, [ # normal attributes
86
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
87
+
88
+ # enzymatic_search_constraint
89
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
90
+
91
+ # parameters
92
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
93
+ ])
94
+
95
+ # SearchDatabase
96
+ sd = ss.search_database
97
+ aeps(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
98
+
99
+ # SpectrumQueries
100
+ sq = rs.spectrum_queries
101
+ spec = sq.first
102
+ aeps(spec, [
103
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
104
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
105
+ [:precursor_neutral_mass, "1074.666926"], # mine
106
+ [:assumed_charge, "1"], [:index, "1"],
107
+ ])
108
+ sh = spec.search_results.first.search_hits.first
109
+ aeps(sh, [
110
+ # normal attributes
111
+ [:hit_rank, "1"],
112
+ [:peptide, "SIYFRNFK"],
113
+ [:peptide_prev_aa, "R"],
114
+ [:peptide_next_aa, "G"],
115
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
116
+ [:num_tot_proteins, "1"],
117
+ [:num_matched_ions, "4"],
118
+ [:tot_num_ions, "14"],
119
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
120
+ [:calc_neutral_pep_mass, "1074.23261"], # mine
121
+ #[:massdiff, "+0.400000"], # out2summary
122
+ [:massdiff, "+0.434316000000081"], # mine
123
+ [:num_tol_term, "2"], [:num_missed_cleavages, "1"], [:is_rejected, "0"],
124
+
125
+ # search_score
126
+ [:xcorr, "0.400"], [:deltacn, "0.023"], [:deltacnstar, "0"], [:spscore, "78.8"], [:sprank, "1"],
127
+ ])
128
+
129
+ spec = sq[1]
130
+ aeps(spec, [
131
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
132
+ [:precursor_neutral_mass, "663.206111"], # mine
133
+ [:assumed_charge, "1"], [:index, "2"],
134
+ ])
135
+
136
+ sh = spec.search_results.first.search_hits.first
137
+ aeps(sh, [
138
+ # normal attributes
139
+ [:hit_rank, "1"], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "10"],
140
+ [:num_tol_term, "2"], [:num_missed_cleavages, "0"], [:is_rejected, "0"],
141
+ #[:massdiff, "-0.600000"], # out2summary
142
+ [:massdiff, "-0.556499000000031"], # mine
143
+ #[:calc_neutral_pep_mass, "663.7920"], # out2summary
144
+ [:calc_neutral_pep_mass, "663.76261"], # mine
145
+
146
+ # search_score
147
+ [:xcorr, "0.965"], [:deltacn, "0.132"], [:deltacnstar, "0"], [:spscore, "81.1"], [:sprank, "1"],
148
+ ])
149
+
150
+ spec = sq[9]
151
+ aeps(spec, [
152
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, "2"], [:index, "10"],
153
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
154
+ [:precursor_neutral_mass, "691.150992"], # mine
155
+ ])
156
+
157
+ sh = spec.search_results.first.search_hits.first
158
+ aeps(sh, [
159
+ # normal attributes
160
+ [:hit_rank, "1"], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "8"], [:num_tol_term, "2"],
161
+
162
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
163
+ [:num_missed_cleavages, "1"],
164
+ [:is_rejected, "0"],
165
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
166
+ [:calc_neutral_pep_mass, "691.82261"], # mine
167
+ #[:massdiff, "-0.700000"], # out2summary
168
+ [:massdiff, "-0.67161800000008"], # mine
169
+
170
+ # search_score
171
+ [:xcorr, "0.903"], [:deltacn, "0.333"], [:deltacnstar, "0"], [:spscore, "172.8"], [:sprank, "1"],
172
+ ])
173
+
174
+ ## IF ARE OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
175
+ string = po.to_pepxml
176
+
177
+ ans_lines = IO.read(@tfiles + "opd1/000.my_answer.100lines.xml").split("\n")
178
+ string.split("\n").each_with_index do |line,i|
179
+ base_name_re = /base_name=".*?\/test/o
180
+ if i > 99 ; break end
181
+ if i == 1
182
+ assert_equal(ans_lines[i].sub(/date=".*?"/,''), line.sub(/date=".*?"/,''))
183
+ elsif i == 2
184
+ assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
185
+ else
186
+ assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
187
+ #assert_equal(ans_lines[i], line)
188
+ end
181
189
  end
190
+ else
191
+ assert_nil(puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})"))
182
192
  end
183
193
 
184
- #assert_match(/#{Regexp.escape("")}/, string)
194
+ #assert_match(/#{Regexp.escape("")}/, string)
185
195
 
186
196
  end
187
197
 
@@ -220,5 +230,62 @@ class SequestTest < Test::Unit::TestCase
220
230
  assert_equal("hello.fasta", SpecID::Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
221
231
  end
222
232
 
233
+ def test_modifications
234
+ obj = SpecID::Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
235
+ answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
236
+ assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
237
+
238
+ ## need more here
239
+ end
240
+
241
+ def test_non_standard_aa_removal
242
+ hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
243
+ cl = proc {|v| SpecID::Sequest::PepXML::SearchHit.remove_non_amino_acids(v) }
244
+ hash.each do |k,v|
245
+ assert_equal(v, cl.call(k))
246
+ end
247
+ end
248
+
249
+ def test_modification_info
250
+ hash = {
251
+ :mod_nterm_mass => 520.2,
252
+ :modified_peptide => "MOD*IFI^E&D",
253
+ :mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
254
+ }
255
+ answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
256
+ string = SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
257
+ assert_match(_re('<modification_info'), answ)
258
+ assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
259
+ assert_match(_re(" modified_peptide=\"MOD*IFI^E&amp;D\""), answ)
260
+ assert_match(_re("<mod_aminoacid_mass"), answ)
261
+ assert_match(_re(" position=\"3\""), answ)
262
+ assert_match(_re(" mass=\"150.3\""), answ)
263
+ assert_match(_re(" position=\"6\""), answ)
264
+ assert_match(_re(" mass=\"345.2\""), answ)
265
+ assert_match(_re("</modification_info>"), answ)
266
+ end
267
+
268
+ def _re(st)
269
+ /#{Regexp.escape(st)}/
270
+ end
271
+
272
+ def test_modifications
273
+ params = SpecID::Sequest::Params.new(@tf_params)
274
+ mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
275
+ params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
276
+ params.term_diff_search_options = "14.20000 12.33000"
277
+ assert 1
278
+ =begin
279
+ mod = SpecID::Sequest::PepXML::Modifications(params, mod_string)
280
+ SpecID::Sequest::PepXML::Modifications
281
+ peptide = "PEPTIDE"
282
+ ## no mods
283
+ assert_equal(nil, mod.modification_info(peptide))
284
+ peptide = "]M*EC^S@IDM#M*EMSCM["
285
+ p mod.modification_info(peptide)
286
+ =end
287
+
288
+ end
289
+
223
290
  end
224
291
 
data/test/tc_spec.rb CHANGED
@@ -10,17 +10,22 @@ class SpecTest < Test::Unit::TestCase
10
10
  def initialize(arg)
11
11
  super(arg)
12
12
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
13
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
13
14
  @tscans = @tfiles + "opd1/twenty_scans.mzXML"
14
- @tf_mzxml_path = @tfiles + "yeast_gly_mzXML"
15
+ @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
15
16
  #@big_file = "/work/john/ISB_Proteomics_18Set/mzXML/sergei_digest_A_full_01.mzXML"
16
17
  @big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
17
18
  end
18
19
 
19
20
  def test_mzxml_path_precursor_mz_by_scan
20
- hash = Spec::MzXML::Parser.new.precursor_mz_by_scan_for_path(@tf_mzxml_path, "*.mzXML")
21
- assert_equal(%w(000 020), hash.keys.sort)
22
- assert(hash["000"].size > 0)
23
- assert(hash["020"].size > 0)
21
+ if File.exist? @tfiles_l
22
+ hash = Spec::MzXML::Parser.new.precursor_mz_by_scan_for_path(@tf_mzxml_path, "*.mzXML")
23
+ assert_equal(%w(000 020), hash.keys.sort)
24
+ assert(hash["000"].size > 0)
25
+ assert(hash["020"].size > 0)
26
+ else
27
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
28
+ end
24
29
  end
25
30
 
26
31
  def test_mzxml_precursor_mz_by_scan
data/test/tc_spec_id.rb CHANGED
@@ -66,7 +66,5 @@ class SpecIDTest < Test::Unit::TestCase
66
66
  assert_in_delta(v, two[i], delta, message)
67
67
  end
68
68
  end
69
-
70
69
  end
71
70
 
72
-
@@ -0,0 +1,36 @@
1
+ require 'test/unit'
2
+ require 'spec_id'
3
+ require 'ostruct'
4
+
5
+ class Bob
6
+ include SpecIDXML
7
+ def initialize(first, second)
8
+ @first = first ; @second = second
9
+ end
10
+ end
11
+
12
+
13
+ class SpecIDXMLTest < Test::Unit::TestCase
14
+ include SpecIDXML
15
+
16
+ def initialize(*args)
17
+ super(*args)
18
+ end
19
+
20
+ def test_short_element_xml_from_instance_vars
21
+ obj = Bob.new(1, 2)
22
+ st = obj.short_element_xml_from_instance_vars("bob")
23
+ assert_match(/second="2"/, st)
24
+ assert_match(/first="1"/, st)
25
+ assert_match(/^<bob /, st)
26
+ assert_match(/>$/, st)
27
+ end
28
+
29
+ def test_escape_special_chars
30
+ assert_equal("&amp;&gt;&lt;&quot;&apos;" , escape_special_chars("&><\"'"))
31
+ assert_equal("PE&amp;PT&gt;I&lt;D&quot;E&apos;", escape_special_chars("PE&PT>I<D\"E'"))
32
+ end
33
+
34
+ end
35
+
36
+