ms-sequest 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,6 +17,7 @@ require 'ms/sequest/params'
17
17
  module Ms ; end
18
18
  module Ms::Sequest ; end
19
19
 
20
+
20
21
  class Ms::Sequest::Srf
21
22
 
22
23
  class NoSequestParamsError < ArgumentError
@@ -350,8 +351,8 @@ class Ms::Sequest::Srf::Header
350
351
  :modifications => 456,
351
352
  }
352
353
 
353
- # a Ms::Sequest::Srf::DTAGen object
354
354
  attr_accessor :version
355
+ # a Ms::Sequest::Srf::DTAGen object
355
356
  attr_accessor :dta_gen
356
357
  attr_accessor :enzyme
357
358
  attr_accessor :ion_series
@@ -732,8 +733,8 @@ end
732
733
  class Ms::Sequest::SrfGroup
733
734
  include Ms::Id::SearchGroup
734
735
 
735
- # inherets an array of Ms::Sequest::Srf::Out::Pep objects
736
- # inherets an array of Ms::Sequest::Srf::Out::Prot objects
736
+ # inherits an array of Ms::Sequest::Srf::Out::Pep objects
737
+ # inherits an array of Ms::Sequest::Srf::Out::Prot objects
737
738
 
738
739
  # see Ms::Id::Search for acceptable arguments
739
740
  # (filename, filenames, array of objects)
@@ -44,7 +44,7 @@ module Ms
44
44
  # :zip requires gem rubyzip to be installed and is *very* bloated
45
45
  # as it writes out all the files first!
46
46
  # :tgz requires gem archive-tar-minitar to be installed
47
- def to_dta_files(out_folder=nil, compress=nil)
47
+ def to_dta(out_folder=nil, compress=nil)
48
48
  outdir =
49
49
  if out_folder ; out_folder
50
50
  else base_name
@@ -27,7 +27,7 @@ module Ms
27
27
  when 'mgf'
28
28
  srf.to_mgf(newfile)
29
29
  when 'dta'
30
- srf.to_dta_files(newfile)
30
+ srf.to_dta(newfile)
31
31
  end
32
32
  end
33
33
  end
@@ -0,0 +1,29 @@
1
#!/usr/bin/ruby

# Rewrites the header lines of IPI-formatted FASTA files into an NCBI-style
# layout.  For every input <file>.fasta a sibling <file>_NCBI.fasta is written.
#
# On each header line (a line starting with '>') the first whitespace-delimited
# token is split on '|' and every KEY:VAL piece is emitted as "KEY|VAL|"; the
# remaining tokens are kept as the description, joined by single spaces.
# All other lines are copied through unchanged.

if ARGV.size == 0
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read an IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read.)"
  exit
end

ARGV.each do |file|
  base = file.chomp(File.extname(file))
  outfile = base + '_NCBI' + ".fasta"
  File.open(outfile, 'w') do |out|
    # File.foreach (rather than IO.foreach) never interprets a leading "|"
    # in the file name as a command to execute.
    File.foreach(file) do |line|
      if line =~ /^>/
        codes, *description = line[1..-1].split(" ")
        # codes is nil for a bare ">" header; to_s keeps that from raising.
        code_section = codes.to_s.split('|').map do |code|
          # limit of 2 so a value that itself contains ':' is not truncated
          key, val = code.split(':', 2)
          "#{key}|#{val}|"
        end.join
        out.puts ">#{code_section} #{description.join(' ')}"
      else
        out.print line
      end
    end
  end
end
29
+
@@ -0,0 +1,153 @@
1
+
2
+ # TODO work on this guy!
3
+ =begin
4
+
5
+ require File.expand_path( File.dirname(__FILE__) + '/../tap_spec_helper' )
6
+
7
+ require 'spec_id'
8
+ require 'spec_id/bioworks'
9
+ #require 'benchmark'
10
+
11
+ describe Bioworks, 'set from an xml file' do
12
+ # NEED TO DEBUG THIS PROB!
13
+ it 'can set one with labeled proteins' do
14
+ file = Tfiles + "/bioworks_with_INV_small.xml"
15
+ obj = Bioworks.new(file)
16
+ obj.prots.size.should == 19
17
+ file = Tfiles + '/bioworks_small.xml'
18
+ obj = Bioworks.new(file)
19
+ obj.prots.size.should == 106
20
+ end
21
+
22
+ it 'can parse an xml file NOT derived from multi-concensus' do
23
+ tf_bioworks_single_xml_small = Tfiles + '/bioworks_single_run_small.xml'
24
+ obj = Bioworks.new(tf_bioworks_single_xml_small)
25
+ gfn = '5prot_mix_michrom_20fmol_200pmol'
26
+ origfilename = '5prot_mix_michrom_20fmol_200pmol.RAW'
27
+ origfilepath = 'C:\Xcalibur\sequest'
28
+ obj.global_filename.should == gfn
29
+ obj.origfilename.should == origfilename
30
+ obj.origfilepath.should == origfilepath
31
+ obj.prots.size.should == 7
32
+ obj.prots.first.peps.first.base_name.should == gfn
33
+ obj.prots.first.peps.first.file.should == "152"
34
+ obj.prots.first.peps.first.charge.should == 2
35
+ # @TODO: add more tests here
36
+ end
37
+
38
+ it 'can output in excel format (**semi-verified right now)' do
39
+ tf_bioworks_to_excel = Tfiles + '/tf_bioworks2excel.bioXML'
40
+ tf_bioworks_to_excel_actual = Tfiles + '/tf_bioworks2excel.txt.actual'
41
+ tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
42
+ bio = Bioworks.new(tf_bioworks_to_excel)
43
+ bio.to_excel(tmpfile)
44
+ tmpfile.exist_as_a_file?.should be_true
45
+ #File.should exist_as_a_file(tmpfile)
46
+ exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
47
+ act = _arr_of_arrs(tmpfile)
48
+ exp.each_index do |i|
49
+ break if i == 23 ## this is where the ordering becomes arbitrary between guys with the same scans, but different filenames
50
+ _assert_equal_pieces(exp[i], act[i], exp[i][0] =~ /\d/)
51
+ end
52
+
53
+ File.unlink tmpfile
54
+ end
55
+
56
+ # prot is boolean if this is a protein line!
57
+ def _assert_equal_pieces(exp, act, prot)
58
+ # equal as floats (by delta)
59
+ exp.each_index do |i|
60
+ if i == 5 # both prots and peps
61
+ act[i].to_f.should be_close(exp[i].to_f, 0.1)
62
+ elsif i == 3 && !prot
63
+ act[i].to_f.should be_close(exp[i].to_f, 0.01)
64
+ elsif i == 6 && !prot
65
+ act[i].to_f.should be_close(exp[i].to_f, 0.01)
66
+ elsif i == 9 && prot
67
+ ## NEED TO GET THESE BACK (for consistency):
68
+ #act[i].split(" ")[0].should =~ exp[i].split(" ")[0]
69
+ else
70
+ ## NEED TO GET THESE BACK (for consistency):
71
+ #act[i].should == exp[i]
72
+ end
73
+ end
74
+ end
75
+
76
+ # takes a bioworks excel (in txt format) and outputs an arr of arrs
77
+ def _arr_of_arrs(file)
78
+ IO.readlines(file).collect do |line|
79
+ line.chomp!
80
+ line.split("\t")
81
+ end
82
+ end
83
+
84
+ it 'can return unique peptides and proteins by sequence+charge (private)' do
85
+ cnt = 0
86
+ answer = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(3 PEPY), %w(2 PEPY)]
87
+ exp_peps = answer.collect! do |arr|
88
+ pep = Bioworks::Pep.new
89
+ pep.charge = arr[0]
90
+ pep.sequence = arr[1]
91
+ pep
92
+ end
93
+ exp_prots = [[0,2],[1,4,5],[3],[6]].collect do |arr|
94
+ arr.collect do |num|
95
+ prot = Bioworks::Prot.new
96
+ prot.reference = "#{num}"
97
+ prot
98
+ end
99
+ end
100
+ exp_peps = exp_peps.zip(exp_prots)
101
+ exp_peps.collect! do |both|
102
+ both[0].prots = [both[1]]
103
+ both[0]
104
+ end
105
+
106
+ peptides = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPTIDE), %w(3 PEPY), %w(3 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPY)].collect do |arr|
107
+ pep = Bioworks::Pep.new
108
+ pep.charge = arr[0]
109
+ pep.sequence = arr[1]
110
+ pep.prots = [Bioworks::Prot.new]
111
+ pep.prots.first.reference = "#{cnt}"
112
+ cnt += 1
113
+ pep
114
+ end
115
+ peptides, proteins = Bioworks.new._uniq_peps_by_sequence_charge(peptides)
116
+ proteins.size.should == peptides.size
117
+ exp_peps.each_with_index do |pep, i|
118
+ peptides[i].charge.should == pep.charge
119
+ peptides[i].sequence.should == pep.sequence
120
+ end
121
+
122
+ exp_prots.each_index do |i|
123
+ exp_prots[i].each_index do |j|
124
+ proteins[i][j].reference.should == exp_prots[i][j].reference
125
+ end
126
+ end
127
+ end
128
+
129
+ end
130
+
131
+ describe Bioworks::Pep do
132
+ it 'can be initialized from a hash' do
133
+ hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
134
+ pep = Bioworks::Pep.new(hash)
135
+ hash.each do |k,v|
136
+ pep.send(k).should == v
137
+ end
138
+ end
139
+
140
+ it 'correctly extracts file information' do
141
+ pep = Bioworks::Pep.new
142
+ testing = ['005a, 1131', '005b, 1131 - 1133', '1131', '1131 - 1133']
143
+ answers = [%w(005a 1131 1131), %w(005b 1131 1133), [nil, '1131', '1131'], [nil, '1131', '1133']]
144
+ testing.zip(answers) do |ar|
145
+ ans = pep.class.extract_file_info(ar[0])
146
+ ans.join(" ").should == ar[1].join(" ")
147
+ end
148
+ end
149
+
150
+ end
151
+
152
+
153
+ =end
@@ -0,0 +1,131 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+
3
+ require 'ms/sequest/params'
4
+
5
# Reads a params file and returns a Hash of all parameters it contains.
#
# Only lines that begin with a lowercase letter are considered.  On such a
# line, "name = value" is recorded with the value running up to the first ';'
# (or end of line) and stripped of trailing whitespace.  Lines without a
# value after the '=' are ignored.
#
# filename - path of the params file to read
#
# Returns a Hash mapping parameter name (String) to value (String).
def simple_parse(filename)
  hash = {}
  # File.read, unlike IO.read, never treats a leading "|" in the name as a
  # command to run.
  File.read(filename).split(/\r?\n/).each do |line|
    next unless line =~ /^[a-z]/
    if line =~ /([^\s]+)\s*=\s*([^;]+)\s*;?/
      hash[$1.dup] = $2.rstrip
    end
  end
  hash
end
15
+
16
# Shared expectations for a params object built from @file.  The including
# context supplies @file, @api_hash and @backwards_hash.
shared 'sequest params' do
  before do
    @obj = Ms::Sequest::Params.new(@file)
  end

  it 'has a method for every parameter in the file' do
    simple_parse(@file).each_pair do |name, expected|
      @obj.send(name.to_sym).is expected
    end
  end

  it 'returns zero length string for params with no information' do
    @obj.second_database_name.is ""
    @obj.sequence_header_filter.is ""
  end

  it 'returns nil for params that do not exist and have no translation' do
    @obj.google_plex.is nil
  end

  it 'provides consistent API between versions for important info' do
    # the checks run inside capture_stderr; its result is not inspected
    capture_stderr do
      @api_hash.each_pair do |meth, expected|
        @obj.send(meth).is expected
      end
    end
  end

  it 'provides some backwards compatibility' do
    @backwards_hash.each_pair do |meth, expected|
      @obj.send(meth).is expected
    end
  end

end
52
+
53
describe 'sequest params v 3.1' do

  # values the version-independent accessors are expected to return
  @api_hash = {
    :version             => '3.1',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\ecoli_K12.fasta",
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '1',
  }

  # values expected from the older accessor names
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '0.0000',
  }

  @file = TESTFILES + '/bioworks31.params'

  behaves_like 'sequest params'
end
73
+
74
describe 'sequest params v 3.2' do

  # values the version-independent accessors are expected to return
  @api_hash = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '2',
  }

  # values expected from the older accessor names
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  @file = TESTFILES + '/bioworks32.params'

  behaves_like 'sequest params'
end
93
+
94
describe 'sequest params v 3.3' do

  # values the version-independent accessors are expected to return
  @api_hash = {
    :version             => '3.3',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\yeast.fasta",
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => "monoisotopic",
    :fragment_mass_type  => "monoisotopic",
    :min_number_termini  => '2',
  }

  # values expected from the older accessor names
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  @file = TESTFILES + '/bioworks33.params'

  behaves_like 'sequest params'
end
112
+
113
describe 'sequest params v 3.2 from srf' do

  # values the version-independent accessors are expected to return
  @api_hash = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '2',
  }

  # values expected from the older accessor names
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  @file = TESTFILES + '/7MIX_STD_110802_1.sequest_params_fragment.srf'

  behaves_like 'sequest params'
end
131
+
@@ -0,0 +1,376 @@
1
+
2
+ =begin
3
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
4
+
5
+ require 'spec_id'
6
+ require 'spec_id/sequest/pepxml'
7
+ #require 'ms/mzxml'
8
+
9
+
10
+ NODELETE = false
11
+
12
+ describe Sequest::PepXML, " created from small bioworks.xml" do
13
+
14
+ spec_large do
15
+ before(:all) do
16
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
17
+
18
+ tf_params = Tfiles + "/bioworks32.params"
19
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
20
+ out_path = Tfiles
21
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
22
+ end
23
+
24
+ it 'gets some spectrum queries' do
25
+ @pepxml_objs.each do |obj|
26
+ (obj.spectrum_queries.size > 2).should be_true
27
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
28
+ end
29
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+
36
+ describe Sequest::PepXML, " created from large bioworks.xml" do
37
+ # assert_equal_by_pairs (really any old array)
38
+ def assert_equal_pairs(obj, arrs)
39
+ arrs.each do |arr|
40
+ #if obj.send(arr[1]) != arr[0]
41
+ # puts "HELLO"
42
+ # puts "OBJ answer"
43
+ # p obj.send(arr[1])
44
+ # puts "ar0"
45
+ # p arr[0]
46
+ # puts "ar1"
47
+ # p arr[1]
48
+ #end
49
+ if arr[0].is_a? Float
50
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
51
+ else
52
+ obj.send(arr[1]).should == arr[0]
53
+ end
54
+ end
55
+ end
56
+
57
+ # swap the first two guys first
58
+ def assert_equal_pairs_swapped(obj, arrs)
59
+ arrs.each do |arr|
60
+ arr[0], arr[1] = arr[1], arr[0]
61
+ end
62
+ assert_equal_pairs(obj, arrs)
63
+ end
64
+
65
+ spec_large do
66
+ before(:all) do
67
+ st = Time.new
68
+ params = Tfiles + "/opd1/sequest.3.2.params"
69
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
70
+ mzxml_path = Tfiles_l + "/opd1"
71
+ out_path = Tfiles
72
+ @pepxml_version = 18
73
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
74
+ puts "- takes #{Time.new - st} secs"
75
+ end
76
+
77
+ it 'extracts MSMSPipelineAnalysis' do
78
+ ######## HMMMMM...
79
+ Sequest::PepXML.pepxml_version.should == @pepxml_version
80
+
81
+ # MSMSPipelineAnalysis
82
+ po = @pepxml_objs.first
83
+ msms_pipeline = po.msms_pipeline_analysis
84
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
85
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
86
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
87
+ msms_pipeline.summary_xml.should == '000.xml'
88
+ end
89
+
90
+ it 'extracts MSmSRunSummary' do
91
+ # MSMSRunSummary
92
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
93
+ rs.base_name.should =~ /\/000/
94
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
95
+ end
96
+
97
+ it 'extracts SampleEnzyme' do
98
+ # SampleEnzyme
99
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
100
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
101
+ end
102
+
103
+ it 'extracts SearchSummary' do
104
+ # SearchSummary
105
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
106
+ ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
107
+ ss.base_name.should =~ /\/000/
108
+ ss.peptide_mass_tol.should =~ /1\.500/
109
+ assert_equal_pairs_swapped(ss, [ # normal attributes
110
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
111
+
112
+ # enzymatic_search_constraint
113
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
114
+
115
+ # parameters
116
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
117
+ ])
118
+
119
+ end
120
+ it 'extracts SearchDatabase' do
121
+ # SearchDatabase
122
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
123
+ sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
124
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
125
+ end
126
+
127
+ it 'returns SpectrumQueries' do
128
+ # SpectrumQueries
129
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
130
+ spec = sq.first
131
+ assert_equal_pairs_swapped(spec, [
132
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
133
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
134
+ [:precursor_neutral_mass, 1074.666926], # mine
135
+ [:assumed_charge, 1], [:index, "1"],
136
+ ])
137
+ sh = spec.search_results.first.search_hits.first
138
+ assert_equal_pairs_swapped(sh, [
139
+ # normal attributes
140
+ [:hit_rank, 1],
141
+ [:peptide, "SIYFRNFK"],
142
+ [:peptide_prev_aa, "R"],
143
+ [:peptide_next_aa, "G"],
144
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
145
+ [:num_tot_proteins, 1],
146
+ [:num_matched_ions, 4],
147
+ [:tot_num_ions, 14],
148
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
149
+ [:calc_neutral_pep_mass, 1074.23261], # mine
150
+ #[:massdiff, "+0.400000"], # out2summary
151
+ [:massdiff, 0.434316000000081], # mine
152
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
153
+
154
+ # search_score
155
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
156
+ ])
157
+
158
+ spec = sq[1]
159
+ assert_equal_pairs_swapped(spec, [
160
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
161
+ [:precursor_neutral_mass, 663.206111], # mine
162
+ [:assumed_charge, 1], [:index, "2"],
163
+ ])
164
+
165
+ sh = spec.search_results.first.search_hits.first
166
+ assert_equal_pairs_swapped(sh, [
167
+ # normal attributes
168
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
169
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
170
+ #[:massdiff, "-0.600000"], # out2summary
171
+ [:massdiff, -0.556499000000031], # mine
172
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
173
+ [:calc_neutral_pep_mass, 663.76261], # mine
174
+
175
+ # search_score
176
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
177
+ ])
178
+
179
+ spec = sq[9]
180
+ assert_equal_pairs_swapped(spec, [
181
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
182
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
183
+ [:precursor_neutral_mass, 691.150992], # mine
184
+ ])
185
+
186
+ sh = spec.search_results.first.search_hits.first
187
+ assert_equal_pairs_swapped(sh, [
188
+ # normal attributes
189
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
190
+
191
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
192
+ [:num_missed_cleavages, 1],
193
+ [:is_rejected, 0],
194
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
195
+ [:calc_neutral_pep_mass, 691.82261], # mine
196
+ #[:massdiff, "-0.700000"], # out2summary
197
+ [:massdiff, -0.67161800000008], # mine
198
+
199
+ # search_score
200
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
201
+ ])
202
+ end
203
+
204
+ it 'can generate correct pepxml file' do
205
+
206
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
207
+ string = @pepxml_objs.first.to_pepxml
208
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
209
+ base_name_re = /base_name=".*?files\//o
210
+ date_re = /date=".*?"/
211
+ string.split("\n").each_with_index do |line,i|
212
+ if i > 99 ; break end
213
+ ans, exp =
214
+ if i == 1
215
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
216
+ elsif i == 2
217
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
218
+ elsif i == 6
219
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
220
+ else
221
+ [line, ans_lines[i]]
222
+ end
223
+
224
+ #ans.split('').zip(exp.split('')) do |l,a|
225
+ # if l != a
226
+ # puts line
227
+ # puts ans_lines[i]
228
+ # puts l
229
+ # puts a
230
+ # end
231
+ #end
232
+ if ans != exp
233
+ puts ans
234
+ puts exp
235
+ end
236
+ ans.should == exp
237
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
238
+ end
239
+ end
240
+ end
241
+ end
242
+
243
+
244
+
245
+ describe Sequest::PepXML::Modifications do
246
+ before(:each) do
247
+ tf_params = Tfiles + "/bioworks32.params"
248
+ @params = Sequest::Params.new(tf_params)
249
+ # The params object here is completely unnecessary for this test, except
250
+ # that it sets up the mass table
251
+ @obj = Sequest::PepXML::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
252
+ end
253
+ it 'creates a mod_symbols_hash' do
254
+ answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
255
+ @obj.mod_symbols_hash.should == answ
256
+ ## need more here
257
+ end
258
+
259
+ it 'creates a ModificationInfo object given a special peptide sequence' do
260
+ mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
261
+ @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
262
+ @params.term_diff_search_options = "14.20000 12.33000"
263
+ mod = Sequest::PepXML::Modifications.new(@params, mod_string)
264
+ ## no mods
265
+ peptide = "PEPTIDE"
266
+ mod.modification_info(peptide).should be_nil
267
+ peptide = "]M*EC^S@IDM#M*EMSCM["
268
+ modinfo = mod.modification_info(peptide)
269
+ modinfo.modified_peptide.should == peptide
270
+ modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
271
+ modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
272
+ end
273
+
274
+ end
275
+
276
+ describe Sequest::PepXML::SearchHit::ModificationInfo do
277
+
278
+ before(:each) do
279
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
280
+ Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
281
+ end
282
+ hash = {
283
+ :mod_nterm_mass => 520.2,
284
+ :modified_peptide => "MOD*IFI^E&D",
285
+ :mod_aminoacid_masses => modaaobjs,
286
+ }
287
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
288
+ @obj = Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
289
+ end
290
+
291
+ def _re(st)
292
+ /#{Regexp.escape(st)}/
293
+ end
294
+
295
+ it 'can produce pepxml' do
296
+ answ = @obj.to_pepxml
297
+ answ.should =~ _re('<modification_info')
298
+ answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
299
+ answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&amp;D\"")
300
+ answ.should =~ _re("<mod_aminoacid_mass")
301
+ answ.should =~ _re(" position=\"3\"")
302
+ answ.should =~ _re(" mass=\"150.3\"")
303
+ answ.should =~ _re(" position=\"6\"")
304
+ answ.should =~ _re(" mass=\"345.2\"")
305
+ answ.should =~ _re("</modification_info>")
306
+ end
307
+ end
308
+
309
+ describe 'bioworks file with modifications transformed into pepxml' do
310
+
311
+ spec_large do
312
+ before(:all) do
313
+ modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
314
+ modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
315
+ @srgfile = modfiles_sequest_dir + 'tmp.srg'
316
+ @out_path = modfiles_sequest_dir + 'pepxml'
317
+ modfiles = %w(020 040).map do |file|
318
+ modfiles_sequest_dir + file + ".srf"
319
+ end
320
+ objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
321
+ @out_files = %w(020 040).map do |file|
322
+ @out_path + '/' + file + '.xml'
323
+ end
324
+ end
325
+
326
+ after(:all) do
327
+ File.unlink(@srgfile) unless NODELETE
328
+ FileUtils.rm_r(@out_path)
329
+ #@out_files.each do |fn|
330
+ # File.unlink(fn) unless NODELETE
331
+ #end
332
+ end
333
+
334
+ # splits string on ' ' and matches the line found by find_line_regexp in
335
+ # lines
336
+ def match_modline_pieces(lines, find_line_regexp, string)
337
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
338
+ lines.each do |line|
339
+ if line =~ find_line_regexp
340
+ pieces.each do |piece|
341
+ line.should =~ piece
342
+ end
343
+ end
344
+ end
345
+ end
346
+
347
+ it 'gets modifications right in real run' do
348
+ @out_files.each do |fn|
349
+ fn.exist_as_a_file?.should be_true
350
+ beginning = IO.read(fn)
351
+ lines = beginning.split("\n")
352
+ [
353
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
354
+
355
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
356
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
357
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
358
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
359
+ ].each do |a,b|
360
+ match_modline_pieces(lines, a, b)
361
+ end
362
+ [
363
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
364
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
365
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
366
+ '</modification_info>',
367
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
368
+ ].each do |line|
369
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
370
+ end
371
+ end
372
+ end
373
+ end
374
+ end
375
+
376
+ =end