mspire-sequest 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/ruby

# Converts a single IPI-style fasta header line into an NCBI-style header.
#
# The first whitespace-delimited token of the header holds '|'-separated
# codes of the form "KEY:VALUE"; each becomes "KEY|VALUE|".  The rest of the
# header is kept as the description (runs of whitespace collapse to a single
# space because the line is split and re-joined).
#
# line - a header line beginning with '>' (trailing newline optional)
#
# Returns the converted header without a trailing newline.
def ipi_header_to_ncbi(line)
  codes, *description = line[1..-1].split(' ')
  # a bare ">" header has no codes to convert; hand it back untouched
  return line.chomp if codes.nil?

  code_section = codes.split('|').map do |code|
    # split on the first ':' only so a value that itself contains ':' is
    # kept whole instead of being silently truncated
    key, val = code.split(':', 2)
    # a code with no ':' is emitted as-is, which leaves headers that are
    # already in NCBI form ("gi|123|ref|NP_1|") unchanged
    val ? "#{key}|#{val}|" : "#{key}|"
  end.join
  ">#{code_section} #{description.join(' ')}"
end

# Writes <file minus extension>_NCBI.fasta next to +file+, converting every
# header line and copying all other lines verbatim.
#
# Returns the name of the file that was written.
def convert_ipi_fasta(file)
  outfile = file.chomp(File.extname(file)) + '_NCBI' + '.fasta'
  File.open(outfile, 'w') do |out|
    IO.foreach(file) do |line|
      if line.start_with?('>')
        out.puts ipi_header_to_ncbi(line)
      else
        out.print line
      end
    end
  end
  outfile
end

if ARGV.empty?
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read an IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read.)"
else
  ARGV.each { |file| convert_ipi_fasta(file) }
end
|
29
|
+
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/params'
|
4
|
+
|
5
|
+
# Independent, minimal parser used to cross-check the real params reader.
#
# Only lines that start with a lowercase letter are considered.  On those,
# "name = value ; optional comment" yields name => value with trailing
# whitespace stripped from the value.
#
# filename - path to a params file (may contain arbitrary binary bytes)
#
# Returns a Hash of String name => String value.
def simple_parse(filename)
  # 'rb' reads raw bytes on every platform (no newline translation) and, on
  # Ruby >= 1.9, tags the data ASCII-8BIT so binary content cannot raise
  # encoding errors
  data = File.open(filename, 'rb') { |io| io.read }
  data.split(/\r?\n/).each_with_object({}) do |line, hash|
    next unless line =~ /^[a-z]/
    match = /([^\s]+)\s*=\s*([^;]+)\s*;?/.match(line)
    hash[match[1]] = match[2].rstrip if match
  end
end
|
20
|
+
|
21
|
+
# Shared expectations for a Mspire::Sequest::Params object built from one
# params file.
#
# params_file    - path handed to Mspire::Sequest::Params.new
# api_hash       - method name => expected value for the version-independent API
# backwards_hash - method name => expected value for the older method names
shared_examples_for 'sequest params' do |params_file, api_hash, backwards_hash|

  subject { Mspire::Sequest::Params.new(params_file) }

  it 'has a method for every parameter in the file' do
    simple_parse(params_file).each do |name, value|
      subject.send(name.to_sym).should == value
    end
  end

  it 'returns zero length string for params with no information' do
    subject.second_database_name.should == ""
    subject.sequence_header_filter.should == ""
  end

  it 'returns nil for params that do not exist and have no translation' do
    subject.google_plex.should be_nil
  end

  it 'provides consistent API between versions for important info' do
    # capture_stderr is defined outside this file (presumably spec_helper);
    # its return value is not needed here
    capture_stderr do
      api_hash.each do |meth, expected|
        subject.send(meth).should == expected
      end
    end
  end

  it 'provides some backwards compatibility' do
    backwards_hash.each do |meth, expected|
      subject.send(meth).should == expected
    end
  end

end
|
56
|
+
|
57
|
+
# Runs the shared 'sequest params' examples against the bioworks31.params
# fixture.
describe 'sequest params v 3.1' do

  params_path = TESTFILES + '/bioworks31.params'

  # values expected from the version-independent accessors
  expected_api = {
    :version             => '3.1',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\ecoli_K12.fasta",
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '1',
  }

  # values expected from the older method names
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '0.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
|
77
|
+
|
78
|
+
# Runs the shared 'sequest params' examples against the bioworks32.params
# fixture.
describe 'sequest params v 3.2' do
  params_path = TESTFILES + '/bioworks32.params'

  # values expected from the version-independent accessors
  expected_api = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '2',
  }

  # values expected from the older method names
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
|
97
|
+
|
98
|
+
# Runs the shared 'sequest params' examples against the bioworks33.params
# fixture.
describe 'sequest params v 3.3' do
  params_path = TESTFILES + '/bioworks33.params'

  # values expected from the version-independent accessors
  expected_api = {
    :version             => '3.3',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\yeast.fasta",
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => "monoisotopic",
    :fragment_mass_type  => "monoisotopic",
    :min_number_termini  => '2',
  }

  # values expected from the older method names
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
|
116
|
+
|
117
|
+
# Runs the shared 'sequest params' examples against the params section
# carried inside an .srf fragment fixture.
describe 'sequest params v 3.2 from srf' do
  params_path = TESTFILES + '/7MIX_STD_110802_1.sequest_params_fragment.srf'

  # values expected from the version-independent accessors
  expected_api = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type  => "average",
    :min_number_termini  => '2',
  }

  # values expected from the older method names
  expected_backwards = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000',
  }

  it_behaves_like 'sequest params', params_path, expected_api, expected_backwards
end
|
135
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/params'
|
4
|
+
require 'mspire/sequest/pepxml/modifications'
|
5
|
+
|
6
|
+
# Specs for Mspire::Sequest::Pepxml::Modifications built from a sequest
# modification string.
describe 'Mspire::Sequest::Pepxml::Modifications' do
  before do
    @params = Mspire::Sequest::Params.new(TESTFILES + "/bioworks32.params")
    # The params object here is completely unnecessary for this test, except
    # that it sets up the mass table
    @obj = Mspire::Sequest::Pepxml::Modifications.new(
      @params,
      '(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) '
    )
  end

  it 'creates a mod_symbols_hash' do
    expected = {
      [:C, 12.0]   => "^",
      [:S, 80.0]   => "@",
      [:M, 29.0]   => "#",
      [:M, 15.9]   => "*",
      [:ct, 12.33] => "[",
      [:nt, 14.2]  => "]",
    }
    @obj.mod_symbols_hash.should == expected
    ## need more here
  end

  it 'creates a ModificationInfo object given a special peptide sequence' do
    @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
    @params.term_diff_search_options = "14.20000 12.33000"
    mods = Mspire::Sequest::Pepxml::Modifications.new(
      @params,
      '(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) '
    )

    ## no mods
    mods.modification_info("PEPTIDE").should be_nil

    modinfo = mods.modification_info("]M*EC^S@IDM#M*EMSCM[")

    xml = modinfo.to_xml
    [
      /<mod_aminoacid_mass /,
      /mod_nterm_mass=/,
      /mod_cterm_mass=/,
      /modified_peptide=/,
    ].each do |pattern|
      xml.should match(pattern)
    end

    modinfo.mod_aminoacid_masses.size.should == 5
    aa_mods = modinfo.mod_aminoacid_masses
    # positions are verified, masses are just frozen
    expected_positions = [1, 3, 4, 7, 8]
    frozen_masses = [147.09606, 115.1429, 167.0772999, 160.19606, 147.09606]
    expected_positions.zip(frozen_masses, aa_mods) do |position, mass, aa_mod|
      aa_mod.position.should == position
      aa_mod.mass.should be_within(0.0001).of(mass)
    end
    # These values are just frozen and not independently verified yet
    modinfo.mod_nterm_mass.should be_within(0.0001).of(146.4033)
    modinfo.mod_cterm_mass.should be_within(0.0001).of(160.5334)
  end

end
|
50
|
+
|
@@ -0,0 +1,311 @@
|
|
1
|
+
|
2
|
+
=begin
|
3
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
4
|
+
|
5
|
+
require 'spec_id'
|
6
|
+
require 'spec_id/sequest/pepxml'
|
7
|
+
#require 'mspire/mzxml'
|
8
|
+
|
9
|
+
|
10
|
+
NODELETE = false
|
11
|
+
|
12
|
+
describe Sequest::PepXML, " created from small bioworks.xml" do
|
13
|
+
|
14
|
+
spec_large do
|
15
|
+
before(:all) do
|
16
|
+
tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
|
17
|
+
|
18
|
+
tf_params = Tfiles + "/bioworks32.params"
|
19
|
+
tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
|
20
|
+
out_path = Tfiles
|
21
|
+
@pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'gets some spectrum queries' do
|
25
|
+
@pepxml_objs.each do |obj|
|
26
|
+
(obj.spectrum_queries.size > 2).should be_true
|
27
|
+
(obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
|
28
|
+
end
|
29
|
+
#@pepxml_objs.each do |pep| puts pep.to_pepxml end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
describe Sequest::PepXML, " created from large bioworks.xml" do
|
37
|
+
# assert_equal_by_pairs (really any old array)
|
38
|
+
def assert_equal_pairs(obj, arrs)
|
39
|
+
arrs.each do |arr|
|
40
|
+
#if obj.send(arr[1]) != arr[0]
|
41
|
+
# puts "HELLO"
|
42
|
+
# puts "OBJ answer"
|
43
|
+
# p obj.send(arr[1])
|
44
|
+
# puts "ar0"
|
45
|
+
# p arr[0]
|
46
|
+
# puts "ar1"
|
47
|
+
# p arr[1]
|
48
|
+
#end
|
49
|
+
if arr[0].is_a? Float
|
50
|
+
obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
|
51
|
+
else
|
52
|
+
obj.send(arr[1]).should == arr[0]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# swap the first two elements of each pair first
|
58
|
+
def assert_equal_pairs_swapped(obj, arrs)
|
59
|
+
arrs.each do |arr|
|
60
|
+
arr[0], arr[1] = arr[1], arr[0]
|
61
|
+
end
|
62
|
+
assert_equal_pairs(obj, arrs)
|
63
|
+
end
|
64
|
+
|
65
|
+
spec_large do
|
66
|
+
before(:all) do
|
67
|
+
st = Time.new
|
68
|
+
params = Tfiles + "/opd1/sequest.3.2.params"
|
69
|
+
bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
|
70
|
+
mzxml_path = Tfiles_l + "/opd1"
|
71
|
+
out_path = Tfiles
|
72
|
+
@pepxml_version = 18
|
73
|
+
@pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
|
74
|
+
puts "- takes #{Time.new - st} secs"
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'extracts MSMSPipelineAnalysis' do
|
78
|
+
######## HMMMMM...
|
79
|
+
Sequest::PepXML.pepxml_version.should == @pepxml_version
|
80
|
+
|
81
|
+
# MSMSPipelineAnalysis
|
82
|
+
po = @pepxml_objs.first
|
83
|
+
msms_pipeline = po.msms_pipeline_analysis
|
84
|
+
msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
|
85
|
+
msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
|
86
|
+
msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
|
87
|
+
msms_pipeline.summary_xml.should == '000.xml'
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'extracts MSmSRunSummary' do
|
91
|
+
# MSMSRunSummary
|
92
|
+
rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
|
93
|
+
rs.base_name.should =~ /\/000/
|
94
|
+
assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'extracts SampleEnzyme' do
|
98
|
+
# SampleEnzyme
|
99
|
+
se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
|
100
|
+
assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'extracts SearchSummary' do
|
104
|
+
# SearchSummary
|
105
|
+
ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
|
106
|
+
ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
|
107
|
+
ss.base_name.should =~ /\/000/
|
108
|
+
ss.peptide_mass_tol.should =~ /1\.500/
|
109
|
+
assert_equal_pairs_swapped(ss, [ # normal attributes
|
110
|
+
[:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
|
111
|
+
|
112
|
+
# enzymatic_search_constraint
|
113
|
+
[:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
|
114
|
+
|
115
|
+
# parameters
|
116
|
+
[:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
|
117
|
+
])
|
118
|
+
|
119
|
+
end
|
120
|
+
it 'extracts SearchDatabase' do
|
121
|
+
# SearchDatabase
|
122
|
+
sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
|
123
|
+
sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
|
124
|
+
assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'returns SpectrumQueries' do
|
128
|
+
# SpectrumQueries
|
129
|
+
sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
|
130
|
+
spec = sq.first
|
131
|
+
assert_equal_pairs_swapped(spec, [
|
132
|
+
[:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
|
133
|
+
#[:precursor_neutral_mass, "1074.5920"], # out2summary
|
134
|
+
[:precursor_neutral_mass, 1074.666926], # mine
|
135
|
+
[:assumed_charge, 1], [:index, "1"],
|
136
|
+
])
|
137
|
+
sh = spec.search_results.first.search_hits.first
|
138
|
+
assert_equal_pairs_swapped(sh, [
|
139
|
+
# normal attributes
|
140
|
+
[:hit_rank, 1],
|
141
|
+
[:peptide, "SIYFRNFK"],
|
142
|
+
[:peptide_prev_aa, "R"],
|
143
|
+
[:peptide_next_aa, "G"],
|
144
|
+
[:protein, "gi|16130084|ref|NP_416651.1|"],
|
145
|
+
[:num_tot_proteins, 1],
|
146
|
+
[:num_matched_ions, 4],
|
147
|
+
[:tot_num_ions, 14],
|
148
|
+
#[:calc_neutral_pep_mass, "1074.1920"], # out2summary
|
149
|
+
[:calc_neutral_pep_mass, 1074.23261], # mine
|
150
|
+
#[:massdiff, "+0.400000"], # out2summary
|
151
|
+
[:massdiff, 0.434316000000081], # mine
|
152
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
|
153
|
+
|
154
|
+
# search_score
|
155
|
+
[:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
|
156
|
+
])
|
157
|
+
|
158
|
+
spec = sq[1]
|
159
|
+
assert_equal_pairs_swapped(spec, [
|
160
|
+
[:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
|
161
|
+
[:precursor_neutral_mass, 663.206111], # mine
|
162
|
+
[:assumed_charge, 1], [:index, "2"],
|
163
|
+
])
|
164
|
+
|
165
|
+
sh = spec.search_results.first.search_hits.first
|
166
|
+
assert_equal_pairs_swapped(sh, [
|
167
|
+
# normal attributes
|
168
|
+
[:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
|
169
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
|
170
|
+
#[:massdiff, "-0.600000"], # out2summary
|
171
|
+
[:massdiff, -0.556499000000031], # mine
|
172
|
+
#[:calc_neutral_pep_mass, 663.7920], # out2summary
|
173
|
+
[:calc_neutral_pep_mass, 663.76261], # mine
|
174
|
+
|
175
|
+
# search_score
|
176
|
+
[:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
|
177
|
+
])
|
178
|
+
|
179
|
+
spec = sq[9]
|
180
|
+
assert_equal_pairs_swapped(spec, [
|
181
|
+
[:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
|
182
|
+
#[:precursor_neutral_mass, "691.0920"], # out2summary
|
183
|
+
[:precursor_neutral_mass, 691.150992], # mine
|
184
|
+
])
|
185
|
+
|
186
|
+
sh = spec.search_results.first.search_hits.first
|
187
|
+
assert_equal_pairs_swapped(sh, [
|
188
|
+
# normal attributes
|
189
|
+
[:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
|
190
|
+
|
191
|
+
#[:num_missed_cleavages, "0"], # out2summary misses this!
|
192
|
+
[:num_missed_cleavages, 1],
|
193
|
+
[:is_rejected, 0],
|
194
|
+
#[:calc_neutral_pep_mass, "691.7920"], # out2summary
|
195
|
+
[:calc_neutral_pep_mass, 691.82261], # mine
|
196
|
+
#[:massdiff, "-0.700000"], # out2summary
|
197
|
+
[:massdiff, -0.67161800000008], # mine
|
198
|
+
|
199
|
+
# search_score
|
200
|
+
[:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
|
201
|
+
])
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'can generate correct pepxml file' do
|
205
|
+
|
206
|
+
## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
|
207
|
+
string = @pepxml_objs.first.to_pepxml
|
208
|
+
ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
|
209
|
+
base_name_re = /base_name=".*?files\//o
|
210
|
+
date_re = /date=".*?"/
|
211
|
+
string.split("\n").each_with_index do |line,i|
|
212
|
+
if i > 99 ; break end
|
213
|
+
ans, exp =
|
214
|
+
if i == 1
|
215
|
+
[line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
|
216
|
+
elsif i == 2
|
217
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
|
218
|
+
elsif i == 6
|
219
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
|
220
|
+
else
|
221
|
+
[line, ans_lines[i]]
|
222
|
+
end
|
223
|
+
|
224
|
+
#ans.split('').zip(exp.split('')) do |l,a|
|
225
|
+
# if l != a
|
226
|
+
# puts line
|
227
|
+
# puts ans_lines[i]
|
228
|
+
# puts l
|
229
|
+
# puts a
|
230
|
+
# end
|
231
|
+
#end
|
232
|
+
if ans != exp
|
233
|
+
puts ans
|
234
|
+
puts exp
|
235
|
+
end
|
236
|
+
ans.should == exp
|
237
|
+
#line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
|
244
|
+
describe 'bioworks file with modifications transformed into pepxml' do
|
245
|
+
|
246
|
+
spec_large do
|
247
|
+
before(:all) do
|
248
|
+
modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
|
249
|
+
modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
|
250
|
+
@srgfile = modfiles_sequest_dir + 'tmp.srg'
|
251
|
+
@out_path = modfiles_sequest_dir + 'pepxml'
|
252
|
+
modfiles = %w(020 040).map do |file|
|
253
|
+
modfiles_sequest_dir + file + ".srf"
|
254
|
+
end
|
255
|
+
objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
|
256
|
+
@out_files = %w(020 040).map do |file|
|
257
|
+
@out_path + '/' + file + '.xml'
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
after(:all) do
|
262
|
+
File.unlink(@srgfile) unless NODELETE
|
263
|
+
FileUtils.rm_r(@out_path)
|
264
|
+
#@out_files.each do |fn|
|
265
|
+
# File.unlink(fn) unless NODELETE
|
266
|
+
#end
|
267
|
+
end
|
268
|
+
|
269
|
+
# splits string on ' 'and matches the line found by find_line_regexp in
|
270
|
+
# lines
|
271
|
+
def match_modline_pieces(lines, find_line_regexp, string)
|
272
|
+
pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
|
273
|
+
lines.each do |line|
|
274
|
+
if line =~ find_line_regexp
|
275
|
+
pieces.each do |piece|
|
276
|
+
line.should =~ piece
|
277
|
+
end
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
it 'gets modifications right in real run' do
|
283
|
+
@out_files.each do |fn|
|
284
|
+
fn.exist_as_a_file?.should be_true
|
285
|
+
beginning = IO.read(fn)
|
286
|
+
lines = beginning.split("\n")
|
287
|
+
[
|
288
|
+
[/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
|
289
|
+
|
290
|
+
[/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
|
291
|
+
[/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
|
292
|
+
[/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
|
293
|
+
[/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
|
294
|
+
].each do |a,b|
|
295
|
+
match_modline_pieces(lines, a, b)
|
296
|
+
end
|
297
|
+
[
|
298
|
+
'<modification_info modified_peptide="Y#RLGGS#T#K">',
|
299
|
+
'<mod_aminoacid_mass position="1" mass="243.1559"/>',
|
300
|
+
'<mod_aminoacid_mass position="7" mass="167.0581"/>',
|
301
|
+
'</modification_info>',
|
302
|
+
'<mod_aminoacid_mass position="9" mass="181.085"/>'
|
303
|
+
].each do |line|
|
304
|
+
beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
|
305
|
+
end
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
=end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/sqt_spec_helper'
|
4
|
+
require 'mspire/sequest/sqt'
|
5
|
+
|
6
|
+
# Specs for reading the small.sqt fixture with Mspire::Sequest::Sqt; the
# expected values (HeaderHash, TestSpectra) come from sqt_spec_helper.
describe 'reading a small sqt file' do

  before(:each) do
    @sqt = Mspire::Sequest::Sqt.new(TESTFILES + '/small.sqt')
  end

  it 'can access header entries like a hash' do
    header = @sqt.header
    HeaderHash.each { |key, expected| header[key].should == expected }
  end

  it 'can access header entries with methods' do
    header = @sqt.header
    # for example:
    header.database.should == HeaderHash['Database']
    # all working:
    HeaderHash.each do |key, expected|
      attribute = Mspire::Sequest::Sqt::Header::KeysToAtts[key]
      header.send(attribute).should == expected
    end
  end

  it 'has spectra, matches, and loci' do
    seventeenth = @sqt.spectra[16]
    actual = {
      :first                   => @sqt.spectra.first,
      :last                    => @sqt.spectra.last,
      :seventeenth             => seventeenth,
      :first_match_17          => seventeenth.matches.first,
      :last_match_17           => seventeenth.matches.last,
      :last_match_17_last_loci => seventeenth.matches.last.loci.last,
    }
    checked = [:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci]
    checked.each do |key|
      TestSpectra[key].each do |attribute, expected|
        if expected.is_a? Float
          actual[key].send(attribute).should be_within(0.0000000001).of(expected)
        else
          # non-Float attributes of the last locus are not compared
          next if key == :last_match_17_last_loci
          actual[key].send(attribute).should == expected
        end
      end
    end
    @sqt.spectra[16].matches.first.loci.size.should == 1
    @sqt.spectra[16].matches.last.loci.size.should == 1
  end

end
|
51
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Expected SQT header entries, keyed by the header name as it appears in the
# file.  'Comment', 'DynamicMod' and 'StaticMod' map to arrays of values;
# every other key maps to a single string.
HeaderHash = {
  'SQTGenerator'        => 'mspire',
  'SQTGeneratorVersion' => '0.3.1',
  'Database'            => 'C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta',
  'FragmentMasses'      => 'AVG',
  'PrecursorMasses'     => 'AVG',
  'StartTime'           => '',
  'Alg-MSModel'         => 'LCQ Deca XP',
  'DBLocusCount'        => '4237',
  'Alg-FragMassTol'     => '1.0000',
  'Alg-PreMassTol'      => '25.0000',
  'Alg-IonSeries'       => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
  'Alg-PreMassUnits'    => 'ppm',
  'Alg-Enzyme'          => 'Trypsin(KR/P) (2)',

  'Comment'             => ['ultra small file created for testing', 'Created from Bioworks .srf file'],
  'DynamicMod'          => ['M*=+15.99940', 'STY#=+79.97990'],
  'StaticMod'           => [],
}
|
25
|
+
|
26
|
+
# Expected attribute values for selected objects read from the small SQT
# fixture: the first, last and 17th spectrum, the first and last match of
# the 17th spectrum, and the last locus of that last match.
TestSpectra = {
  :first => {
    :first_scan           => 2,
    :last_scan            => 2,
    :charge               => 1,
    :time_to_process      => 0.0,
    :node                 => "TESLA",
    :mh                   => 390.92919921875,
    :total_intensity      => 2653.90307617188,
    :lowest_sp            => 0.0,
    :num_matched_peptides => 0,
    :matches              => [],
  },
  :last => {
    :first_scan           => 27,
    :last_scan            => 27,
    :charge               => 1,
    :time_to_process      => 0.0,
    :node                 => "TESLA",
    :mh                   => 393.008056640625,
    :total_intensity      => 2896.16967773438,
    :lowest_sp            => 0.0,
    :num_matched_peptides => 0,
    :matches              => [],
  },
  :seventeenth => {
    :first_scan           => 23,
    :last_scan            => 23,
    :charge               => 1,
    :time_to_process      => 0.0,
    :node                 => "TESLA",
    :mh                   => 1022.10571289062,
    :total_intensity      => 3637.86059570312,
    :lowest_sp            => 0.0,
    :num_matched_peptides => 41,
  },
  :first_match_17 => {
    :rxcorr                   => 1,
    :rsp                      => 5,
    :mh                       => 1022.11662242,
    :deltacn_orig             => 0.0,
    :xcorr                    => 0.725152492523193,
    :sp                       => 73.9527359008789,
    :ions_matched             => 6,
    :ions_total               => 24,
    :sequence                 => "-.MGT#TTM*GVK.L",
    :manual_validation_status => "U",
    :first_scan               => 23,
    :last_scan                => 23,
    :charge                   => 1,
    :deltacn                  => 0.0672458708286285,
    :aaseq                    => 'MGTTTMGVK',
  },
  :last_match_17 => {
    :rxcorr                   => 10,
    :rsp                      => 16,
    :mh                       => 1022.09807242,
    :deltacn_orig             => 0.398330867290497,
    :xcorr                    => 0.436301857233047,
    :sp                       => 49.735767364502,
    :ions_matched             => 5,
    :ions_total               => 21,
    :sequence                 => "-.MRT#TSFAK.V",
    :manual_validation_status => "U",
    :first_scan               => 23,
    :last_scan                => 23,
    :charge                   => 1,
    :deltacn                  => 1.1,
    :aaseq                    => 'MRTTSFAK',
  },
  :last_match_17_last_loci => {
    :reference   => 'gi|16129390|ref|NP_415948.1|',
    :first_entry => 'gi|16129390|ref|NP_415948.1|',
    :locus       => 'gi|16129390|ref|NP_415948.1|',
    :description => 'Fake description',
  },
}
|
34
|
+
|