mspire-sequest 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/ruby

# Rewrites the header lines of IPI-formatted fasta files into an NCBI-style
# layout. Every <file>.fasta given on the command line is written out as
# <file>_NCBI.fasta; non-header lines are copied through untouched.

# Converts one fasta header line (leading ">" included) to the NCBI-ish form.
#
# line:: the header line as read from the file (trailing newline optional)
# returns:: the rewritten header, without a trailing newline
#
# The first whitespace-delimited token is treated as a '|'-separated list of
# "key:value" codes, each of which is emitted as "key|value|". The remaining
# tokens are re-joined with single spaces as the description.
def ipi_header_to_ncbi(line)
  codes, *description = line[1..-1].split(" ")
  # A bare ">" has no identifier token (codes is nil), so there is nothing
  # to rewrite: hand the header back unchanged rather than failing on nil.
  return line.chomp if codes.nil?
  code_section = codes.split('|').map { |code|
    key, val = code.split(':')
    "#{key}|#{val}|"
  }.join
  ">#{code_section} #{description.join(" ")}"
end

if ARGV.size == 0
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read an IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read."
else
  ARGV.each do |file|
    # strip the extension so "x.fasta" becomes "x_NCBI.fasta"
    base = file.chomp(File.extname(file))
    outfile = base + '_NCBI' + ".fasta"
    File.open(outfile, 'w') do |out|
      IO.foreach(file) do |line|
        if line =~ /^>/
          out.puts ipi_header_to_ncbi(line)
        else
          # sequence lines keep their original line ending
          out.print line
        end
      end
    end
  end
end
|
29
|
+
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/params'
|
4
|
+
|
5
|
+
# Independent, minimal reader for a params file, used to cross-check the
# real parser.
#
# filename:: path of the file to read
# returns:: Hash mapping each parameter name to its value string (right-hand
#           whitespace removed, anything from the first ';' on dropped)
#
# Only lines beginning with a lowercase letter are considered.
def simple_parse(filename)
  contents = File.open(filename) do |io|
    # read as raw bytes so ruby 1.9+ does not choke on non-text content
    io.set_encoding("ASCII-8BIT") if io.respond_to?(:set_encoding)
    io.read
  end

  params = {}
  contents.split(/\r?\n/).each do |row|
    next unless row =~ /^[a-z]/
    found = row.match(/([^\s]+)\s*=\s*([^;]+)\s*;?/)
    next unless found
    params[found[1]] = found[2].rstrip
  end
  params
end
|
20
|
+
|
21
|
+
# Shared behaviour checked for every params file.
#
# params_file::    path handed to Mspire::Sequest::Params.new
# api_hash::       method name => expected value for the version-independent API
# backwards_hash:: method name => expected value for the backwards-compatible names
shared_examples_for 'sequest params' do |params_file, api_hash, backwards_hash|

  subject { Mspire::Sequest::Params.new(params_file) }

  it 'has a method for every parameter in the file' do
    # cross-check against the independent simple_parse reader
    simple_parse(params_file).each_pair do |name, expected|
      subject.send(name.to_sym).should == expected
    end
  end

  it 'returns zero length string for params with no information' do
    [subject.second_database_name, subject.sequence_header_filter].each do |value|
      value.should == ""
    end
  end

  it 'returns nil for params that do not exist and have no translation' do
    subject.google_plex.should == nil
  end

  it 'provides consistent API between versions for important info' do
    # stderr is captured while the accessors run; the captured text is not
    # inspected (presumably this just keeps warnings out of the spec output)
    capture_stderr do
      api_hash.each_pair do |reader, expected|
        subject.send(reader).should == expected
      end
    end
  end

  it 'provides some backwards compatibility' do
    backwards_hash.each_pair do |reader, expected|
      subject.send(reader).should == expected
    end
  end

end
|
56
|
+
|
57
|
+
# Runs the shared 'sequest params' examples against the bioworks31.params
# test file.
describe 'sequest params v 3.1' do

  it_behaves_like(
    'sequest params',
    "#{TESTFILES}/bioworks31.params",
    # expected values for the version-independent API
    {
      :version => '3.1',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\ecoli_K12.fasta",
      :enzyme_specificity => [1, 'KR', ''],
      :precursor_mass_type => "average",
      :fragment_mass_type => "average",
      :min_number_termini => '1',
    },
    # expected values for the backwards-compatible accessors
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '0.0000',
    }
  )
end
|
77
|
+
|
78
|
+
# Runs the shared 'sequest params' examples against the bioworks32.params
# test file.
describe 'sequest params v 3.2' do

  it_behaves_like(
    'sequest params',
    "#{TESTFILES}/bioworks32.params",
    # expected values for the version-independent API
    {
      :version => '3.2',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
      :enzyme_specificity => [1, 'KR', 'P'],
      :precursor_mass_type => "average",
      :fragment_mass_type => "average",
      :min_number_termini => '2',
    },
    # expected values for the backwards-compatible accessors
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '1.0000',
    }
  )
end
|
97
|
+
|
98
|
+
# Runs the shared 'sequest params' examples against the bioworks33.params
# test file.
describe 'sequest params v 3.3' do

  it_behaves_like(
    'sequest params',
    "#{TESTFILES}/bioworks33.params",
    # expected values for the version-independent API
    {
      :version => '3.3',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\yeast.fasta",
      :enzyme_specificity => [1, 'KR', ''],
      :precursor_mass_type => "monoisotopic",
      :fragment_mass_type => "monoisotopic",
      :min_number_termini => '2',
    },
    # expected values for the backwards-compatible accessors
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '1.0000',
    }
  )
end
|
116
|
+
|
117
|
+
# Runs the shared 'sequest params' examples against the params section
# carried inside an .srf fragment test file.
describe 'sequest params v 3.2 from srf' do

  it_behaves_like(
    'sequest params',
    "#{TESTFILES}/7MIX_STD_110802_1.sequest_params_fragment.srf",
    # expected values for the version-independent API
    {
      :version => '3.2',
      :enzyme => 'Trypsin',
      :database => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
      :enzyme_specificity => [1, 'KR', 'P'],
      :precursor_mass_type => "average",
      :fragment_mass_type => "average",
      :min_number_termini => '2',
    },
    # expected values for the backwards-compatible accessors
    {
      :max_num_internal_cleavages => '2',
      :fragment_ion_tol => '1.0000',
    }
  )
end
|
135
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/params'
|
4
|
+
require 'mspire/sequest/pepxml/modifications'
|
5
|
+
|
6
|
+
# Specs for building modification symbol tables and per-peptide modification
# info from a sequest modification string.
describe 'Mspire::Sequest::Pepxml::Modifications' do
  before do
    # The params object is not otherwise needed by these examples; it is
    # created because doing so sets up the mass table.
    @params = Mspire::Sequest::Params.new("#{TESTFILES}/bioworks32.params")
    @obj = Mspire::Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
  end

  it 'creates a mod_symbols_hash' do
    expected = {
      [:C, 12.0] => "^",
      [:S, 80.0] => "@",
      [:M, 29.0] => "#",
      [:M, 15.9] => "*",
      [:ct, 12.33] => "[",
      [:nt, 14.2] => "]",
    }
    @obj.mod_symbols_hash.should == expected
    ## need more here
  end

  it 'creates a ModificationInfo object given a special peptide sequence' do
    symbols = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
    @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
    @params.term_diff_search_options = "14.20000 12.33000"
    mods = Mspire::Sequest::Pepxml::Modifications.new(@params, symbols)

    # a peptide without modification symbols yields no info object
    mods.modification_info("PEPTIDE").should be_nil

    info = mods.modification_info("]M*EC^S@IDM#M*EMSCM[")

    xml = info.to_xml
    [
      /<mod_aminoacid_mass /,
      /mod_nterm_mass=/,
      /mod_cterm_mass=/,
      /modified_peptide=/,
    ].each do |pattern|
      xml.should match( pattern )
    end

    info.mod_aminoacid_masses.size.should == 5
    aa_masses = info.mod_aminoacid_masses
    # positions are verified, masses are just frozen
    expected_positions = [1, 3, 4, 7, 8]
    expected_masses = [147.09606, 115.1429, 167.0772999, 160.19606, 147.09606]
    expected_positions.each_with_index do |pos, i|
      entry = aa_masses[i]
      entry.position.should == pos
      entry.mass.should be_within(0.0001).of(expected_masses[i])
    end
    # These values are just frozen and not independently verified yet
    info.mod_nterm_mass.should be_within(0.0001).of(146.4033)
    info.mod_cterm_mass.should be_within(0.0001).of(160.5334)
  end

end
|
50
|
+
|
@@ -0,0 +1,311 @@
|
|
1
|
+
|
2
|
+
=begin
|
3
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
4
|
+
|
5
|
+
require 'spec_id'
|
6
|
+
require 'spec_id/sequest/pepxml'
|
7
|
+
#require 'mspire/mzxml'
|
8
|
+
|
9
|
+
|
10
|
+
NODELETE = false
|
11
|
+
|
12
|
+
describe Sequest::PepXML, " created from small bioworks.xml" do
|
13
|
+
|
14
|
+
spec_large do
|
15
|
+
before(:all) do
|
16
|
+
tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
|
17
|
+
|
18
|
+
tf_params = Tfiles + "/bioworks32.params"
|
19
|
+
tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
|
20
|
+
out_path = Tfiles
|
21
|
+
@pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'gets some spectrum queries' do
|
25
|
+
@pepxml_objs.each do |obj|
|
26
|
+
(obj.spectrum_queries.size > 2).should be_true
|
27
|
+
(obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
|
28
|
+
end
|
29
|
+
#@pepxml_objs.each do |pep| puts pep.to_pepxml end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
describe Sequest::PepXML, " created from large bioworks.xml" do
|
37
|
+
# assert_equal_by_pairs (really any old array)
|
38
|
+
def assert_equal_pairs(obj, arrs)
|
39
|
+
arrs.each do |arr|
|
40
|
+
#if obj.send(arr[1]) != arr[0]
|
41
|
+
# puts "HELLO"
|
42
|
+
# puts "OBJ answer"
|
43
|
+
# p obj.send(arr[1])
|
44
|
+
# puts "ar0"
|
45
|
+
# p arr[0]
|
46
|
+
# puts "ar1"
|
47
|
+
# p arr[1]
|
48
|
+
#end
|
49
|
+
if arr[0].is_a? Float
|
50
|
+
obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
|
51
|
+
else
|
52
|
+
obj.send(arr[1]).should == arr[0]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
#swap the first two guys first
|
58
|
+
def assert_equal_pairs_swapped(obj, arrs)
|
59
|
+
arrs.each do |arr|
|
60
|
+
arr[0], arr[1] = arr[1], arr[0]
|
61
|
+
end
|
62
|
+
assert_equal_pairs(obj, arrs)
|
63
|
+
end
|
64
|
+
|
65
|
+
spec_large do
|
66
|
+
before(:all) do
|
67
|
+
st = Time.new
|
68
|
+
params = Tfiles + "/opd1/sequest.3.2.params"
|
69
|
+
bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
|
70
|
+
mzxml_path = Tfiles_l + "/opd1"
|
71
|
+
out_path = Tfiles
|
72
|
+
@pepxml_version = 18
|
73
|
+
@pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
|
74
|
+
puts "- takes #{Time.new - st} secs"
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'extracts MSMSPipelineAnalysis' do
|
78
|
+
######## HMMMMM...
|
79
|
+
Sequest::PepXML.pepxml_version.should == @pepxml_version
|
80
|
+
|
81
|
+
# MSMSPipelineAnalysis
|
82
|
+
po = @pepxml_objs.first
|
83
|
+
msms_pipeline = po.msms_pipeline_analysis
|
84
|
+
msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
|
85
|
+
msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
|
86
|
+
msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
|
87
|
+
msms_pipeline.summary_xml.should == '000.xml'
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'extracts MSmSRunSummary' do
|
91
|
+
# MSMSRunSummary
|
92
|
+
rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
|
93
|
+
rs.base_name.should =~ /\/000/
|
94
|
+
assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'extracts SampleEnzyme' do
|
98
|
+
# SampleEnzyme
|
99
|
+
se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
|
100
|
+
assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'extracts SearchSummary' do
|
104
|
+
# SearchSummary
|
105
|
+
ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
|
106
|
+
ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
|
107
|
+
ss.base_name.should =~ /\/000/
|
108
|
+
ss.peptide_mass_tol.should =~ /1\.500/
|
109
|
+
assert_equal_pairs_swapped(ss, [ # normal attributes
|
110
|
+
[:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
|
111
|
+
|
112
|
+
# enzymatic_search_constraint
|
113
|
+
[:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
|
114
|
+
|
115
|
+
# parameters
|
116
|
+
[:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
|
117
|
+
])
|
118
|
+
|
119
|
+
end
|
120
|
+
it 'extracts SearchDatabase' do
|
121
|
+
# SearchDatabase
|
122
|
+
sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
|
123
|
+
sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
|
124
|
+
assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'returns SpectrumQueries' do
|
128
|
+
# SpectrumQueries
|
129
|
+
sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
|
130
|
+
spec = sq.first
|
131
|
+
assert_equal_pairs_swapped(spec, [
|
132
|
+
[:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
|
133
|
+
#[:precursor_neutral_mass, "1074.5920"], # out2summary
|
134
|
+
[:precursor_neutral_mass, 1074.666926], # mine
|
135
|
+
[:assumed_charge, 1], [:index, "1"],
|
136
|
+
])
|
137
|
+
sh = spec.search_results.first.search_hits.first
|
138
|
+
assert_equal_pairs_swapped(sh, [
|
139
|
+
# normal attributes
|
140
|
+
[:hit_rank, 1],
|
141
|
+
[:peptide, "SIYFRNFK"],
|
142
|
+
[:peptide_prev_aa, "R"],
|
143
|
+
[:peptide_next_aa, "G"],
|
144
|
+
[:protein, "gi|16130084|ref|NP_416651.1|"],
|
145
|
+
[:num_tot_proteins, 1],
|
146
|
+
[:num_matched_ions, 4],
|
147
|
+
[:tot_num_ions, 14],
|
148
|
+
#[:calc_neutral_pep_mass, "1074.1920"], # out2summary
|
149
|
+
[:calc_neutral_pep_mass, 1074.23261], # mine
|
150
|
+
#[:massdiff, "+0.400000"], # out2summary
|
151
|
+
[:massdiff, 0.434316000000081], # mine
|
152
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
|
153
|
+
|
154
|
+
# search_score
|
155
|
+
[:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
|
156
|
+
])
|
157
|
+
|
158
|
+
spec = sq[1]
|
159
|
+
assert_equal_pairs_swapped(spec, [
|
160
|
+
[:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
|
161
|
+
[:precursor_neutral_mass, 663.206111], # mine
|
162
|
+
[:assumed_charge, 1], [:index, "2"],
|
163
|
+
])
|
164
|
+
|
165
|
+
sh = spec.search_results.first.search_hits.first
|
166
|
+
assert_equal_pairs_swapped(sh, [
|
167
|
+
# normal attributes
|
168
|
+
[:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
|
169
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
|
170
|
+
#[:massdiff, "-0.600000"], # out2summary
|
171
|
+
[:massdiff, -0.556499000000031], # mine
|
172
|
+
#[:calc_neutral_pep_mass, 663.7920], # out2summary
|
173
|
+
[:calc_neutral_pep_mass, 663.76261], # mine
|
174
|
+
|
175
|
+
# search_score
|
176
|
+
[:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
|
177
|
+
])
|
178
|
+
|
179
|
+
spec = sq[9]
|
180
|
+
assert_equal_pairs_swapped(spec, [
|
181
|
+
[:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
|
182
|
+
#[:precursor_neutral_mass, "691.0920"], # out2summary
|
183
|
+
[:precursor_neutral_mass, 691.150992], # mine
|
184
|
+
])
|
185
|
+
|
186
|
+
sh = spec.search_results.first.search_hits.first
|
187
|
+
assert_equal_pairs_swapped(sh, [
|
188
|
+
# normal attributes
|
189
|
+
[:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
|
190
|
+
|
191
|
+
#[:num_missed_cleavages, "0"], # out2summary misses this!
|
192
|
+
[:num_missed_cleavages, 1],
|
193
|
+
[:is_rejected, 0],
|
194
|
+
#[:calc_neutral_pep_mass, "691.7920"], # out2summary
|
195
|
+
[:calc_neutral_pep_mass, 691.82261], # mine
|
196
|
+
#[:massdiff, "-0.700000"], # out2summary
|
197
|
+
[:massdiff, -0.67161800000008], # mine
|
198
|
+
|
199
|
+
# search_score
|
200
|
+
[:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
|
201
|
+
])
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'can generate correct pepxml file' do
|
205
|
+
|
206
|
+
## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
|
207
|
+
string = @pepxml_objs.first.to_pepxml
|
208
|
+
ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
|
209
|
+
base_name_re = /base_name=".*?files\//o
|
210
|
+
date_re = /date=".*?"/
|
211
|
+
string.split("\n").each_with_index do |line,i|
|
212
|
+
if i > 99 ; break end
|
213
|
+
ans, exp =
|
214
|
+
if i == 1
|
215
|
+
[line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
|
216
|
+
elsif i == 2
|
217
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
|
218
|
+
elsif i == 6
|
219
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
|
220
|
+
else
|
221
|
+
[line, ans_lines[i]]
|
222
|
+
end
|
223
|
+
|
224
|
+
#ans.split('').zip(exp.split('')) do |l,a|
|
225
|
+
# if l != a
|
226
|
+
# puts line
|
227
|
+
# puts ans_lines[i]
|
228
|
+
# puts l
|
229
|
+
# puts a
|
230
|
+
# end
|
231
|
+
#end
|
232
|
+
if ans != exp
|
233
|
+
puts ans
|
234
|
+
puts exp
|
235
|
+
end
|
236
|
+
ans.should == exp
|
237
|
+
#line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
|
244
|
+
describe 'bioworks file with modifications transformed into pepxml' do
|
245
|
+
|
246
|
+
spec_large do
|
247
|
+
before(:all) do
|
248
|
+
modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
|
249
|
+
modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
|
250
|
+
@srgfile = modfiles_sequest_dir + 'tmp.srg'
|
251
|
+
@out_path = modfiles_sequest_dir + 'pepxml'
|
252
|
+
modfiles = %w(020 040).map do |file|
|
253
|
+
modfiles_sequest_dir + file + ".srf"
|
254
|
+
end
|
255
|
+
objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
|
256
|
+
@out_files = %w(020 040).map do |file|
|
257
|
+
@out_path + '/' + file + '.xml'
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
after(:all) do
|
262
|
+
File.unlink(@srgfile) unless NODELETE
|
263
|
+
FileUtils.rm_r(@out_path)
|
264
|
+
#@out_files.each do |fn|
|
265
|
+
# File.unlink(fn) unless NODELETE
|
266
|
+
#end
|
267
|
+
end
|
268
|
+
|
269
|
+
# splits string on ' ' and matches the line found by find_line_regexp in
|
270
|
+
# lines
|
271
|
+
def match_modline_pieces(lines, find_line_regexp, string)
|
272
|
+
pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
|
273
|
+
lines.each do |line|
|
274
|
+
if line =~ find_line_regexp
|
275
|
+
pieces.each do |piece|
|
276
|
+
line.should =~ piece
|
277
|
+
end
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
it 'gets modifications right in real run' do
|
283
|
+
@out_files.each do |fn|
|
284
|
+
fn.exist_as_a_file?.should be_true
|
285
|
+
beginning = IO.read(fn)
|
286
|
+
lines = beginning.split("\n")
|
287
|
+
[
|
288
|
+
[/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
|
289
|
+
|
290
|
+
[/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
|
291
|
+
[/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
|
292
|
+
[/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
|
293
|
+
[/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
|
294
|
+
].each do |a,b|
|
295
|
+
match_modline_pieces(lines, a, b)
|
296
|
+
end
|
297
|
+
[
|
298
|
+
'<modification_info modified_peptide="Y#RLGGS#T#K">',
|
299
|
+
'<mod_aminoacid_mass position="1" mass="243.1559"/>',
|
300
|
+
'<mod_aminoacid_mass position="7" mass="167.0581"/>',
|
301
|
+
'</modification_info>',
|
302
|
+
'<mod_aminoacid_mass position="9" mass="181.085"/>'
|
303
|
+
].each do |line|
|
304
|
+
beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
|
305
|
+
end
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
=end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/sqt_spec_helper'
|
4
|
+
require 'mspire/sequest/sqt'
|
5
|
+
|
6
|
+
# Specs for parsing the small.sqt test file, checked against the
# HeaderHash and TestSpectra expectation tables.
describe 'reading a small sqt file' do

  before(:each) do
    @sqt = Mspire::Sequest::Sqt.new("#{TESTFILES}/small.sqt")
  end

  it 'can access header entries like a hash' do
    header = @sqt.header
    HeaderHash.each_pair do |key, expected|
      header[key].should == expected
    end
  end

  it 'can access header entries with methods' do
    header = @sqt.header
    # for example:
    header.database.should == HeaderHash['Database']
    # all working:
    HeaderHash.each_pair do |key, expected|
      reader = Mspire::Sequest::Sqt::Header::KeysToAtts[key]
      header.send(reader).should == expected
    end
  end

  it 'has spectra, matches, and loci' do
    seventeenth = @sqt.spectra[16]
    reply = {
      :first => @sqt.spectra.first,
      :last => @sqt.spectra.last,
      :seventeenth => seventeenth,
      :first_match_17 => seventeenth.matches.first,
      :last_match_17 => seventeenth.matches.last,
      :last_match_17_last_loci => seventeenth.matches.last.loci.last,
    }
    [:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci].each do |key|
      TestSpectra[key].each do |attribute, expected|
        if expected.is_a?(Float)
          reply[key].send(attribute).should be_within(0.0000000001).of(expected)
        elsif key != :last_match_17_last_loci
          # non-float attributes of the loci entry are deliberately not compared
          reply[key].send(attribute).should == expected
        end
      end
    end
    [:first, :last].each do |which|
      @sqt.spectra[16].matches.send(which).loci.size.should == 1
    end
  end

end
|
51
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Expected sqt header entries, keyed by header name. Values are strings,
# except the Comment, DynamicMod and StaticMod keys, which hold arrays of
# strings.
HeaderHash = {}
header_doublets = [
  ['SQTGenerator', 'mspire'],
  ['SQTGeneratorVersion', '0.3.1'],
  ['Database', 'C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta'],
  ['FragmentMasses', 'AVG'],
  ['PrecursorMasses', 'AVG'],
  ['StartTime', ''],
  ['Alg-MSModel', 'LCQ Deca XP'],
  ['DBLocusCount', '4237'],
  ['Alg-FragMassTol', '1.0000'],
  ['Alg-PreMassTol', '25.0000'],
  ['Alg-IonSeries', '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0'],
  ['Alg-PreMassUnits', 'ppm'],
  ['Alg-Enzyme', 'Trypsin(KR/P) (2)'],

  ['Comment', ['ultra small file created for testing', 'Created from Bioworks .srf file']],
  ['DynamicMod', ['M*=+15.99940', 'STY#=+79.97990']],
  ['StaticMod', []],
]
# load the pairs into the hash in the order listed above
header_doublets.each { |name, value| HeaderHash[name] = value }
|
25
|
+
|
26
|
+
# Expected attribute values for selected spectra, matches and loci.
# Each top-level key names an object under test; the nested hash maps an
# attribute (sent as a method name by the specs) to its expected value.
TestSpectra = {
  :first => {
    :first_scan => 2,
    :last_scan => 2,
    :charge => 1,
    :time_to_process => 0.0,
    :node => "TESLA",
    :mh => 390.92919921875,
    :total_intensity => 2653.90307617188,
    :lowest_sp => 0.0,
    :num_matched_peptides => 0,
    :matches => [],
  },
  :last => {
    :first_scan => 27,
    :last_scan => 27,
    :charge => 1,
    :time_to_process => 0.0,
    :node => "TESLA",
    :mh => 393.008056640625,
    :total_intensity => 2896.16967773438,
    :lowest_sp => 0.0,
    :num_matched_peptides => 0,
    :matches => [],
  },
  :seventeenth => {
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :time_to_process => 0.0,
    :node => "TESLA",
    :mh => 1022.10571289062,
    :total_intensity => 3637.86059570312,
    :lowest_sp => 0.0,
    :num_matched_peptides => 41,
  },
  :first_match_17 => {
    :rxcorr => 1,
    :rsp => 5,
    :mh => 1022.11662242,
    :deltacn_orig => 0.0,
    :xcorr => 0.725152492523193,
    :sp => 73.9527359008789,
    :ions_matched => 6,
    :ions_total => 24,
    :sequence => "-.MGT#TTM*GVK.L",
    :manual_validation_status => "U",
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :deltacn => 0.0672458708286285,
    :aaseq => 'MGTTTMGVK',
  },
  :last_match_17 => {
    :rxcorr => 10,
    :rsp => 16,
    :mh => 1022.09807242,
    :deltacn_orig => 0.398330867290497,
    :xcorr => 0.436301857233047,
    :sp => 49.735767364502,
    :ions_matched => 5,
    :ions_total => 21,
    :sequence => "-.MRT#TSFAK.V",
    :manual_validation_status => "U",
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :deltacn => 1.1,
    :aaseq => 'MRTTSFAK',
  },
  :last_match_17_last_loci => {
    :reference => 'gi|16129390|ref|NP_415948.1|',
    :first_entry => 'gi|16129390|ref|NP_415948.1|',
    :locus => 'gi|16129390|ref|NP_415948.1|',
    :description => 'Fake description',
  },
}
|
34
|
+
|