mspire-sequest 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/srf/pepxml'
|
4
|
+
|
5
|
+
describe 'an Mspire::Ident::Pepxml object from an srf file with modifications' do
|
6
|
+
|
7
|
+
before(:all) do
|
8
|
+
@out_path = TESTFILES + '/tmp'
|
9
|
+
srf_file = SEQUEST_DIR + '/opd1_2runs_2mods/sequest331/020.srf'
|
10
|
+
@srf = Mspire::Sequest::Srf.new(srf_file)
|
11
|
+
end
|
12
|
+
|
13
|
+
before(:each) do
|
14
|
+
FileUtils.mkdir @out_path unless File.exist?(@out_path)
|
15
|
+
end
|
16
|
+
|
17
|
+
after(:each) do
|
18
|
+
FileUtils.rm_rf @out_path
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
it 'produces xml with all the expected parts' do
|
23
|
+
tags = %w(msms_pipeline_analysis msms_run_summary sample_enzyme specificity search_summary search_database enzymatic_search_constraint aminoacid_modification parameter spectrum_query search_result search_hit modification_info mod_aminoacid_mass search_score)
|
24
|
+
pepxml = @srf.to_pepxml(:verbose => false)
|
25
|
+
xml_string = pepxml.to_xml
|
26
|
+
tags.each do |tag|
|
27
|
+
xml_string.should match( %r{<#{tag}} )
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# takes an xml string of attributes (' key="val" key2="val2" ') and a xml
|
32
|
+
# node that is expected to have those attributes
|
33
|
+
# Asserts that +node+ carries every attribute listed in +string+.
#
# node   - an object answering [] (attribute lookup by name), keys and values
# string - attribute text such as ' key="val" key2="val2" '; it is only read,
#          never modified, so frozen literals are safe to pass
#
# Raises RuntimeError if node is nil or an empty array.  On a mismatch the
# expected and actual values plus all node keys/values are printed before the
# expectation itself fails.
def has_attributes(node, string)
  raise "your xml node is nil!!!" if node.nil?
  raise "you gave me an empty array instead of a node" if node == []

  # Work on a stripped copy.  Dropping the final quote mark lets the split
  # below treat every `"` followed by whitespace as a pair separator.
  attributes = string.strip.chomp('"')
  attributes.split(/"\s+/).each do |pair|
    key, val = pair.split('=', 2)
    # remove the opening quote that is still attached to the value
    val = val[1..-1] if val[0, 1] == '"'
    actual = node[key]
    if actual != val
      puts "FAILING"
      puts "EXPECT: #{key} => #{val} ACTUAL => #{actual}"
      puts "NODE KEYS: "
      p node.keys
      puts "NODE VALUES: "
      p node.values
    end
    actual.should == val
  end
end
|
57
|
+
|
58
|
+
it 'gets everything right' do
|
59
|
+
xml_string = @srf.to_pepxml(:verbose => false).to_xml
|
60
|
+
doc = Nokogiri::XML.parse(xml_string, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
61
|
+
|
62
|
+
root = doc.root
|
63
|
+
|
64
|
+
root.name.should == "msms_pipeline_analysis"
|
65
|
+
has_attributes( root, 'schemaLocation="http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v115.xsd"' )
|
66
|
+
root['date'].should_not be_nil
|
67
|
+
root['summary_xml'].should match( "020.xml" )
|
68
|
+
root.namespaces.should == ( {"xmlns" => "http://regis-web.systemsbiology.net/pepXML" } )
|
69
|
+
|
70
|
+
mrs_node = root.child
|
71
|
+
mrs_node.name.should == 'msms_run_summary'
|
72
|
+
has_attributes( mrs_node, 'msManufacturer="Thermo" msModel="LCQ Deca XP" msIonization="ESI" msMassAnalyzer="Ion Trap" msDetector="UNKNOWN" raw_data=".mzML"' )
|
73
|
+
se_node = mrs_node.child
|
74
|
+
se_node.name.should == 'sample_enzyme'
|
75
|
+
has_attributes se_node, 'name="Trypsin"'
|
76
|
+
specificity_node = se_node.child
|
77
|
+
specificity_node.name.should == 'specificity'
|
78
|
+
has_attributes specificity_node, 'cut="KR" no_cut="P" sense="C"'
|
79
|
+
search_summary_node = se_node.next_sibling
|
80
|
+
search_summary_node.name.should == 'search_summary'
|
81
|
+
has_attributes search_summary_node, 'search_engine="SEQUEST" precursor_mass_type="average" fragment_mass_type="average" search_id="1"'
|
82
|
+
search_summary_node['base_name'].should match( %r{sequest/opd1_2runs_2mods/sequest331/020$} )
|
83
|
+
# TODO: expand the search summary check!
|
84
|
+
# TODO: finish testing other guys for accuracy
|
85
|
+
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'mspire/sequest/srf'
|
5
|
+
require 'mspire/sequest/srf/search'
|
6
|
+
|
7
|
+
class SRF_TO_MGF_HELPER
|
8
|
+
FIRST_MSMS = {
|
9
|
+
:first_lines => ['BEGIN IONS', 'TITLE=000.2.2.1.dta', 'CHARGE=1+', 'PEPMASS=391.04541015625'],
|
10
|
+
:first_two_ion_lines => ['111.976043701172 41418.0', '112.733383178711 88292.0'],
|
11
|
+
:last_two_ion_lines => ['407.412780761719 18959.0', '781.085327148438 10104.0'],
|
12
|
+
:last_line => 'END IONS',
|
13
|
+
}
|
14
|
+
LAST_MSMS = {
|
15
|
+
:first_lines => ['BEGIN IONS', 'TITLE=000.3748.3748.3.dta', 'CHARGE=3+', 'PEPMASS=433.56494129743004'],
|
16
|
+
:first_two_ion_lines => ['143.466918945312 2110.0', '151.173095703125 4134.0'],
|
17
|
+
:last_two_ion_lines => ['482.678771972656 3357.0', '610.4111328125 8968.0'],
|
18
|
+
:last_line => 'END IONS',
|
19
|
+
}
|
20
|
+
end
|
21
|
+
|
22
|
+
# these have been checked against Bioworks .dta output
|
23
|
+
class SRF_TO_DTA_HELPER
|
24
|
+
FIRST_SCAN = {
|
25
|
+
:first_line => '391.045410 1',
|
26
|
+
:first_two_ion_lines => ['111.9760 41418', '112.7334 88292'],
|
27
|
+
:last_two_ion_lines => ['407.4128 18959', '781.0853 10104'],
|
28
|
+
}
|
29
|
+
LAST_SCAN = {
|
30
|
+
:first_line => '1298.680271 3',
|
31
|
+
:first_two_ion_lines => ['143.4669 2110', '151.1731 4134'],
|
32
|
+
:last_two_ion_lines => ['482.6788 3357', '610.4111 8968'],
|
33
|
+
}
|
34
|
+
end
|
35
|
+
|
36
|
+
Srf_file = MS::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
37
|
+
TMPDIR = TESTFILES + '/tmp'
|
38
|
+
Mgf_output = TMPDIR + '/000.mgf.tmp'
|
39
|
+
Dta_output = TMPDIR + '/000.dta.tmp'
|
40
|
+
|
41
|
+
shared_examples_for 'an srf to ms2 search converter' do |convert_to_mgf, convert_to_dta|
|
42
|
+
# Compares two whitespace-separated lines field by field as floats.
#
# expected - the reference line
# actual   - the line produced by the code under test
# delta    - maximum allowed absolute difference per field
#
# Fields are paired positionally against the fields of +expected+.
def assert_ion_line_close(expected, actual, delta)
  expected.split(/\s+/).zip(actual.split(/\s+/)).each do |exp, act|
    # The produced value is the subject of the expectation so that a failure
    # message reports it (not the fixture) as the value that was off.
    act.to_f.should be_within(delta).of(exp.to_f)
  end
end
|
47
|
+
|
48
|
+
# Checks a .dta file on disk against the expected values in +key+.
#
# key      - hash with :first_line, :first_two_ion_lines and
#            :last_two_ion_lines entries
# filename - path of the .dta file to read
#
# The first field of the first line is compared numerically, its last field
# exactly; the two lines after the first line and the final two lines are
# compared field by field with assert_ion_line_close.
def compare_dtas(key, filename)
  File.exist?(filename).should be_true
  lines = IO.read(filename).strip.split("\n")

  exp_first, act_first = [key[:first_line], lines[0]].map { |l| l.split(/\s+/) }
  # values read from the file are the subjects, so failure messages report
  # them as what was produced
  act_first.first.to_f.should be_within(0.000001).of(exp_first.first.to_f)
  act_first.last.should == exp_first.last

  expected_ions = key[:first_two_ion_lines] + key[:last_two_ion_lines]
  actual_ions = lines[1, 2] + lines[-2, 2]
  expected_ions.zip(actual_ions) do |exp, act|
    assert_ion_line_close(exp, act, 0.0001)
  end
end
|
58
|
+
|
59
|
+
# Checks one chunk of mgf text against the expected values in +key+.
#
# key          - hash with :first_lines (four lines), :first_two_ion_lines,
#                :last_two_ion_lines and :last_line entries
# string_chunk - the text of a single chunk of the mgf output
#
# The first three lines and the last line must match exactly.  The fourth
# line is split on '=': the left side must match exactly and the right side
# numerically.  Ion lines are compared with assert_ion_line_close.
def compare_mgfs(key, string_chunk)
  lines = string_chunk.strip.split("\n")
  # produced text is the subject, so failure messages report it as what was
  # produced
  lines[0, 3].should == key[:first_lines][0, 3]

  exp_pair, act_pair = [key[:first_lines][3], lines[3]].map { |line| line.split('=') }
  act_pair.first.should == exp_pair.first
  act_pair.last.to_f.should be_within(0.0000001).of(exp_pair.last.to_f)

  expected_ions = key[:first_two_ion_lines] + key[:last_two_ion_lines]
  # lines[-3, 2] skips the final line, which is checked separately below
  actual_ions = lines[4, 2] + lines[-3, 2]
  expected_ions.zip(actual_ions).each do |exp_line, act_line|
    assert_ion_line_close(exp_line, act_line, 0.00000001)
  end

  lines[-1].should == key[:last_line]
end
|
72
|
+
|
73
|
+
it 'converts to mgf' do
|
74
|
+
output = Mgf_output
|
75
|
+
convert_to_mgf.call
|
76
|
+
File.exist?(output).should be_true
|
77
|
+
output = IO.read(output)
|
78
|
+
chunks = output.split("\n\n")
|
79
|
+
|
80
|
+
compare_mgfs(SRF_TO_MGF_HELPER::FIRST_MSMS, chunks.first)
|
81
|
+
compare_mgfs(SRF_TO_MGF_HELPER::LAST_MSMS, chunks.last)
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'generates .dta files' do
|
85
|
+
output = Dta_output
|
86
|
+
convert_to_dta.call
|
87
|
+
File.exist?(output).should be_true
|
88
|
+
File.directory?(output).should be_true
|
89
|
+
# frozen (not verified):
|
90
|
+
Dir[output + "/*.*"].size.should == 3893 # the correct number files
|
91
|
+
|
92
|
+
compare_dtas(SRF_TO_DTA_HELPER::FIRST_SCAN, output + '/000.2.2.1.dta')
|
93
|
+
compare_dtas(SRF_TO_DTA_HELPER::LAST_SCAN, output + '/000.3748.3748.3.dta')
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
describe 'converting an srf to ms2 search format: programmatic' do
|
99
|
+
before do
|
100
|
+
FileUtils.mkdir(TMPDIR) unless File.exist?(TMPDIR)
|
101
|
+
end
|
102
|
+
after do
|
103
|
+
FileUtils.rmtree(TMPDIR)
|
104
|
+
end
|
105
|
+
|
106
|
+
srf = Mspire::Sequest::Srf.new(Srf_file)
|
107
|
+
|
108
|
+
convert_to_mgf = lambda { srf.to_mgf(Mgf_output) }
|
109
|
+
convert_to_dta = lambda { srf.to_dta(Dta_output) }
|
110
|
+
|
111
|
+
it_behaves_like 'an srf to ms2 search converter', convert_to_mgf, convert_to_dta
|
112
|
+
|
113
|
+
end
|
114
|
+
|
115
|
+
describe 'converting an srf to ms2 search format: commandline' do
|
116
|
+
# Builds a lambda that, when called, splits +string+ on whitespace and hands
# the resulting argument list to Mspire::Sequest::Srf::Search.commandline.
def self.commandline_lambda(string)
  lambda do
    argv = string.split(/\s+/)
    Mspire::Sequest::Srf::Search.commandline(argv)
  end
end
|
119
|
+
|
120
|
+
convert_to_mgf = self.commandline_lambda "#{Srf_file} -o #{Mgf_output}"
|
121
|
+
convert_to_dta = self.commandline_lambda "#{Srf_file} -o #{Dta_output} -f dta"
|
122
|
+
|
123
|
+
before(:each) do
|
124
|
+
FileUtils.mkdir(TMPDIR) unless File.exist?(TMPDIR)
|
125
|
+
end
|
126
|
+
after(:each) do
|
127
|
+
FileUtils.rmtree(TMPDIR)
|
128
|
+
end
|
129
|
+
|
130
|
+
it_behaves_like 'an srf to ms2 search converter', convert_to_mgf, convert_to_dta
|
131
|
+
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/srf'
|
4
|
+
require 'mspire/sequest/srf/sqt'
|
5
|
+
|
6
|
+
SpecHelperHeaderHash = {
|
7
|
+
'SQTGenerator' => 'mspire: ms-sequest',
|
8
|
+
'SQTGeneratorVersion' => String,
|
9
|
+
'Database' => 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta',
|
10
|
+
'FragmentMasses' => 'AVG',
|
11
|
+
'PrecursorMasses' => 'AVG',
|
12
|
+
'StartTime' => nil,
|
13
|
+
'Alg-MSModel' => 'LCQ Deca XP',
|
14
|
+
'Alg-PreMassUnits' => 'amu',
|
15
|
+
'DBLocusCount' => '4237',
|
16
|
+
'Alg-FragMassTol' => '1.0000',
|
17
|
+
'Alg-PreMassTol' => '1.4000',
|
18
|
+
'Alg-IonSeries' => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
|
19
|
+
'Alg-Enzyme' => 'Trypsin(KR/P) (2)',
|
20
|
+
'Comment' => ['Created from Bioworks .srf file'],
|
21
|
+
'DynamicMod' => ['STY*=+79.97990', 'M#=+14.02660'],
|
22
|
+
}
|
23
|
+
|
24
|
+
ExpasyStaticMods = ['C=160.1901','Cterm=10.1230','E=161.4455']
|
25
|
+
MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
|
26
|
+
SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
|
27
|
+
|
28
|
+
# these only need to be really close
|
29
|
+
Close_indices = {
|
30
|
+
'S' => [6,7],
|
31
|
+
'M' => [3,4,5,6],
|
32
|
+
}
|
33
|
+
|
34
|
+
SpecHelperOtherLines =<<END
|
35
|
+
S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
|
36
|
+
S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
|
37
|
+
M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
|
38
|
+
L gi|16128266|ref|NP_414815.1|
|
39
|
+
END
|
40
|
+
|
41
|
+
SpecHelperOtherLinesEnd =<<END
|
42
|
+
L gi|90111093|ref|NP_414704.4|
|
43
|
+
M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12 54 K.LQKIITNSY*K U
|
44
|
+
L gi|90111124|ref|NP_414904.2|
|
45
|
+
END
|
46
|
+
|
47
|
+
|
48
|
+
module SPEC
|
49
|
+
Srf_file = MS::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
50
|
+
TMPDIR = TESTFILES + '/tmp'
|
51
|
+
Srf_output = TMPDIR + '/000.sqt.tmp'
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
# {
|
56
|
+
# :lambdas => { :basic_conversion, :with_new_db_path, :update_the_db_path }
|
57
|
+
# :original_db_filename = String
|
58
|
+
# # "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
59
|
+
# :output => String # SPEC::Srf_output
|
60
|
+
# }
|
61
|
+
|
62
|
+
shared_examples_for 'an srf to sqt converter' do |opts|
|
63
|
+
|
64
|
+
# returns true or false
|
65
|
+
# Checks every header line against the expectations in +hash+.
#
# header_lines - tab-separated lines; the second field is the key and the
#                third field the value
# hash         - maps a key to its expectation:
#                  an Array  -> the value must be one of its members
#                  String    -> (the class itself) the value must be a String
#                  otherwise -> the value must equal the expectation
#
# Returns true when all lines match (or there are none), false otherwise.
# The key, value and expectation of the first failing line are printed.
def header_hash_match(header_lines, hash)
  header_lines.all? do |line|
    _tag, key, value = line.chomp.split("\t")
    expected = hash[key]
    matched =
      if expected.is_a?(Array)
        expected.include?(value)
      elsif expected == String
        value.is_a?(String)
      else
        value == expected
      end
    unless matched
      puts "FAILED: "
      p key
      p value
      p expected
    end
    matched
  end
end
|
93
|
+
|
94
|
+
# Compares produced sqt lines against expected ones, pairing them by
# position in +exp_line_ar+.
#
# act_line_ar - lines produced by the code under test
# exp_line_ar - reference lines
#
# Each line is split on tabs.  For lines whose first field is S or M, the
# fields at the positions listed in Close_indices are removed and compared
# numerically; all remaining fields must match exactly.
def sqt_line_match(act_line_ar, exp_line_ar)
  exp_line_ar.zip(act_line_ar) do |exp_line, act_line|
    e_pieces, a_pieces = [exp_line, act_line].map { |line| line.chomp.split("\t") }
    if %w(S M).include?(k = e_pieces[0])
      e_close, a_close = [e_pieces, a_pieces].map do |pieces|
        # delete from the highest index down so earlier deletions do not
        # shift the positions still to be removed; reverse restores order
        Close_indices[k].sort.reverse.map { |i| pieces.delete_at(i).to_f }.reverse
      end
      e_close.zip(a_close) do |ex, ac|
        # produced value is the subject so failure messages report it
        ac.should be_within(0.0000001).of(ex)
      end
    end
    a_pieces.should == e_pieces
  end
end
|
110
|
+
|
111
|
+
it 'converts without bothering with the database' do
|
112
|
+
opts[:lambdas][:basic_conversion].call
|
113
|
+
File.exist?(opts[:output]).should be_true
|
114
|
+
lines = File.readlines(opts[:output])
|
115
|
+
lines.size.should == 80910
|
116
|
+
header_lines = lines.grep(/^H/)
|
117
|
+
(header_lines.size > 10).should be_true
|
118
|
+
header_hash_match(header_lines, SpecHelperHeaderHash).should be_true
|
119
|
+
other_lines = lines.grep(/^[^H]/)
|
120
|
+
|
121
|
+
sqt_line_match(other_lines[0,4], SpecHelperOtherLines.strip.split("\n"))
|
122
|
+
sqt_line_match(other_lines[-3,3], SpecHelperOtherLinesEnd.strip.split("\n"))
|
123
|
+
|
124
|
+
File.unlink(opts[:output]) rescue false
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'can get db info with correct path' do
|
128
|
+
opts[:lambdas][:with_new_db_path].call
|
129
|
+
File.exist?(opts[:output]).should be_true
|
130
|
+
lines = IO.readlines(opts[:output])
|
131
|
+
has_md5 = lines.any? do |line|
|
132
|
+
line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/
|
133
|
+
end
|
134
|
+
has_md5.should be_true
|
135
|
+
|
136
|
+
has_seq_len = lines.any? do |line|
|
137
|
+
# frozen
|
138
|
+
line =~ /DBSeqLength\s+1342842/
|
139
|
+
end
|
140
|
+
has_seq_len.should be_true
|
141
|
+
lines.size.should == 80912
|
142
|
+
File.unlink(opts[:output]) rescue false
|
143
|
+
end
|
144
|
+
|
145
|
+
it 'can update the Database' do
|
146
|
+
opts[:lambdas][:update_the_db_path].call
|
147
|
+
regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
|
148
|
+
updated_db = IO.readlines(opts[:output]).any? do |line|
|
149
|
+
line =~ regexp
|
150
|
+
end
|
151
|
+
updated_db.should be_true
|
152
|
+
File.unlink(opts[:output]) rescue false
|
153
|
+
end
|
154
|
+
|
155
|
+
end
|
156
|
+
|
157
|
+
describe "programmatic interface srf to sqt" do
|
158
|
+
|
159
|
+
srf = Mspire::Sequest::Srf.new(SPEC::Srf_file)
|
160
|
+
|
161
|
+
shared_hash = {
|
162
|
+
:lambdas => {
|
163
|
+
basic_conversion: lambda { srf.to_sqt(SPEC::Srf_output) },
|
164
|
+
with_new_db_path: lambda { srf.to_sqt(SPEC::Srf_output, :db_info => true, :new_db_path => MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') },
|
165
|
+
update_the_db_path: lambda { srf.to_sqt(SPEC::Srf_output, :new_db_path => MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) },
|
166
|
+
},
|
167
|
+
output: SPEC::Srf_output,
|
168
|
+
mkdir: SPEC::TMPDIR,
|
169
|
+
original_db_filename: "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
170
|
+
}
|
171
|
+
|
172
|
+
it_behaves_like "an srf to sqt converter", shared_hash
|
173
|
+
|
174
|
+
before(:each) do
|
175
|
+
FileUtils.mkdir(SPEC::TMPDIR) unless File.exist?(SPEC::TMPDIR)
|
176
|
+
end
|
177
|
+
after(:each) do
|
178
|
+
FileUtils.rm_rf(SPEC::TMPDIR)
|
179
|
+
end
|
180
|
+
|
181
|
+
# this requires programmatic interface to manipulate the object for this
|
182
|
+
# test
|
183
|
+
it 'warns if the db path is incorrect and we want to update db info' do
  output = shared_hash[:output]
  # requires some knowledge of how the database file is extracted
  # internally
  wacky_path = '/not/a/real/path/wacky.fasta'

  captured = StringIO.new
  original_stderr = $stderr
  begin
    srf.header.db_filename = wacky_path
    $stderr = captured
    srf.to_sqt(output, :db_info => true)
  ensure
    # always undo the global and shared-object changes, even when the
    # conversion raises, so later examples see the original state
    $stderr = original_stderr
    srf.header.db_filename = shared_hash[:original_db_filename]
  end

  captured.string.include?(wacky_path).should be_true
  File.exist?(output).should be_true
  IO.readlines(output).size.should == 80910
  File.delete(output) rescue false
end
|
202
|
+
end
|
203
|
+
|
204
|
+
describe "command-line interface srf to sqt" do
|
205
|
+
before(:each) do
|
206
|
+
FileUtils.mkdir(SPEC::TMPDIR) unless File.exist?(SPEC::TMPDIR)
|
207
|
+
end
|
208
|
+
after(:each) do
|
209
|
+
FileUtils.rm_rf(SPEC::TMPDIR)
|
210
|
+
end
|
211
|
+
|
212
|
+
# Builds a lambda that, when called, splits +string+ on whitespace and hands
# the resulting argument list to Mspire::Sequest::Srf::Sqt.commandline.
def self.commandline_lambda(string)
  lambda do
    argv = string.split(/\s+/)
    Mspire::Sequest::Srf::Sqt.commandline(argv)
  end
end
|
215
|
+
|
216
|
+
base_cmd = "#{SPEC::Srf_file} -o #{SPEC::Srf_output}"
|
217
|
+
shared_hash = {
|
218
|
+
lambdas: {
|
219
|
+
basic_conversion: self.commandline_lambda(base_cmd),
|
220
|
+
with_new_db_path: self.commandline_lambda(base_cmd + " --db-info --db-path #{MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}"),
|
221
|
+
update_the_db_path: self.commandline_lambda(base_cmd + " --db-path #{MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" ),
|
222
|
+
},
|
223
|
+
output: SPEC::Srf_output,
|
224
|
+
mkdir: SPEC::TMPDIR,
|
225
|
+
}
|
226
|
+
|
227
|
+
it_behaves_like "an srf to sqt converter", shared_hash
|
228
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mspire/sequest/srf_spec_helper' # in spec/
|
3
|
+
|
4
|
+
require 'mspire/sequest/srf'
|
5
|
+
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
include SRFHelper
|
9
|
+
|
10
|
+
class Hash
  # Checks that +obj+ answers every key of this hash with the stored value.
  #
  # Keys are converted to symbols and sent to +obj+ as messages.
  #   :peaks, :hits, :proteins -> the size of the returned collection must
  #                               equal the stored value
  #   Float values             -> must agree within 0.0001 for :ppm and
  #                               within 0.0000001 for every other key
  #   anything else            -> must be == to the stored value
  #
  # Returns true when every entry matches (or the hash is empty).  The first
  # mismatch is printed (key, needed value, value found) and ends the check.
  def object_match(obj)
    all? do |k, v|
      k = k.to_sym
      actual = obj.send(k)
      matched =
        if [:peaks, :hits, :proteins].include?(k)
          # report the size on failure, since the size is what is compared
          actual = actual.size
          actual == v
        elsif v.is_a?(Float)
          delta = (k == :ppm) ? 0.0001 : 0.0000001
          (v - actual).abs <= delta
        else
          actual == v
        end
      unless matched
        puts "BAD KEY: #{k}"
        puts "need: #{v}"
        puts "got: #{actual}"
      end
      matched
    end
  end
end
|
35
|
+
|
36
|
+
shared_examples_for 'an srf reader' do |srf_obj, test_hash|
|
37
|
+
|
38
|
+
it 'retrieves correct header info' do
|
39
|
+
test_hash[:header].object_match(srf_obj.header).should be_true
|
40
|
+
test_hash[:dta_gen].object_match(srf_obj.header.dta_gen).should be_true
|
41
|
+
end
|
42
|
+
|
43
|
+
# a few more dta params could be added in here:
|
44
|
+
it 'retrieves correct dta files' do
|
45
|
+
test_hash[:dta_files_first].object_match(srf_obj.dta_files.first).should be_true
|
46
|
+
test_hash[:dta_files_last].object_match(srf_obj.dta_files.last).should be_true
|
47
|
+
end
|
48
|
+
|
49
|
+
# given an array of out_file objects, returns the first set of hits
|
50
|
+
# Finds the first out_file that reports at least one hit.
#
# out_files - enumerable of objects answering num_hits and hits
#
# Returns that out_file's hits, or nil when no out_file has any hits.
def get_first_peps(out_files)
  with_hits = out_files.find { |outf| outf.num_hits > 0 }
  with_hits && with_hits.hits
end
|
58
|
+
|
59
|
+
it 'retrieves correct out files' do
|
60
|
+
test_hash[:out_files_first].object_match(srf_obj.out_files.first).should be_true
|
61
|
+
test_hash[:out_files_last].object_match(srf_obj.out_files.last).should be_true
|
62
|
+
# first available peptide hit
|
63
|
+
test_hash[:out_files_first_pep].object_match(get_first_peps(srf_obj.out_files).first).should be_true
|
64
|
+
# last available peptide hit
|
65
|
+
test_hash[:out_files_last_pep].object_match(get_first_peps(srf_obj.out_files.reverse).last).should be_true
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'retrieves correct params' do
|
69
|
+
test_hash[:params].object_match(srf_obj.params).should be_true
|
70
|
+
end
|
71
|
+
|
72
|
+
# TODO:
|
73
|
+
#it_should 'retrieve probabilities if available'
|
74
|
+
end
|
75
|
+
|
76
|
+
# TODO: we should try to get some tests with sf values present!
|
77
|
+
|
78
|
+
|
79
|
+
Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
|
80
|
+
|
81
|
+
To_run = {
|
82
|
+
'3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
|
83
|
+
'3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
|
84
|
+
'3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
|
85
|
+
}
|
86
|
+
|
87
|
+
# I had these nicely combined under RSpec, but this is not as obvious a task
|
88
|
+
# under minispec given the corrupted include behavior...
|
89
|
+
|
90
|
+
describe 'reading srf with duplicate refs v3.2' do
|
91
|
+
|
92
|
+
info = To_run['3.2']
|
93
|
+
file = MS::TESTDATA + '/sequest' + info[:file]
|
94
|
+
srf_obj = Mspire::Sequest::Srf.new(file)
|
95
|
+
|
96
|
+
it_behaves_like 'an srf reader', srf_obj, info[:hash]
|
97
|
+
end
|
98
|
+
|
99
|
+
describe 'reading srf with duplicate refs v3.3' do
|
100
|
+
info = To_run['3.3']
|
101
|
+
file = MS::TESTDATA + '/sequest' + info[:file]
|
102
|
+
srf_obj = Mspire::Sequest::Srf.new(file)
|
103
|
+
|
104
|
+
it_behaves_like 'an srf reader', srf_obj, info[:hash]
|
105
|
+
end
|
106
|
+
|
107
|
+
describe 'reading srf with duplicate refs v3.3.1' do
|
108
|
+
info = To_run['3.3.1']
|
109
|
+
file = MS::TESTDATA + '/sequest' + info[:file]
|
110
|
+
srf_obj = Mspire::Sequest::Srf.new(file)
|
111
|
+
|
112
|
+
it_behaves_like 'an srf reader', srf_obj, info[:hash]
|
113
|
+
end
|