mspire-sequest 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/srf/pepxml'
|
4
|
+
|
5
|
+
describe 'an Mspire::Ident::Pepxml object from an srf file with modifications' do
|
6
|
+
|
7
|
+
before(:all) do
|
8
|
+
@out_path = TESTFILES + '/tmp'
|
9
|
+
srf_file = SEQUEST_DIR + '/opd1_2runs_2mods/sequest331/020.srf'
|
10
|
+
@srf = Mspire::Sequest::Srf.new(srf_file)
|
11
|
+
end
|
12
|
+
|
13
|
+
before(:each) do
|
14
|
+
FileUtils.mkdir @out_path unless File.exist?(@out_path)
|
15
|
+
end
|
16
|
+
|
17
|
+
after(:each) do
|
18
|
+
FileUtils.rm_rf @out_path
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
it 'produces xml with all the expected parts' do
|
23
|
+
tags = %w(msms_pipeline_analysis msms_run_summary sample_enzyme specificity search_summary search_database enzymatic_search_constraint aminoacid_modification parameter spectrum_query search_result search_hit modification_info mod_aminoacid_mass search_score)
|
24
|
+
pepxml = @srf.to_pepxml(:verbose => false)
|
25
|
+
xml_string = pepxml.to_xml
|
26
|
+
tags.each do |tag|
|
27
|
+
xml_string.should match( %r{<#{tag}} )
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Checks that an xml node carries every attribute named in an attribute
# string.
#
# node   - the xml node to inspect; it is read with node[key], and
#          node.keys / node.values are printed when a value differs.
# string - xml attribute text such as ' key="val" key2="val2" '. The
#          caller's string is left untouched.
#
# Raises a RuntimeError if node is nil or an empty array.
def has_attributes(node, string)
  raise "your xml node is nil!!!" if node.nil?
  raise "you gave me an empty array instead of a node" if node == []

  # Work on a copy (non-bang strip/chomp) so literals passed by the caller
  # are never modified.  Dropping the tail end quote mark means that
  # splitting on '"' + whitespace leaves each pair in the form key="val
  attribute_text = string.strip.chomp('"')
  attribute_text.split(/"\s+/).each do |str|
    (key, val) = str.split('=', 2)
    # remove the opening quote mark that is still attached to the value
    val = val[1..-1] if val[0,1] == '"'
    actual = node[key]
    if actual != val
      puts "FAILING"
      puts "EXPECT: #{key} => #{val} ACTUAL => #{actual}"
      puts "NODE KEYS: "
      p node.keys
      puts "NODE VALUES: "
      p node.values
    end
    actual.should == val
  end
end
|
57
|
+
|
58
|
+
it 'gets everything right' do
|
59
|
+
xml_string = @srf.to_pepxml(:verbose => false).to_xml
|
60
|
+
doc = Nokogiri::XML.parse(xml_string, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
61
|
+
|
62
|
+
root = doc.root
|
63
|
+
|
64
|
+
root.name.should == "msms_pipeline_analysis"
|
65
|
+
has_attributes( root, 'schemaLocation="http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v115.xsd"' )
|
66
|
+
root['date'].should_not be_nil
|
67
|
+
root['summary_xml'].should match( "020.xml" )
|
68
|
+
root.namespaces.should == ( {"xmlns" => "http://regis-web.systemsbiology.net/pepXML" } )
|
69
|
+
|
70
|
+
mrs_node = root.child
|
71
|
+
mrs_node.name.should == 'msms_run_summary'
|
72
|
+
has_attributes( mrs_node, 'msManufacturer="Thermo" msModel="LCQ Deca XP" msIonization="ESI" msMassAnalyzer="Ion Trap" msDetector="UNKNOWN" raw_data=".mzML"' )
|
73
|
+
se_node = mrs_node.child
|
74
|
+
se_node.name.should == 'sample_enzyme'
|
75
|
+
has_attributes se_node, 'name="Trypsin"'
|
76
|
+
specificity_node = se_node.child
|
77
|
+
specificity_node.name.should == 'specificity'
|
78
|
+
has_attributes specificity_node, 'cut="KR" no_cut="P" sense="C"'
|
79
|
+
search_summary_node = se_node.next_sibling
|
80
|
+
search_summary_node.name.should == 'search_summary'
|
81
|
+
has_attributes search_summary_node, 'search_engine="SEQUEST" precursor_mass_type="average" fragment_mass_type="average" search_id="1"'
|
82
|
+
search_summary_node['base_name'].should match( %r{sequest/opd1_2runs_2mods/sequest331/020$} )
|
83
|
+
# TODO: expand the search summary check!
|
84
|
+
# TODO: finish testing other guys for accuracy
|
85
|
+
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'mspire/sequest/srf'
|
5
|
+
require 'mspire/sequest/srf/search'
|
6
|
+
|
7
|
+
class SRF_TO_MGF_HELPER
|
8
|
+
FIRST_MSMS = {
|
9
|
+
:first_lines => ['BEGIN IONS', 'TITLE=000.2.2.1.dta', 'CHARGE=1+', 'PEPMASS=391.04541015625'],
|
10
|
+
:first_two_ion_lines => ['111.976043701172 41418.0', '112.733383178711 88292.0'],
|
11
|
+
:last_two_ion_lines => ['407.412780761719 18959.0', '781.085327148438 10104.0'],
|
12
|
+
:last_line => 'END IONS',
|
13
|
+
}
|
14
|
+
LAST_MSMS = {
|
15
|
+
:first_lines => ['BEGIN IONS', 'TITLE=000.3748.3748.3.dta', 'CHARGE=3+', 'PEPMASS=433.56494129743004'],
|
16
|
+
:first_two_ion_lines => ['143.466918945312 2110.0', '151.173095703125 4134.0'],
|
17
|
+
:last_two_ion_lines => ['482.678771972656 3357.0', '610.4111328125 8968.0'],
|
18
|
+
:last_line => 'END IONS',
|
19
|
+
}
|
20
|
+
end
|
21
|
+
|
22
|
+
# these have been checked against Bioworks .dta output
|
23
|
+
class SRF_TO_DTA_HELPER
|
24
|
+
FIRST_SCAN = {
|
25
|
+
:first_line => '391.045410 1',
|
26
|
+
:first_two_ion_lines => ['111.9760 41418', '112.7334 88292'],
|
27
|
+
:last_two_ion_lines => ['407.4128 18959', '781.0853 10104'],
|
28
|
+
}
|
29
|
+
LAST_SCAN = {
|
30
|
+
:first_line => '1298.680271 3',
|
31
|
+
:first_two_ion_lines => ['143.4669 2110', '151.1731 4134'],
|
32
|
+
:last_two_ion_lines => ['482.6788 3357', '610.4111 8968'],
|
33
|
+
}
|
34
|
+
end
|
35
|
+
|
36
|
+
Srf_file = MS::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
37
|
+
TMPDIR = TESTFILES + '/tmp'
|
38
|
+
Mgf_output = TMPDIR + '/000.mgf.tmp'
|
39
|
+
Dta_output = TMPDIR + '/000.dta.tmp'
|
40
|
+
|
41
|
+
shared_examples_for 'an srf to ms2 search converter' do |convert_to_mgf, convert_to_dta|
|
42
|
+
def assert_ion_line_close(expected, actual, delta)
|
43
|
+
expected.split(/\s+/).zip(actual.split(/\s+/)).each do |exp,act|
|
44
|
+
exp.to_f.should be_within(delta).of(act.to_f)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def compare_dtas(key, filename)
|
49
|
+
File.exist?(filename).should be_true
|
50
|
+
lines = IO.read(filename).strip.split("\n")
|
51
|
+
(exp1, act1) = [key[:first_line], lines[0]].map {|l| l.split(/\s+/) }
|
52
|
+
exp1.first.to_f.should be_within(0.000001).of(act1.first.to_f)
|
53
|
+
exp1.last.should == act1.last
|
54
|
+
(key[:first_two_ion_lines] + key[:last_two_ion_lines]).zip(lines[1,2]+lines[-2,2]) do |exp,act|
|
55
|
+
assert_ion_line_close(exp, act, 0.0001)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def compare_mgfs(key, string_chunk)
|
60
|
+
lines = string_chunk.strip.split("\n")
|
61
|
+
key[:first_lines][0,3].should == lines[0,3]
|
62
|
+
(exp_pair, act_pair) = [key[:first_lines][3], lines[3]].map {|line| line.split('=') }
|
63
|
+
exp_pair.first.should == act_pair.first
|
64
|
+
exp_pair.last.to_f.should be_within(0.0000001).of( act_pair.last.to_f )
|
65
|
+
|
66
|
+
(key[:first_two_ion_lines] + key[:last_two_ion_lines]).zip(lines[4,2] + lines[-3,2]).each do |exp_line,act_line|
|
67
|
+
assert_ion_line_close(exp_line, act_line, 0.00000001)
|
68
|
+
end
|
69
|
+
|
70
|
+
key[:last_line].should == lines[-1]
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'converts to mgf' do
|
74
|
+
output = Mgf_output
|
75
|
+
convert_to_mgf.call
|
76
|
+
File.exist?(output).should be_true
|
77
|
+
output = IO.read(output)
|
78
|
+
chunks = output.split("\n\n")
|
79
|
+
|
80
|
+
compare_mgfs(SRF_TO_MGF_HELPER::FIRST_MSMS, chunks.first)
|
81
|
+
compare_mgfs(SRF_TO_MGF_HELPER::LAST_MSMS, chunks.last)
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'generates .dta files' do
|
85
|
+
output = Dta_output
|
86
|
+
convert_to_dta.call
|
87
|
+
File.exist?(output).should be_true
|
88
|
+
File.directory?(output).should be_true
|
89
|
+
# frozen (not verified):
|
90
|
+
Dir[output + "/*.*"].size.should == 3893 # the correct number files
|
91
|
+
|
92
|
+
compare_dtas(SRF_TO_DTA_HELPER::FIRST_SCAN, output + '/000.2.2.1.dta')
|
93
|
+
compare_dtas(SRF_TO_DTA_HELPER::LAST_SCAN, output + '/000.3748.3748.3.dta')
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
describe 'converting an srf to ms2 search format: programmatic' do
|
99
|
+
before do
|
100
|
+
FileUtils.mkdir(TMPDIR) unless File.exist?(TMPDIR)
|
101
|
+
end
|
102
|
+
after do
|
103
|
+
FileUtils.rmtree(TMPDIR)
|
104
|
+
end
|
105
|
+
|
106
|
+
srf = Mspire::Sequest::Srf.new(Srf_file)
|
107
|
+
|
108
|
+
convert_to_mgf = lambda { srf.to_mgf(Mgf_output) }
|
109
|
+
convert_to_dta = lambda { srf.to_dta(Dta_output) }
|
110
|
+
|
111
|
+
it_behaves_like 'an srf to ms2 search converter', convert_to_mgf, convert_to_dta
|
112
|
+
|
113
|
+
end
|
114
|
+
|
115
|
+
describe 'converting an srf to ms2 search format: commandline' do
|
116
|
+
def self.commandline_lambda(string)
|
117
|
+
lambda { Mspire::Sequest::Srf::Search.commandline(string.split(/\s+/)) }
|
118
|
+
end
|
119
|
+
|
120
|
+
convert_to_mgf = self.commandline_lambda "#{Srf_file} -o #{Mgf_output}"
|
121
|
+
convert_to_dta = self.commandline_lambda "#{Srf_file} -o #{Dta_output} -f dta"
|
122
|
+
|
123
|
+
before(:each) do
|
124
|
+
FileUtils.mkdir(TMPDIR) unless File.exist?(TMPDIR)
|
125
|
+
end
|
126
|
+
after(:each) do
|
127
|
+
FileUtils.rmtree(TMPDIR)
|
128
|
+
end
|
129
|
+
|
130
|
+
it_behaves_like 'an srf to ms2 search converter', convert_to_mgf, convert_to_dta
|
131
|
+
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/sequest/srf'
|
4
|
+
require 'mspire/sequest/srf/sqt'
|
5
|
+
|
6
|
+
SpecHelperHeaderHash = {
|
7
|
+
'SQTGenerator' => 'mspire: ms-sequest',
|
8
|
+
'SQTGeneratorVersion' => String,
|
9
|
+
'Database' => 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta',
|
10
|
+
'FragmentMasses' => 'AVG',
|
11
|
+
'PrecursorMasses' => 'AVG',
|
12
|
+
'StartTime' => nil,
|
13
|
+
'Alg-MSModel' => 'LCQ Deca XP',
|
14
|
+
'Alg-PreMassUnits' => 'amu',
|
15
|
+
'DBLocusCount' => '4237',
|
16
|
+
'Alg-FragMassTol' => '1.0000',
|
17
|
+
'Alg-PreMassTol' => '1.4000',
|
18
|
+
'Alg-IonSeries' => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
|
19
|
+
'Alg-Enzyme' => 'Trypsin(KR/P) (2)',
|
20
|
+
'Comment' => ['Created from Bioworks .srf file'],
|
21
|
+
'DynamicMod' => ['STY*=+79.97990', 'M#=+14.02660'],
|
22
|
+
}
|
23
|
+
|
24
|
+
ExpasyStaticMods = ['C=160.1901','Cterm=10.1230','E=161.4455']
|
25
|
+
MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
|
26
|
+
SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
|
27
|
+
|
28
|
+
# these only need to be really close
|
29
|
+
Close_indices = {
|
30
|
+
'S' => [6,7],
|
31
|
+
'M' => [3,4,5,6],
|
32
|
+
}
|
33
|
+
|
34
|
+
SpecHelperOtherLines =<<END
|
35
|
+
S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
|
36
|
+
S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
|
37
|
+
M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
|
38
|
+
L gi|16128266|ref|NP_414815.1|
|
39
|
+
END
|
40
|
+
|
41
|
+
SpecHelperOtherLinesEnd =<<END
|
42
|
+
L gi|90111093|ref|NP_414704.4|
|
43
|
+
M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12 54 K.LQKIITNSY*K U
|
44
|
+
L gi|90111124|ref|NP_414904.2|
|
45
|
+
END
|
46
|
+
|
47
|
+
|
48
|
+
module SPEC
|
49
|
+
Srf_file = MS::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
50
|
+
TMPDIR = TESTFILES + '/tmp'
|
51
|
+
Srf_output = TMPDIR + '/000.sqt.tmp'
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
# {
|
56
|
+
# :lambdas => { :basic_conversion, :with_new_db_path, :update_the_db_path }
|
57
|
+
# :original_db_filename = String
|
58
|
+
# # "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
59
|
+
# :output => String # SPEC::Srf_output
|
60
|
+
# }
|
61
|
+
|
62
|
+
shared_examples_for 'an srf to sqt converter' do |opts|
|
63
|
+
|
64
|
+
# Compares header lines against a hash of expectations; returns true or
# false.
#
# Each line is chomped and split on tabs into (marker, key, value).  What
# the hash holds under that key decides how the value is checked:
#
#   an Array         - the value must be one of its elements
#   the String class - the value only has to be a String
#   anything else    - the value must == it (a key missing from the hash
#                      therefore only matches a line with no value)
#
# header_lines - an enumerable of tab-delimited header line strings
# hash         - key => expectation, as described above
#
# The key, value and expectation of a failing line are printed.  Checking
# stops at the first failing line because all? short-circuits.
def header_hash_match(header_lines, hash)
  header_lines.all? do |line|
    (_marker, k, v) = line.chomp.split("\t")
    expected = hash[k]
    matched =
      if expected.is_a?(Array)
        expected.include?(v)
      elsif expected == String
        v.is_a?(String)
      else
        v == expected
      end
    unless matched
      puts "FAILED: "
      p k
      p v
      p expected
    end
    matched
  end
end
|
93
|
+
|
94
|
+
def sqt_line_match(act_line_ar, exp_line_ar)
|
95
|
+
exp_line_ar.zip(act_line_ar) do |exp_line, act_line|
|
96
|
+
(e_pieces, a_pieces) = [exp_line, act_line].map {|line| line.chomp.split("\t") }
|
97
|
+
if %w(S M).include?(k = e_pieces[0])
|
98
|
+
(e_close, a_close) = [e_pieces, a_pieces].map do |pieces|
|
99
|
+
Close_indices[k].sort.reverse.map do |i|
|
100
|
+
pieces.delete_at(i).to_f
|
101
|
+
end.reverse
|
102
|
+
end
|
103
|
+
e_close.zip(a_close) do |ex, ac|
|
104
|
+
ex.should be_within(0.0000001).of( ac )
|
105
|
+
end
|
106
|
+
end
|
107
|
+
e_pieces.should == a_pieces
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'converts without bothering with the database' do
|
112
|
+
opts[:lambdas][:basic_conversion].call
|
113
|
+
File.exist?(opts[:output]).should be_true
|
114
|
+
lines = File.readlines(opts[:output])
|
115
|
+
lines.size.should == 80910
|
116
|
+
header_lines = lines.grep(/^H/)
|
117
|
+
(header_lines.size > 10).should be_true
|
118
|
+
header_hash_match(header_lines, SpecHelperHeaderHash).should be_true
|
119
|
+
other_lines = lines.grep(/^[^H]/)
|
120
|
+
|
121
|
+
sqt_line_match(other_lines[0,4], SpecHelperOtherLines.strip.split("\n"))
|
122
|
+
sqt_line_match(other_lines[-3,3], SpecHelperOtherLinesEnd.strip.split("\n"))
|
123
|
+
|
124
|
+
File.unlink(opts[:output]) rescue false
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'can get db info with correct path' do
|
128
|
+
opts[:lambdas][:with_new_db_path].call
|
129
|
+
File.exist?(opts[:output]).should be_true
|
130
|
+
lines = IO.readlines(opts[:output])
|
131
|
+
has_md5 = lines.any? do |line|
|
132
|
+
line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/
|
133
|
+
end
|
134
|
+
has_md5.should be_true
|
135
|
+
|
136
|
+
has_seq_len = lines.any? do |line|
|
137
|
+
# frozen
|
138
|
+
line =~ /DBSeqLength\s+1342842/
|
139
|
+
end
|
140
|
+
has_seq_len.should be_true
|
141
|
+
lines.size.should == 80912
|
142
|
+
File.unlink(opts[:output]) rescue false
|
143
|
+
end
|
144
|
+
|
145
|
+
it 'can update the Database' do
|
146
|
+
opts[:lambdas][:update_the_db_path].call
|
147
|
+
regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
|
148
|
+
updated_db = IO.readlines(opts[:output]).any? do |line|
|
149
|
+
line =~ regexp
|
150
|
+
end
|
151
|
+
updated_db.should be_true
|
152
|
+
File.unlink(opts[:output]) rescue false
|
153
|
+
end
|
154
|
+
|
155
|
+
end
|
156
|
+
|
157
|
+
describe "programmatic interface srf to sqt" do
|
158
|
+
|
159
|
+
srf = Mspire::Sequest::Srf.new(SPEC::Srf_file)
|
160
|
+
|
161
|
+
shared_hash = {
|
162
|
+
:lambdas => {
|
163
|
+
basic_conversion: lambda { srf.to_sqt(SPEC::Srf_output) },
|
164
|
+
with_new_db_path: lambda { srf.to_sqt(SPEC::Srf_output, :db_info => true, :new_db_path => MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') },
|
165
|
+
update_the_db_path: lambda { srf.to_sqt(SPEC::Srf_output, :new_db_path => MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) },
|
166
|
+
},
|
167
|
+
output: SPEC::Srf_output,
|
168
|
+
mkdir: SPEC::TMPDIR,
|
169
|
+
original_db_filename: "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
170
|
+
}
|
171
|
+
|
172
|
+
it_behaves_like "an srf to sqt converter", shared_hash
|
173
|
+
|
174
|
+
before(:each) do
|
175
|
+
FileUtils.mkdir(SPEC::TMPDIR) unless File.exist?(SPEC::TMPDIR)
|
176
|
+
end
|
177
|
+
after(:each) do
|
178
|
+
FileUtils.rm_rf(SPEC::TMPDIR)
|
179
|
+
end
|
180
|
+
|
181
|
+
# this requires programmatic interface to manipulate the object for this
|
182
|
+
# test
|
183
|
+
# Points the srf header at a database path that does not exist, converts to
# sqt with :db_info requested, and expects the bad path to be mentioned on
# $stderr while the sqt file is still written.
it 'warns if the db path is incorrect and we want to update db info' do
  output = shared_hash[:output]
  # requires some knowledge of how the database file is extracted
  # internally
  wacky_path = '/not/a/real/path/wacky.fasta'

  my_error_string = ''
  original_stderr = $stderr
  begin
    srf.header.db_filename = wacky_path
    StringIO.open(my_error_string, 'w') do |strio|
      $stderr = strio
      srf.to_sqt(output, :db_info => true)
    end
  ensure
    # srf is shared by the other examples and $stderr is global: put both
    # back even when the conversion raises, and before any expectation can
    # try to write to the (now closed) StringIO
    $stderr = original_stderr
    srf.header.db_filename = shared_hash[:original_db_filename]
  end

  my_error_string.include?(wacky_path).should be_true
  File.exist?(output).should be_true
  IO.readlines(output).size.should == 80910
  File.delete(output) if File.exist?(output)
end
|
202
|
+
end
|
203
|
+
|
204
|
+
describe "command-line interface srf to sqt" do
|
205
|
+
before(:each) do
|
206
|
+
FileUtils.mkdir(SPEC::TMPDIR) unless File.exist?(SPEC::TMPDIR)
|
207
|
+
end
|
208
|
+
after(:each) do
|
209
|
+
FileUtils.rm_rf(SPEC::TMPDIR)
|
210
|
+
end
|
211
|
+
|
212
|
+
def self.commandline_lambda(string)
|
213
|
+
lambda { Mspire::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
|
214
|
+
end
|
215
|
+
|
216
|
+
base_cmd = "#{SPEC::Srf_file} -o #{SPEC::Srf_output}"
|
217
|
+
shared_hash = {
|
218
|
+
lambdas: {
|
219
|
+
basic_conversion: self.commandline_lambda(base_cmd),
|
220
|
+
with_new_db_path: self.commandline_lambda(base_cmd + " --db-info --db-path #{MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}"),
|
221
|
+
update_the_db_path: self.commandline_lambda(base_cmd + " --db-path #{MS::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" ),
|
222
|
+
},
|
223
|
+
output: SPEC::Srf_output,
|
224
|
+
mkdir: SPEC::TMPDIR,
|
225
|
+
}
|
226
|
+
|
227
|
+
it_behaves_like "an srf to sqt converter", shared_hash
|
228
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mspire/sequest/srf_spec_helper' # in spec/
|
3
|
+
|
4
|
+
require 'mspire/sequest/srf'
|
5
|
+
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
include SRFHelper
|
9
|
+
|
10
|
+
class Hash
  # Spec helper: treats self as a table of expectations about obj and returns
  # true when obj satisfies all of them.
  #
  # Each key (converted with to_sym) is sent to obj as a method name and the
  # result is compared with the value stored under that key:
  #
  #   :peaks, :hits, :proteins - the result's size must equal the value
  #   Float values             - the result must be numeric and within
  #                              0.0001 (key :ppm) or 0.0000001 (any other
  #                              key) of the value
  #   everything else          - the result must == the value
  #
  # The first key that fails is printed together with the needed and actual
  # values (the actual size for the size-compared keys); all? stops there.
  # A non-numeric result for a Float expectation counts as a mismatch
  # instead of raising.
  #
  # obj - any object responding to every key in self
  #
  # Returns true or false.
  def object_match(obj)
    all? do |key, expected|
      key = key.to_sym
      actual = obj.send(key)
      got = actual
      matched =
        if [:peaks, :hits, :proteins].include?(key)
          got = actual.size
          got == expected
        elsif expected.class == Float
          delta = (key == :ppm) ? 0.0001 : 0.0000001
          actual.is_a?(Numeric) && (expected - actual).abs <= delta
        else
          actual == expected
        end
      unless matched
        puts "BAD KEY: #{key}"
        puts "need: #{expected}"
        puts "got: #{got}"
      end
      matched
    end
  end
end
|
35
|
+
|
36
|
+
shared_examples_for 'an srf reader' do |srf_obj, test_hash|
|
37
|
+
|
38
|
+
it 'retrieves correct header info' do
|
39
|
+
test_hash[:header].object_match(srf_obj.header).should be_true
|
40
|
+
test_hash[:dta_gen].object_match(srf_obj.header.dta_gen).should be_true
|
41
|
+
end
|
42
|
+
|
43
|
+
# a few more dta params could be added in here:
|
44
|
+
it 'retrieves correct dta files' do
|
45
|
+
test_hash[:dta_files_first].object_match(srf_obj.dta_files.first).should be_true
|
46
|
+
test_hash[:dta_files_last].object_match(srf_obj.dta_files.last).should be_true
|
47
|
+
end
|
48
|
+
|
49
|
+
# given an array of out_file objects, returns the hits of the first one
# whose num_hits is greater than zero, or nil when none has any hits
def get_first_peps(out_files)
  first_with_hits = out_files.find { |outf| outf.num_hits > 0 }
  first_with_hits && first_with_hits.hits
end
|
58
|
+
|
59
|
+
it 'retrieves correct out files' do
|
60
|
+
test_hash[:out_files_first].object_match(srf_obj.out_files.first).should be_true
|
61
|
+
test_hash[:out_files_last].object_match(srf_obj.out_files.last).should be_true
|
62
|
+
# first available peptide hit
|
63
|
+
test_hash[:out_files_first_pep].object_match(get_first_peps(srf_obj.out_files).first).should be_true
|
64
|
+
# last available peptide hit
|
65
|
+
test_hash[:out_files_last_pep].object_match(get_first_peps(srf_obj.out_files.reverse).last).should be_true
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'retrieves correct params' do
|
69
|
+
test_hash[:params].object_match(srf_obj.params).should be_true
|
70
|
+
end
|
71
|
+
|
72
|
+
# TODO:
|
73
|
+
#it_should 'retrieve probabilities if available'
|
74
|
+
end
|
75
|
+
|
76
|
+
# TODO: we should try to get some tests with sf values present!
|
77
|
+
|
78
|
+
|
79
|
+
Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
|
80
|
+
|
81
|
+
To_run = {
|
82
|
+
'3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
|
83
|
+
'3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
|
84
|
+
'3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
|
85
|
+
}
|
86
|
+
|
87
|
+
# I had these nicely combined under RSpec, but this is not as obvious a task
|
88
|
+
# under minispec given the corrupted include behavior...
|
89
|
+
|
90
|
+
describe 'reading srf with duplicate refs v3.2' do
|
91
|
+
|
92
|
+
info = To_run['3.2']
|
93
|
+
file = MS::TESTDATA + '/sequest' + info[:file]
|
94
|
+
srf_obj = Mspire::Sequest::Srf.new(file)
|
95
|
+
|
96
|
+
it_behaves_like 'an srf reader', srf_obj, info[:hash]
|
97
|
+
end
|
98
|
+
|
99
|
+
describe 'reading srf with duplicate refs v3.3' do
|
100
|
+
info = To_run['3.3']
|
101
|
+
file = MS::TESTDATA + '/sequest' + info[:file]
|
102
|
+
srf_obj = Mspire::Sequest::Srf.new(file)
|
103
|
+
|
104
|
+
it_behaves_like 'an srf reader', srf_obj, info[:hash]
|
105
|
+
end
|
106
|
+
|
107
|
+
describe 'reading srf with duplicate refs v3.3.1' do
|
108
|
+
info = To_run['3.3.1']
|
109
|
+
file = MS::TESTDATA + '/sequest' + info[:file]
|
110
|
+
srf_obj = Mspire::Sequest::Srf.new(file)
|
111
|
+
|
112
|
+
it_behaves_like 'an srf reader', srf_obj, info[:hash]
|
113
|
+
end
|