ms-sequest 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,142 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
2
+
3
+ require 'ms/sequest/srf'
4
+ require 'ms/sequest/srf/sqt'
5
+
6
# Static modification strings for the two mass tables; only the
# MoleculesStaticMods set is wired into the expected header below.
ExpasyStaticMods    = ['C=160.1901','Cterm=10.1230','E=161.4455']
MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]

# Expected SQT header values, keyed by header name.  The value decides how
# header_hash_match compares a header line:
#   Array            => the line's value must be one of the members
#   String (a class) => any string value is accepted
#   anything else    => the line's value must equal it
SpecHelperHeaderHash = {
  'SQTGenerator'        => 'mspire: ms-sequest',
  'SQTGeneratorVersion' => String,
  'Database'            => 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta',
  'FragmentMasses'      => 'AVG',
  'PrecursorMasses'     => 'AVG',
  'StartTime'           => nil,
  'Alg-MSModel'         => 'LCQ Deca XP',
  'Alg-PreMassUnits'    => 'amu',
  'DBLocusCount'        => '4237',
  'Alg-FragMassTol'     => '1.0000',
  'Alg-PreMassTol'      => '1.4000',
  'Alg-IonSeries'       => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
  'Alg-Enzyme'          => 'Trypsin(KR/P) (2)',
  'Comment'             => ['Created from Bioworks .srf file'],
  'DynamicMod'          => ['STY*=+79.97990', 'M#=+14.02660'],
  'StaticMod'           => MoleculesStaticMods,
}
27
+
28
+
29
# Expected non-header SQT content for the large-file conversion spec:
# SpecHelperOtherLines is compared with the first four non-'H' lines of the
# generated file, SpecHelperOtherLinesEnd with the last three.
# NOTE(review): the header lines are split on "\t" by header_hash_match, so
# these records are presumably tab-separated as well; this copy shows single
# spaces -- confirm the whitespace against the real file before relying on it.
+ SpecHelperOtherLines =<<END
30
+ S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
31
+ S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
32
+ M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
33
+ L gi|16128266|ref|NP_414815.1|
34
+ END
35
+
36
+ SpecHelperOtherLinesEnd =<<END
37
+ L gi|90111093|ref|NP_414704.4|
38
+ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12 54 K.LQKIITNSY*K U
39
+ L gi|90111124|ref|NP_414904.2|
40
+ END
41
+
42
+ describe 'converting a large srf to sqt' do
43
+
44
+ @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
45
+ @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
46
+ @srf = Ms::Sequest::Srf.new(@file)
47
+ @original_db_filename = @srf.header.db_filename
48
+
49
# Deletes +file+ when it is present on disk; a path that does not exist is
# ignored (and nil is returned).
def del(file)
  File.unlink(file) if File.exist?(file)
end
54
+
55
# Checks SQT header lines against a table of expected values.
#
# header_lines - strings of the form "H\t<key>\t<value>" (a trailing newline
#                is stripped before splitting on tabs).
# hash         - maps a header key to its expectation:
#                  an Array         => the value must be one of its members
#                  String (a class) => any String value is accepted
#                  anything else    => the value must equal it; a nil
#                                      expectation (or a key missing from the
#                                      table) only matches a line that carries
#                                      no value at all
#
# Returns true when every line matches (including when there are no lines).
# Checking stops at the first mismatch: its key, the value read, and the
# expectation are printed, and false is returned.
def header_hash_match(header_lines, hash)
  header_lines.all? do |line|
    # the leading field is the record marker and is not inspected
    _marker, key, value = line.chomp.split("\t")
    expected = hash[key]
    matched =
      if expected.is_a?(Array)
        expected.include?(value)
      elsif expected == String
        value.is_a?(String)
      else
        value == expected
      end
    # every kind of mismatch is reported the same way
    unless matched
      puts "FAILED: "
      p key
      p value
      p expected
    end
    matched
  end
end
84
+
85
it 'converts without bothering with the database' do
  @srf.to_sqt(@output)
  ok File.exist?(@output)

  sqt_lines = File.readlines(@output)
  sqt_lines.size.is 80910

  # 'H' records form the header; they are checked key by key
  h_records = sqt_lines.grep(/^H/)
  ok(h_records.size > 10)
  ok header_hash_match(h_records, SpecHelperHeaderHash)

  # everything else is spot-checked at the start and the end of the file
  body_records = sqt_lines.grep(/^[^H]/)
  leading = body_records[0,4].join('')
  leading.is SpecHelperOtherLines
  trailing = body_records[-3,3].join('')
  trailing.is SpecHelperOtherLinesEnd

  del(@output)
end
98
+
99
it 'warns if the db path is incorrect and we want to update db info' do
  # requires some knowledge of how the database file is extracted
  # internally
  wacky_path = '/not/a/real/path/wacky.fasta'
  my_error_string = ''
  begin
    @srf.header.db_filename = wacky_path
    # capture whatever the conversion writes to $stderr
    StringIO.open(my_error_string, 'w') do |strio|
      $stderr = strio
      @srf.to_sqt(@output, :db_info => true)
    end
  ensure
    # restore the shared state even when the conversion raises, so later
    # examples never see a closed $stderr or the bogus database path
    $stderr = STDERR
    @srf.header.db_filename = @original_db_filename
  end
  ok my_error_string.include?(wacky_path)
  ok File.exist?(@output)
  IO.readlines(@output).size.is 80910
  del(@output)
end
116
it 'can get db info with correct path' do
  db_dir = Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'
  @srf.to_sqt(@output, :db_info => true, :new_db_path => db_dir)
  ok File.exist?(@output)

  sqt_lines = IO.readlines(@output)

  # the database checksum must be reported ...
  ok(sqt_lines.any? { |line| line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/ })

  # ... and so must its total sequence length
  # frozen
  ok(sqt_lines.any? { |line| line =~ /DBSeqLength\s+1342842/ })

  sqt_lines.size.is 80912
  del(@output)
end
133
it 'can update the Database' do
  new_db_dir = Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'
  @srf.to_sqt(@output, :new_db_path => new_db_dir, :update_db_path => true)

  # the Database header must now point below the new directory
  database_line = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
  ok(IO.readlines(@output).any? { |line| line =~ database_line })

  del(@output)
end
142
+ end
@@ -0,0 +1,182 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+ require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
3
+
4
+ require 'ms/sequest/srf'
5
+
6
+ require 'fileutils'
7
+
8
+ include SRFHelper
9
+
10
class Hash
  # Compares every entry of this hash with the attribute of +obj+ that has
  # the same name (the key is converted to a symbol and sent to +obj+).
  #
  #   :peaks, :hits, :prots - the value is the expected *size* of the attribute
  #   Float values          - compared within a tolerance: 0.0001 for :ppm,
  #                           0.0000001 for everything else
  #   anything else         - compared with ==
  #
  # Checking stops at the first entry that does not match; its key, the
  # expected value, and the attribute actually found are printed.
  # Returns true only when all entries match.
  def object_match(obj)
    all? do |name, expected|
      attr = name.to_sym
      outcome =
        if [:peaks, :hits, :prots].include?(attr)
          obj.send(attr).size == expected
        elsif expected.class == Float
          tolerance = (attr == :ppm) ? 0.0001 : 0.0000001
          (expected - obj.send(attr)).abs <= tolerance
        else
          obj.send(attr) == expected
        end
      if outcome == false
        puts "BAD KEY: #{attr}"
        puts "need: #{expected}"
        puts "got: #{obj.send(attr)}"
      end
      outcome
    end
  end
end
35
+
36
+
37
# Examples shared by every srf version.  The including context must set
# @srf_obj plus one expectation hash per section (@header, @dta_gen,
# @dta_files_first, @dta_files_last, @out_files_first, @out_files_last,
# @out_files_first_pep, @out_files_last_pep, @params).
shared 'an srf reader' do

  it 'retrieves correct header info' do
    ok @header.object_match(@srf_obj.header)
    ok @dta_gen.object_match(@srf_obj.header.dta_gen)
  end

  # a few more dta params could be added in here:
  it 'retrieves correct dta files' do
    ok @dta_files_first.object_match(@srf_obj.dta_files.first)
    ok @dta_files_last.object_match(@srf_obj.dta_files.last)
  end

  # given an array of out_file objects, returns the hits of the first one
  # that has any (nil when none of them has a hit)
  def get_first_peps(out_files)
    with_hits = out_files.find { |outf| outf.num_hits > 0 }
    with_hits && with_hits.hits
  end

  it 'retrieves correct out files' do
    ok @out_files_first.object_match(@srf_obj.out_files.first)
    ok @out_files_last.object_match(@srf_obj.out_files.last)
    # first available peptide hit
    first_hit = get_first_peps(@srf_obj.out_files).first
    ok @out_files_first_pep.object_match(first_hit)
    # last available peptide hit
    last_hit = get_first_peps(@srf_obj.out_files.reverse).last
    ok @out_files_last_pep.object_match(last_hit)
  end

  it 'retrieves correct params' do
    ok @params.object_match(@srf_obj.params)
  end

  # TODO:
  #it_should 'retrieve probabilities if available'
end
76
+
77
+ # TODO:, we should try to get some tests with sf values present!
78
+
79
+
80
# Names of the expectation sections; each becomes an instance variable of the
# same name in the describe blocks below.
Expected_hash_keys = [
  'header',
  'dta_gen',
  'dta_files_first',
  'dta_files_last',
  'out_files_first',
  'out_files_last',
  'out_files_first_pep',
  'out_files_last_pep',
  'params',
]

# For each version label: the expectation table and the srf file to read
# (the path is relative to the sequest test-data directory).
To_run = {
  '3.2' => {
    :hash => File_32,
    :file => '/opd1_2runs_2mods/sequest32/020.srf',
  },
  '3.3' => {
    :hash => File_33,
    :file => '/opd1_2runs_2mods/sequest33/020.srf',
  },
  '3.3.1' => {
    :hash => File_331,
    :file => '/opd1_2runs_2mods/sequest331/020.srf',
  },
}
87
+
88
+ # I had these nicely combined under RSpec, but this is not as obvious a task
89
+ # under minispec given the corrupted include behavior...
90
+
91
describe 'reading srf with duplicate refs v3.2' do

  run = To_run['3.2']
  @file = Ms::TESTDATA + '/sequest' + run[:file]
  @srf_obj = Ms::Sequest::Srf.new(@file)
  # publish each expectation section as @header, @dta_gen, ... for the
  # shared examples
  Expected_hash_keys.each do |section|
    instance_variable_set("@#{section}", run[:hash][section.to_sym])
  end

  behaves_like 'an srf reader'
end
102
+
103
describe 'reading srf with duplicate refs v3.3' do
  run = To_run['3.3']
  @file = Ms::TESTDATA + '/sequest' + run[:file]
  @srf_obj = Ms::Sequest::Srf.new(@file)
  # publish each expectation section as @header, @dta_gen, ... for the
  # shared examples
  Expected_hash_keys.each do |section|
    instance_variable_set("@#{section}", run[:hash][section.to_sym])
  end

  behaves_like 'an srf reader'
end
113
+
114
describe 'reading srf with duplicate refs v3.3.1' do
  run = To_run['3.3.1']
  @file = Ms::TESTDATA + '/sequest' + run[:file]
  @srf_obj = Ms::Sequest::Srf.new(@file)
  # publish each expectation section as @header, @dta_gen, ... for the
  # shared examples
  Expected_hash_keys.each do |section|
    instance_variable_set("@#{section}", run[:hash][section.to_sym])
  end
  behaves_like 'an srf reader'
end
123
+
124
+ #class SRFReadingACorruptedFile < MiniTest::Spec
125
+
126
+ # it 'reads a file from an aborted run w/o failing, but gives warning msg' do
127
+ # srf_file = TESTFILES + '/corrupted_900.srf'
128
+ # message = capture_stderr do
129
+ # srf_obj = Ms::Sequest::Srf.new(srf_file)
130
+ # srf_obj.base_name.is '900'
131
+ # srf_obj.params.is nil
132
+ # header = srf_obj.header
133
+ # header.db_filename.is "C:\\Xcalibur\\database\\sf_hs_44_36f_longesttrpt.fasta.hdr"
134
+ # header.enzyme.is 'Enzyme:Trypsin(KR) (2)'
135
+ # dta_gen = header.dta_gen
136
+ # dta_gen.start_time.must_be_close_to(1.39999997615814, 0.00000000001)
137
+ # srf_obj.dta_files.is []
138
+ # srf_obj.out_files.is []
139
+ # end
140
+ # message.must_match(/no SEQUEST/i)
141
+ # end
142
+ #end
143
+
144
+ #class SRFGroupCreatingAnSrg < MiniTest::Spec
145
+ #it 'creates one given some non-existing, relative filenames' do
146
+ ### TEST SRG GROUPING:
147
+ #filenames = %w(my/lucky/filename /another/filename)
148
+ #@srg = SRFGroup.new
149
+ #@srg.filenames = filenames
150
+ #srg_file = TESTFILES + '/tmp_srg_file.srg'
151
+ #begin
152
+ #@srg.to_srg(srg_file)
153
+ #ok File.exist?(srg_file)
154
+ #ensure
155
+ #File.unlink(srg_file)
156
+ #end
157
+ #end
158
+ #end
159
+
160
+
161
+ ## @TODO: this test needs to be created for a small mock dataset!!
162
+ #describe SRF, 'creating dta files' do
163
+ #spec_large do
164
+ #before(:all) do
165
+ #file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
166
+ #@srf = SRF.new(file)
167
+ #end
168
+
169
+ #it 'creates dta files' do
170
+ #@srf.to_dta
171
+ #ok File.exist?('020')
172
+ #ok File.directory?('020')
173
+ #ok File.exist?('020/020.3366.3366.2.dta')
174
+ #lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
175
+ #lines.first.is "1113.106493 2\r\n"
176
+ #lines[1].is "164.5659 4817\r\n"
177
+
178
+ #FileUtils.rm_rf '020'
179
+ #end
180
+ #end
181
+
182
+ #end
@@ -0,0 +1,172 @@
1
+
2
class Hash
  # Returns a new hash with the same keys whose values are copied one level
  # deep, so that mutating a value of the copy (e.g. appending to a String)
  # leaves the receiver untouched.
  #
  # Integer, Float and Symbol values are handed over as-is; every other value
  # is copied with #dup.  Keys are shared, not copied.
  #
  # Integer is tested instead of Fixnum: the Fixnum constant no longer exists
  # on current Rubies, and Integer also covers values beyond the machine word.
  def dup_hash
    each_with_object({}) do |(key, value), copy|
      copy[key] =
        case value
        when Integer, Float, Symbol then value
        else value.dup
        end
    end
  end
end
16
+
17
+
18
+
19
+ module SRFHelper
20
+
21
+
22
# Expected values for the srf file written by Bioworks 3.2, one sub-hash per
# section of the file.  The :hits and :prots entries hold expected *counts*
# (Hash#object_match compares them with the size of the attribute).
# Each key appears exactly once per section; the earlier duplicate entries
# carried the same values and only produced duplicate-key warnings.
File_32 = {
  :header =>
  {
    :params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
    :raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
    :modifications=>"(M* +15.99940) (STY# +79.97990)",
    :sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
    :ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
    :db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    :enzyme=>"Enzyme:Trypsin(KR/P) (2)",
    :version=>"3.2",
    :model=>"LCQ Deca XP",
    :dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
  },
  :dta_gen => {
    :min_group_count => 1,
    :start_time => 1.5,
    :start_mass => 300.0,
    :end_scan => 3620,
    :group_scan => 1,
    :start_scan => 1,
    :num_dta_files => 3747,
    :min_ion_threshold => 15,
    :end_mass => 4500.0,
  },
  :dta_files_first => {

    :mh=>390.92919921875,
    :dta_tic=>9041311.0,
    :num_peaks=>48,
    :charge=>1,
    :ms_level=>2,
    :total_num_possible_charge_states=>0,
  },
  :dta_files_last => {
    :dta_tic=>842424.0,
    :mh=>357.041198730469,
    :num_peaks=>78,
    :ms_level=>2,
    :charge=>1,
    :total_num_possible_charge_states=>0,
  },
  :out_files_first => {
    :num_hits => 0,
    :computer => 'VELA',
    :date_time => '05/06/2008, 02:08 PM,',
    :hits => 0,
  },
  :out_files_last => {
    :num_hits => 0,
    :computer => 'VELA',
    :date_time => '05/06/2008, 02:11 PM,',
    :hits => 0,
  },
  :out_files_first_pep => {
    :aaseq=>"YRLGGSTK",
    :sequence=>"R.Y#RLGGS#T#K.K",
    :mh=>1121.9390244522,
    :deltacn_orig=>0.0,
    :sp=>29.8529319763184,
    :sf=>0.0,
    :xcorr=>0.123464643955231,
    :id=>2104,
    :rsp=>1,
    :ions_matched=>5,
    :ions_total=>35,
    :prots=>1,
    :deltamass=>-0.00579976654989878,
    :ppm=>5.16938660859491,
    :base_name=>"020",
    :first_scan=>3,
    :last_scan=>3,
    :charge=>1,
    :deltacn=>0.795928299427032,
  },
  :out_files_last_pep =>
  {
    :aaseq=>"LLPGTARTMRR",
    :sequence=>"R.LLPGTARTMRR.M",
    :mh=>1272.5493424522,
    :deltacn_orig=>0.835508584976196,
    :deltacn=>1.1,
    :sp=>57.9885787963867,
    :sf=>0.0,
    :xcorr=>0.109200321137905,
    :id=>1361,
    :rsp=>11,
    :ions_matched=>6,
    :ions_total=>40,
    :prots=>1,
    :deltamass=>0.00243330985608736,
    :ppm=>1.91215729542523,
    :base_name=>"020",
    :first_scan=>3619,
    :last_scan=>3619,
    :charge=>3,
  },

  :params => {
    "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta", "peptide_mass_tolerance"=>"25.0000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"2", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"3", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"
  }
}
127
+
128
## Bioworks 3.3 (srf version 3.3)
# Built from a section-by-section copy of File_32; only the values that
# differ for this version are overridden below.
File_33 = {}
File_32.each_pair do |section, expected|
  File_33[section] = expected.dup_hash
end

File_33[:header].update(
  :raw_filename => "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW",
  :version => "3.3"
)

File_33[:out_files_first].update(
  :computer => 'TESLA',
  :date_time => '04/24/2007, 10:41 AM,'
)
File_33[:out_files_last].update(
  :computer => 'TESLA',
  :date_time => '04/24/2007, 10:42 AM,'
)

File_33[:out_files_first_pep].update(
  :sp => 29.8535556793213,
  :sf => 0.0
)
File_33[:out_files_last_pep].update(
  :sp => 57.987476348877,
  :sf => 0.0,
  :rsp => 10,
  :deltacn_orig => 0.835624694824219
)
148
+
149
+
150
## Bioworks 3.3.1 (srf version 3.5)
# Built from a section-by-section copy of File_33.
File_331 = {}
File_33.each_pair do |section, expected|
  File_331[section] = expected.dup_hash
end

File_331[:params] = File_33[:params].dup_hash

# every "add_*" parameter and the four tolerance/cutoff parameters gain one
# more trailing "0" digit in this version (e.g. "0.0000" becomes "0.00000")
params_331 = File_331[:params]
longer_keys = params_331.keys.select {|k| k =~ /^add/ } +
  %w(ion_cutoff_percentage peptide_mass_tolerance match_peak_tolerance fragment_ion_tolerance)
longer_keys.each do |key|
  params_331[key] << "0"
end

File_331[:header].update(
  :raw_filename => "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW",
  :version => "3.5"
)
File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
170
+ end
171
+
172
+