ms-sequest 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
2
+
3
+ require 'ms/sequest/srf'
4
+ require 'ms/sequest/srf/sqt'
5
+
6
+ SpecHelperHeaderHash = {
7
+ 'SQTGenerator' => 'mspire: ms-sequest',
8
+ 'SQTGeneratorVersion' => String,
9
+ 'Database' => 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta',
10
+ 'FragmentMasses' => 'AVG',
11
+ 'PrecursorMasses' => 'AVG',
12
+ 'StartTime' => nil,
13
+ 'Alg-MSModel' => 'LCQ Deca XP',
14
+ 'Alg-PreMassUnits' => 'amu',
15
+ 'DBLocusCount' => '4237',
16
+ 'Alg-FragMassTol' => '1.0000',
17
+ 'Alg-PreMassTol' => '1.4000',
18
+ 'Alg-IonSeries' => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
19
+ 'Alg-Enzyme' => 'Trypsin(KR/P) (2)',
20
+ 'Comment' => ['Created from Bioworks .srf file'],
21
+ 'DynamicMod' => ['STY*=+79.97990', 'M#=+14.02660'],
22
+ }
23
+
24
+ ExpasyStaticMods = ['C=160.1901','Cterm=10.1230','E=161.4455']
25
+ MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
26
+ SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
27
+
28
+
29
+ SpecHelperOtherLines =<<END
30
+ S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
31
+ S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
32
+ M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
33
+ L gi|16128266|ref|NP_414815.1|
34
+ END
35
+
36
+ SpecHelperOtherLinesEnd =<<END
37
+ L gi|90111093|ref|NP_414704.4|
38
+ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12 54 K.LQKIITNSY*K U
39
+ L gi|90111124|ref|NP_414904.2|
40
+ END
41
+
42
+ describe 'converting a large srf to sqt' do
43
+
44
+ @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
45
+ @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
46
+ @srf = Ms::Sequest::Srf.new(@file)
47
+ @original_db_filename = @srf.header.db_filename
48
+
49
# Removes +file+ from disk when it is present; a missing file is ignored.
# Returns the result of File.unlink when a file was removed, nil otherwise.
def del(file)
  File.unlink(file) if File.exist?(file)
end
54
+
55
# Compares tab-separated header lines ("H<TAB>key<TAB>value") against +hash+.
#
# For each line the expected entry is looked up by key:
# * an Array   - the value must be one of its members
# * String     - (the class itself) any String value is accepted
# * otherwise  - the value must equal the expected entry
#
# A mismatch in the Array or equality case prints a short report to stdout.
# Returns true only if every line matches, false otherwise.
def header_hash_match(header_lines, hash)
  header_lines.all? do |line|
    _marker, key, value = line.chomp.split("\t")
    expected = hash[key]
    if !expected.is_a?(Array) && expected == String
      # Only the presence of some string is required here.
      value.is_a?(String)
    else
      matched = expected.is_a?(Array) ? expected.include?(value) : value == expected
      unless matched
        puts "FAILED: "
        p key
        p value
        p expected
      end
      matched
    end
  end
end
84
+
85
+ it 'converts without bothering with the database' do
86
+ @srf.to_sqt(@output)
87
+ ok File.exist?(@output)
88
+ lines = File.readlines(@output)
89
+ lines.size.is 80910
90
+ header_lines = lines.grep(/^H/)
91
+ ok(header_lines.size > 10)
92
+ ok header_hash_match(header_lines, SpecHelperHeaderHash)
93
+ other_lines = lines.grep(/^[^H]/)
94
+ other_lines[0,4].join('').is SpecHelperOtherLines
95
+ other_lines[-3,3].join('').is SpecHelperOtherLinesEnd
96
+ del(@output)
97
+ end
98
+
99
it 'warns if the db path is incorrect and we want to update db info' do
  # requires some knowledge of how the database file is extracted
  # internally
  wacky_path = '/not/a/real/path/wacky.fasta'
  @srf.header.db_filename = wacky_path
  my_error_string = ''
  begin
    StringIO.open(my_error_string, 'w') do |strio|
      # capture whatever to_sqt writes to stderr
      $stderr = strio
      @srf.to_sqt(@output, :db_info => true)
    end
  ensure
    # Always undo the global redirection and the header tweak, even when
    # to_sqt raises, so later examples see a pristine state.
    $stderr = STDERR
    @srf.header.db_filename = @original_db_filename
  end
  ok my_error_string.include?(wacky_path)
  # File.exist? (not the removed File.exists?) to match the other examples
  ok File.exist?(@output)
  IO.readlines(@output).size.is 80910
  del(@output)
end
116
+ it 'can get db info with correct path' do
117
+ @srf.to_sqt(@output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33')
118
+ ok File.exist?(@output)
119
+ lines = IO.readlines(@output)
120
+ has_md5 = lines.any? do |line|
121
+ line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/
122
+ end
123
+ ok has_md5
124
+
125
+ has_seq_len = lines.any? do |line|
126
+ # frozen
127
+ line =~ /DBSeqLength\s+1342842/
128
+ end
129
+ ok has_seq_len
130
+ lines.size.is 80912
131
+ del(@output)
132
+ end
133
+ it 'can update the Database' do
134
+ @srf.to_sqt(@output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true)
135
+ regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
136
+ updated_db = IO.readlines(@output).any? do |line|
137
+ line =~ regexp
138
+ end
139
+ ok updated_db
140
+ del(@output)
141
+ end
142
+ end
@@ -0,0 +1,182 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+ require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
3
+
4
+ require 'ms/sequest/srf'
5
+
6
+ require 'fileutils'
7
+
8
+ include SRFHelper
9
+
10
class Hash
  # Checks that +obj+ answers every key of this hash (sent as a message) with
  # the stored value.
  #
  # * :peaks, :hits and :prots are compared by the size of what obj returns
  # * Float values are compared within a tolerance (looser for :ppm)
  # * everything else is compared with ==
  #
  # Each entry that evaluates to false is reported on stdout. Returns true
  # when all entries match.
  def object_match(obj)
    all? do |key, expected|
      field = key.to_sym
      matched =
        case
        when [:peaks, :hits, :prots].include?(field)
          obj.send(field).size == expected
        when expected.class == Float
          tolerance = field == :ppm ? 0.0001 : 0.0000001
          (expected - obj.send(field)).abs <= tolerance
        else
          obj.send(field) == expected
        end
      if matched == false
        puts "BAD KEY: #{field}"
        puts "need: #{expected}"
        puts "got: #{obj.send(field)}"
      end
      matched
    end
  end
end
35
+
36
+
37
+ shared 'an srf reader' do
38
+
39
+ it 'retrieves correct header info' do
40
+ ok @header.object_match(@srf_obj.header)
41
+ ok @dta_gen.object_match(@srf_obj.header.dta_gen)
42
+ end
43
+
44
+ # a few more dta params could be added in here:
45
+ it 'retrieves correct dta files' do
46
+ ok @dta_files_first.object_match(@srf_obj.dta_files.first)
47
+ ok @dta_files_last.object_match(@srf_obj.dta_files.last)
48
+ end
49
+
50
# Scans +out_files+ in order and returns the hits of the first entry whose
# num_hits is greater than zero; returns nil when no entry has any hits.
def get_first_peps(out_files)
  with_hits = out_files.find { |outf| outf.num_hits > 0 }
  with_hits ? with_hits.hits : nil
end
59
+
60
+ it 'retrieves correct out files' do
61
+ ok @out_files_first.object_match(@srf_obj.out_files.first)
62
+ ok @out_files_last.object_match(@srf_obj.out_files.last)
63
+ # first available peptide hit
64
+ ok @out_files_first_pep.object_match(get_first_peps(@srf_obj.out_files).first)
65
+ # last available peptide hit
66
+ ok @out_files_last_pep.object_match(get_first_peps(@srf_obj.out_files.reverse).last)
67
+ end
68
+
69
+ it 'retrieves correct params' do
70
+ ok @params.object_match(@srf_obj.params)
71
+ end
72
+
73
+ # TODO:
74
+ #it_should 'retrieve probabilities if available'
75
+ end
76
+
77
+ # TODO: we should try to get some tests with sf values present!
78
+
79
+
80
+ Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
81
+
82
+ To_run = {
83
+ '3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
84
+ '3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
85
+ '3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
86
+ }
87
+
88
+ # I had these nicely combined under RSpec, but this is not as obvious a task
89
+ # under minispec given the corrupted include behavior...
90
+
91
+ describe 'reading srf with duplicate refs v3.2' do
92
+
93
+ info = To_run['3.2']
94
+ @file = Ms::TESTDATA + '/sequest' + info[:file]
95
+ @srf_obj = Ms::Sequest::Srf.new(@file)
96
+ Expected_hash_keys.each do |c|
97
+ instance_variable_set("@#{c}", info[:hash][c.to_sym])
98
+ end
99
+
100
+ behaves_like 'an srf reader'
101
+ end
102
+
103
+ describe 'reading srf with duplicate refs v3.3' do
104
+ info = To_run['3.3']
105
+ @file = Ms::TESTDATA + '/sequest' + info[:file]
106
+ @srf_obj = Ms::Sequest::Srf.new(@file)
107
+ Expected_hash_keys.each do |c|
108
+ instance_variable_set("@#{c}", info[:hash][c.to_sym])
109
+ end
110
+
111
+ behaves_like 'an srf reader'
112
+ end
113
+
114
+ describe 'reading srf with duplicate refs v3.3.1' do
115
+ info = To_run['3.3.1']
116
+ @file = Ms::TESTDATA + '/sequest' + info[:file]
117
+ @srf_obj = Ms::Sequest::Srf.new(@file)
118
+ Expected_hash_keys.each do |c|
119
+ instance_variable_set("@#{c}", info[:hash][c.to_sym])
120
+ end
121
+ behaves_like 'an srf reader'
122
+ end
123
+
124
+ #class SRFReadingACorruptedFile < MiniTest::Spec
125
+
126
+ # it 'reads a file from an aborted run w/o failing, but gives warning msg' do
127
+ # srf_file = TESTFILES + '/corrupted_900.srf'
128
+ # message = capture_stderr do
129
+ # srf_obj = Ms::Sequest::Srf.new(srf_file)
130
+ # srf_obj.base_name.is '900'
131
+ # srf_obj.params.is nil
132
+ # header = srf_obj.header
133
+ # header.db_filename.is "C:\\Xcalibur\\database\\sf_hs_44_36f_longesttrpt.fasta.hdr"
134
+ # header.enzyme.is 'Enzyme:Trypsin(KR) (2)'
135
+ # dta_gen = header.dta_gen
136
+ # dta_gen.start_time.must_be_close_to(1.39999997615814, 0.00000000001)
137
+ # srf_obj.dta_files.is []
138
+ # srf_obj.out_files.is []
139
+ # end
140
+ # message.must_match(/no SEQUEST/i)
141
+ # end
142
+ #end
143
+
144
+ #class SRFGroupCreatingAnSrg < MiniTest::Spec
145
+ #it 'creates one given some non-existing, relative filenames' do
146
+ ### TEST SRG GROUPING:
147
+ #filenames = %w(my/lucky/filename /another/filename)
148
+ #@srg = SRFGroup.new
149
+ #@srg.filenames = filenames
150
+ #srg_file = TESTFILES + '/tmp_srg_file.srg'
151
+ #begin
152
+ #@srg.to_srg(srg_file)
153
+ #ok File.exist?(srg_file)
154
+ #ensure
155
+ #File.unlink(srg_file)
156
+ #end
157
+ #end
158
+ #end
159
+
160
+
161
+ ## @TODO: this test needs to be created for a small mock dataset!!
162
+ #describe SRF, 'creating dta files' do
163
+ #spec_large do
164
+ #before(:all) do
165
+ #file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
166
+ #@srf = SRF.new(file)
167
+ #end
168
+
169
+ #it 'creates dta files' do
170
+ #@srf.to_dta
171
+ #ok File.exist?('020')
172
+ #ok File.directory?('020')
173
+ #ok File.exist?('020/020.3366.3366.2.dta')
174
+ #lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
175
+ #lines.first.is "1113.106493 2\r\n"
176
+ #lines[1].is "164.5659 4817\r\n"
177
+
178
+ #FileUtils.rm_rf '020'
179
+ #end
180
+ #end
181
+
182
+ #end
@@ -0,0 +1,172 @@
1
+
2
class Hash
  # Builds a new Hash with the same keys, copying each value one level deep
  # (via #dup) so that mutating a value in the copy leaves the receiver's
  # value untouched.
  #
  # Immediate/singleton values (integers, symbols, floats, nil, true, false)
  # are reused as-is: they are immutable and cannot be dup'ed on older Rubies.
  # Integer is checked instead of Fixnum because the Fixnum constant no
  # longer exists in current Ruby, while Integer matches on every version.
  #
  # Returns the new Hash (a plain Hash; defaults are not carried over).
  def dup_hash
    new_hash = {}
    each do |k, v|
      new_hash[k] =
        case v
        when Integer, Symbol, Float, NilClass, TrueClass, FalseClass then v
        else v.dup
        end
    end
    new_hash
  end
end
16
+
17
+
18
+
19
+ module SRFHelper
20
+
21
+
22
# Expected attribute values for an srf file whose header reports version "3.2".
# Each top-level key names one object to check; its hash maps attribute
# names to expected values. Every attribute is listed exactly once (a
# repeated key in a hash literal is silently overwritten and triggers a
# Ruby warning).
# NOTE(review): :hits and :prots look like expected collection sizes rather
# than the collections themselves - confirm against the matcher that consumes
# this data.
File_32 = {
  :header => {
    :params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
    :raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
    :modifications=>"(M* +15.99940) (STY# +79.97990)",
    :sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
    :ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
    :db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    :enzyme=>"Enzyme:Trypsin(KR/P) (2)",
    :version=>"3.2",
    :model=>"LCQ Deca XP",
    :dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
  },
  :dta_gen => {
    :min_group_count => 1,
    :start_time => 1.5,
    :start_mass => 300.0,
    :end_scan => 3620,
    :group_scan => 1,
    :start_scan => 1,
    :num_dta_files => 3747,
    :min_ion_threshold => 15,
    :end_mass => 4500.0,
  },
  :dta_files_first => {
    :mh=>390.92919921875,
    :dta_tic=>9041311.0,
    :num_peaks=>48,
    :charge=>1,
    :ms_level=>2,
    :total_num_possible_charge_states=>0,
  },
  :dta_files_last => {
    :dta_tic=>842424.0,
    :mh=>357.041198730469,
    :num_peaks=>78,
    :ms_level=>2,
    :charge=>1,
    :total_num_possible_charge_states=>0,
  },
  :out_files_first => {
    :num_hits => 0,
    :computer => 'VELA',
    :date_time => '05/06/2008, 02:08 PM,',
    :hits => 0,
  },
  :out_files_last => {
    :num_hits => 0,
    :computer => 'VELA',
    :date_time => '05/06/2008, 02:11 PM,',
    :hits => 0,
  },
  :out_files_first_pep => {
    :aaseq=>"YRLGGSTK",
    :sequence=>"R.Y#RLGGS#T#K.K",
    :mh=>1121.9390244522,
    :deltacn_orig=>0.0,
    :sp=>29.8529319763184,
    :sf=>0.0,
    :xcorr=>0.123464643955231,
    :id=>2104,
    :rsp=>1,
    :ions_matched=>5,
    :ions_total=>35,
    :prots=>1,
    :deltamass=>-0.00579976654989878,
    :ppm=>5.16938660859491,
    :base_name=>"020",
    :first_scan=>3,
    :last_scan=>3,
    :charge=>1,
    :deltacn=>0.795928299427032,
  },
  :out_files_last_pep => {
    :aaseq=>"LLPGTARTMRR",
    :sequence=>"R.LLPGTARTMRR.M",
    :mh=>1272.5493424522,
    :deltacn_orig=>0.835508584976196,
    :deltacn=>1.1,
    :sp=>57.9885787963867,
    :sf=>0.0,
    :xcorr=>0.109200321137905,
    :id=>1361,
    :rsp=>11,
    :ions_matched=>6,
    :ions_total=>40,
    :prots=>1,
    :deltamass=>0.00243330985608736,
    :ppm=>1.91215729542523,
    :base_name=>"020",
    :first_scan=>3619,
    :last_scan=>3619,
    :charge=>3,
  },

  :params => {
    "add_O_Ornithine"=>"0.0000",
    "add_F_Phenylalanine"=>"0.0000",
    "add_A_Alanine"=>"0.0000",
    "add_C_Cysteine"=>"0.0000",
    "add_Y_Tyrosine"=>"0.0000",
    "add_X_LorI"=>"0.0000",
    "add_J_user_amino_acid"=>"0.0000",
    "add_Cterm_peptide"=>"0.0000",
    "add_S_Serine"=>"0.0000",
    "add_Nterm_protein"=>"0.0000",
    "add_D_Aspartic_Acid"=>"0.0000",
    "add_Q_Glutamine"=>"0.0000",
    "add_K_Lysine"=>"0.0000",
    "add_R_Arginine"=>"0.0000",
    "add_W_Tryptophan"=>"0.0000",
    "add_Nterm_peptide"=>"0.0000",
    "add_H_Histidine"=>"0.0000",
    "add_L_Leucine"=>"0.0000",
    "add_I_Isoleucine"=>"0.0000",
    "add_N_Asparagine"=>"0.0000",
    "add_B_avg_NandD"=>"0.0000",
    "add_Z_avg_QandE"=>"0.0000",
    "add_E_Glutamic_Acid"=>"0.0000",
    "add_G_Glycine"=>"0.0000",
    "add_P_Proline"=>"0.0000",
    "add_M_Methionine"=>"0.0000",
    "add_Cterm_protein"=>"0.0000",
    "add_V_Valine"=>"0.0000",
    "add_T_Threonine"=>"0.0000",
    "add_U_user_amino_acid"=>"0.0000",
    "match_peak_tolerance"=>"1.0000",
    "match_peak_allowed_error"=>"1",
    "normalize_xcorr"=>"0",
    "nucleotide_reading_frame"=>"0",
    "num_results"=>"250",
    "sequence_header_filter"=>"",
    "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
    "partial_sequence"=>"",
    "max_num_internal_cleavage_sites"=>"2",
    "search_engine"=>"SEQUEST",
    "print_duplicate_references"=>"40",
    "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
    "remove_precursor_peak"=>"0",
    "num_output_lines"=>"10",
    "second_database_name"=>"",
    "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    "peptide_mass_tolerance"=>"25.0000",
    "digest_mass_range"=>"600.0 3500.0",
    "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
    "show_fragment_ions"=>"0",
    "protein_mass_filter"=>"0 0",
    "term_diff_search_options"=>"0.000000 0.000000",
    "num_description_lines"=>"5",
    "fragment_ion_tolerance"=>"1.0000",
    "peptide_mass_units"=>"2",
    "mass_type_parent"=>"0",
    "match_peak_count"=>"0",
    "max_num_differential_per_peptide"=>"3",
    "ion_cutoff_percentage"=>"0.0000",
    "mass_type_fragment"=>"0"
  }
}
127
+
128
+ File_33 = {}
129
+ File_32.each do |k,v|
130
+ File_33[k] = v.dup_hash
131
+ end
132
+
133
+ ## Bioworks 3.3 (srf version 3.3)
134
+ File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
135
+ File_33[:header][:version] = "3.3"
136
+
137
+ File_33[:out_files_first][:computer] = 'TESLA'
138
+ File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
139
+ File_33[:out_files_last][:computer] = 'TESLA'
140
+ File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'
141
+
142
+ File_33[:out_files_first_pep][:sp] = 29.8535556793213
143
+ File_33[:out_files_last_pep][:sp] = 57.987476348877
144
+ File_33[:out_files_first_pep][:sf] = 0.0
145
+ File_33[:out_files_last_pep][:sf] = 0.0
146
+ File_33[:out_files_last_pep][:rsp] = 10
147
+ File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219
148
+
149
+
150
+ ## Bioworks 3.3.1 (srf version 3.5)
151
+ File_331 = {}
152
+
153
+
154
+ File_33.each do |k,v|
155
+ File_331[k] = v.dup_hash
156
+ end
157
+
158
+ File_331[:params] = File_33[:params].dup_hash
159
+
160
+ adjust_keys = File_331[:params].keys.select {|k| k =~ /^add/ }.push(*%w(ion_cutoff_percentage peptide_mass_tolerance match_peak_tolerance fragment_ion_tolerance) )
161
+ adjust_keys.each do |key|
162
+ File_331[:params][key] = File_331[:params][key] << "0"
163
+ end
164
+
165
+ File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
166
+ File_331[:header][:version] = "3.5"
167
+ File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
168
+ File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
169
+
170
+ end
171
+
172
+