mspire 0.3.9 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +24 -7
- data/README +15 -13
- data/README.rdoc +18 -0
- data/Rakefile +50 -14
- data/bin/aafreqs.rb +0 -0
- data/bin/bioworks2excel.rb +0 -0
- data/bin/bioworks_to_pepxml.rb +2 -1
- data/bin/bioworks_to_pepxml_gui.rb +0 -0
- data/bin/fasta_shaker.rb +0 -0
- data/bin/filter_and_validate.rb +0 -0
- data/bin/gi2annot.rb +0 -0
- data/bin/id_class_anal.rb +0 -0
- data/bin/id_precision.rb +0 -0
- data/bin/ms_to_lmat.rb +0 -0
- data/bin/pepproph_filter.rb +0 -0
- data/bin/protein_summary.rb +0 -0
- data/bin/protxml2prots_peps.rb +0 -0
- data/bin/raw_to_mzXML.rb +3 -3
- data/bin/run_percolator.rb +122 -0
- data/bin/sqt_group.rb +0 -0
- data/bin/srf_group.rb +0 -0
- data/changelog.txt +29 -0
- data/lib/ms/gradient_program.rb +0 -1
- data/lib/ms/msrun.rb +62 -29
- data/lib/ms/parser/mzdata/axml.rb +55 -0
- data/lib/ms/parser/mzdata/dom.rb +51 -36
- data/lib/ms/parser/mzdata.rb +8 -2
- data/lib/ms/parser/mzxml/axml.rb +59 -0
- data/lib/ms/parser/mzxml/dom.rb +80 -57
- data/lib/ms/parser/mzxml/hpricot.rb +1 -1
- data/lib/ms/parser/mzxml/libxml.rb +6 -2
- data/lib/ms/parser/mzxml.rb +110 -3
- data/lib/ms/parser.rb +4 -4
- data/lib/ms/precursor.rb +19 -4
- data/lib/ms/scan.rb +7 -7
- data/lib/ms/spectrum.rb +249 -58
- data/lib/mspire.rb +1 -1
- data/lib/spec_id/bioworks.rb +2 -2
- data/lib/spec_id/precision/filter/cmdline.rb +8 -1
- data/lib/spec_id/precision/prob/cmdline.rb +2 -2
- data/lib/spec_id/precision/prob.rb +1 -0
- data/lib/spec_id/proph/pep_summary.rb +3 -4
- data/lib/spec_id/proph/prot_summary.rb +3 -3
- data/lib/spec_id/protein_summary.rb +1 -1
- data/lib/spec_id/sequest/pepxml.rb +5 -5
- data/lib/spec_id/sqt.rb +4 -4
- data/lib/spec_id/srf.rb +49 -8
- data/lib/spec_id.rb +5 -0
- data/lib/xml_style_parser.rb +16 -2
- data/script/compile_and_plot_smriti_final.rb +0 -0
- data/script/create_little_pepxml.rb +0 -0
- data/script/degenerate_peptides.rb +0 -0
- data/script/estimate_fpr_by_cysteine.rb +0 -0
- data/script/extract_gradient_programs.rb +1 -1
- data/script/find_cysteine_background.rb +0 -0
- data/script/genuine_tps_and_probs.rb +0 -0
- data/script/get_apex_values_rexml.rb +0 -0
- data/script/mascot_fix_pepxml.rb +123 -0
- data/script/msvis.rb +0 -0
- data/script/mzXML2timeIndex.rb +0 -0
- data/script/peps_per_bin.rb +0 -0
- data/script/prep_dir.rb +0 -0
- data/script/simple_protein_digestion.rb +0 -0
- data/script/smriti_final_analysis.rb +0 -0
- data/script/sqt_to_meta.rb +0 -0
- data/script/top_hit_per_scan.rb +0 -0
- data/script/toppred_to_yaml.rb +0 -0
- data/script/tpp_installer.rb +0 -0
- data/specs/bin/prob_validate_spec.rb +5 -2
- data/specs/bin/protein_summary_spec.rb +5 -1
- data/specs/ms/msrun_spec.rb +176 -133
- data/specs/ms/parser_spec.rb +3 -3
- data/specs/ms/spectrum_spec.rb +0 -2
- data/specs/spec_id/precision/filter_spec.rb +4 -1
- data/specs/spec_id/precision/prob_spec.rb +2 -2
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
- data/specs/spec_id/sqt_spec.rb +5 -5
- data/specs/spec_id/srf_spec.rb +56 -93
- data/specs/spec_id/srf_spec_helper.rb +121 -284
- data/specs/spec_id_spec.rb +3 -0
- data/specs/transmem/toppred_spec.rb +1 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
- metadata +247 -229
data/specs/spec_id/srf_spec.rb
CHANGED
@@ -3,6 +3,8 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
|
3
3
|
require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
|
4
4
|
require 'spec_id/srf'
|
5
5
|
|
6
|
+
require 'fileutils'
|
7
|
+
|
6
8
|
include SRFHelper
|
7
9
|
|
8
10
|
#tfiles = File.dirname(__FILE__) + '/tfiles/'
|
@@ -60,11 +62,23 @@ describe 'an srf reader', :shared => true do
|
|
60
62
|
@dta_files_last.object_match(@srf_obj.dta_files.last).should be_true
|
61
63
|
end
|
62
64
|
|
65
|
+
# given an array of out_file objects, returns the first set of hits
|
66
|
+
def get_first_peps(out_files)
|
67
|
+
out_files.each do |outf|
|
68
|
+
if outf.num_hits > 0
|
69
|
+
return outf.hits
|
70
|
+
end
|
71
|
+
end
|
72
|
+
return nil
|
73
|
+
end
|
74
|
+
|
63
75
|
it 'retrieves correct out files' do
|
64
76
|
@out_files_first.object_match(@srf_obj.out_files.first).should be_true
|
65
77
|
@out_files_last.object_match(@srf_obj.out_files.last).should be_true
|
66
|
-
|
67
|
-
@
|
78
|
+
# first available peptide hit
|
79
|
+
@out_files_first_pep.object_match(get_first_peps(@srf_obj.out_files).first).should be_true
|
80
|
+
# last available peptide hit
|
81
|
+
@out_files_last_pep.object_match(get_first_peps(@srf_obj.out_files.reverse).last).should be_true
|
68
82
|
end
|
69
83
|
|
70
84
|
xit 'retrieves correct params' do
|
@@ -75,35 +89,30 @@ describe 'an srf reader', :shared => true do
|
|
75
89
|
end
|
76
90
|
|
77
91
|
|
92
|
+
Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
|
78
93
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
94
|
+
to_run = {
|
95
|
+
'3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
|
96
|
+
'3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
|
97
|
+
'3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
|
98
|
+
}
|
99
|
+
|
100
|
+
to_run.each do |version,info|
|
101
|
+
describe klass, " reading a version #{version} .srf file" do
|
102
|
+
spec_large do
|
103
|
+
before(:all) do
|
104
|
+
@file = Tfiles_l + info[:file]
|
105
|
+
Expected_hash_keys.each do |c|
|
106
|
+
instance_variable_set("@#{c}", info[:hash][c.to_sym])
|
107
|
+
end
|
85
108
|
end
|
109
|
+
it_should_behave_like "an srf reader"
|
86
110
|
end
|
87
|
-
it_should_behave_like "an srf reader"
|
88
111
|
end
|
89
112
|
end
|
90
113
|
|
91
114
|
|
92
|
-
describe klass, " reading a
|
93
|
-
it_should 'reading a version 3.3 .srf file'
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
|
98
|
-
spec_large do
|
99
|
-
before(:all) do
|
100
|
-
@file = Tfiles_l + '/sash7/sequest/bioworks331/7MIX_STD_110802_1.srf'
|
101
|
-
%w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
|
102
|
-
instance_variable_set("@#{c}", File_35[c.to_sym])
|
103
|
-
end
|
104
|
-
end
|
105
|
-
it_should_behave_like "an srf reader"
|
106
|
-
end
|
115
|
+
describe klass, " reading a corrupted file" do
|
107
116
|
it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
|
108
117
|
file = Tfiles + '/corrupted_900.srf'
|
109
118
|
error_msg = Tfiles + '/error_msg.tmp'
|
@@ -123,78 +132,9 @@ describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
|
|
123
132
|
IO.read(error_msg).should =~ /corrupted_900\.srf/
|
124
133
|
File.unlink error_msg
|
125
134
|
end
|
126
|
-
|
127
|
-
end
|
128
|
-
|
129
|
-
|
130
|
-
describe klass, 'reading an srf file' do
|
131
|
-
|
132
|
-
spec_large do
|
133
|
-
before(:all) do
|
134
|
-
start = Time.now
|
135
|
-
tf_srf = Tfiles_l + "/sash7/sequest/older/7MIX_STD_110802_1.srf"
|
136
|
-
@srf = klass.new(tf_srf)
|
137
|
-
puts "- read in #{Time.now - start} seconds"
|
138
|
-
end
|
139
|
-
|
140
|
-
#def initialize(arg)
|
141
|
-
# super(arg)
|
142
|
-
# @tfiles = File.dirname(__FILE__) + '/tfiles/'
|
143
|
-
# @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
144
|
-
# @srg_file = @tfiles + "tmp_bioworks.srg"
|
145
|
-
# @srf = $srf
|
146
|
-
# @group = $group
|
147
|
-
#end
|
148
|
-
|
149
|
-
it 'reads' do
|
150
|
-
end
|
151
|
-
|
152
|
-
it 'reads an srf file (w/o probs) and extracts all basic information' do
|
153
|
-
## Verify that we have everything and it is as we expect (not exhaustive)
|
154
|
-
head = @srf.header
|
155
|
-
dtgen = head.dta_gen
|
156
|
-
## HEADER
|
157
|
-
hash_match(Header, head)
|
158
|
-
hash_match(Dta_gen, dtgen)
|
159
|
-
## DTA_FILES
|
160
|
-
hash_match(Dta_files_first, @srf.dta_files.first)
|
161
|
-
hash_match(Dta_files_last, @srf.dta_files.last)
|
162
|
-
## OUT_FILES
|
163
|
-
hash_match(Out_files_first, @srf.out_files.first)
|
164
|
-
hit = @srf.out_files.first.hits.first
|
165
|
-
hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
|
166
|
-
hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
|
167
|
-
hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
|
168
|
-
## SEQUEST_PARAMS
|
169
|
-
hash_match(Sequest_params, @srf.params)
|
170
|
-
## INDEX
|
171
|
-
@srf.index.last.should == [7161, 7161, 3]
|
172
|
-
@srf.index.first.should == [2, 2, 1]
|
173
|
-
|
174
|
-
@srf.dta_files.size.should == @srf.index.size
|
175
|
-
@srf.dta_files.size.should == @srf.out_files.size
|
176
|
-
end
|
177
|
-
it_should 'give accurate peptides' do
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
## treats reference special
|
182
|
-
def hash_match(hash, srf)
|
183
|
-
hash.each do |k,v|
|
184
|
-
if v.is_a? Float
|
185
|
-
delta = v/100000
|
186
|
-
srf.send(k.to_sym).should be_close(v, delta)
|
187
|
-
elsif k == :reference
|
188
|
-
srf.prots.first.reference.should == v[0,38]
|
189
|
-
else
|
190
|
-
srf.send(k.to_sym).should == v
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
135
|
end
|
195
136
|
|
196
137
|
describe SRFGroup, 'creating an srg file' do
|
197
|
-
|
198
138
|
it 'creates one given some non-existing, relative filenames' do
|
199
139
|
## TEST SRG GROUPING:
|
200
140
|
filenames = %w(my/lucky/filename /another/filename)
|
@@ -205,5 +145,28 @@ describe SRFGroup, 'creating an srg file' do
|
|
205
145
|
File.exist?(srg_file).should be_true
|
206
146
|
File.unlink(srg_file)
|
207
147
|
end
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
# @TODO: this test needs to be created for a small mock dataset!!
|
152
|
+
describe SRF, 'creating dta files' do
|
153
|
+
spec_large do
|
154
|
+
before(:all) do
|
155
|
+
file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
|
156
|
+
@srf = SRF.new(file)
|
157
|
+
end
|
158
|
+
|
159
|
+
it 'creates dta files' do
|
160
|
+
@srf.to_dta_files
|
161
|
+
File.exist?('020').should be_true
|
162
|
+
File.directory?('020').should be_true
|
163
|
+
File.exist?('020/020.3366.3366.2.dta').should be_true
|
164
|
+
lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
|
165
|
+
lines.first.should == "1113.10649290125 2\r\n"
|
166
|
+
lines[1].should == "164.56591796875 4817.0\r\n"
|
167
|
+
|
168
|
+
FileUtils.rm_rf '020'
|
169
|
+
end
|
170
|
+
end
|
208
171
|
|
209
172
|
end
|
@@ -3,300 +3,137 @@ module SRFHelper
|
|
3
3
|
File_32 = {
|
4
4
|
:header =>
|
5
5
|
{
|
6
|
-
:params_filename
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:enzyme
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
6
|
+
:params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
|
7
|
+
:raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
|
8
|
+
:modifications=>"(M* +15.99940) (STY# +79.97990)",
|
9
|
+
:sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
|
10
|
+
:ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
|
11
|
+
:db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
|
12
|
+
:enzyme=>"Enzyme:Trypsin(KR/P) (2)",
|
13
|
+
:version=>"3.2",
|
14
|
+
:model=>"LCQ Deca XP",
|
15
|
+
:dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
|
16
16
|
},
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
:mh => 1221.88989257812,
|
30
|
-
:dta_tic => 7703132.0,
|
31
|
-
:num_peaks => 74,
|
32
|
-
:charge => 1,
|
33
|
-
:ms_level => 2,
|
34
|
-
:total_num_possible_charge_states => 0,
|
35
|
-
:peaks => 592,
|
36
|
-
},
|
37
|
-
:dta_files_last => {
|
38
|
-
:mh => 2604.8360326775,
|
39
|
-
:dta_tic => 31977.0,
|
40
|
-
:num_peaks => 17,
|
41
|
-
:charge => 3,
|
42
|
-
:ms_level => 2,
|
43
|
-
:total_num_possible_charge_states => 0,
|
44
|
-
:peaks => 136,
|
45
|
-
},
|
46
|
-
:out_files_first => {
|
47
|
-
:num_hits => 10,
|
48
|
-
:computer => 'VELA',
|
49
|
-
:date_time => '05/12/2006, 10:58 AM,',
|
50
|
-
:hits => 10
|
51
|
-
},
|
52
|
-
:out_files_last => {
|
53
|
-
:num_hits => 10,
|
54
|
-
:computer => 'VELA',
|
55
|
-
:date_time => '05/12/2006, 11:11 AM,',
|
56
|
-
:hits => 10
|
57
|
-
},
|
58
|
-
:out_files_first_last_pep => {
|
59
|
-
:aaseq => 'QFSLSKSSLPK',
|
60
|
-
:sequence => 'K.QFSLSKSSLPK.S',
|
61
|
-
:mh => 1222.4156904522,
|
62
|
-
:deltacn => 1.1,
|
63
|
-
:sp => 57.4083709716797,
|
64
|
-
:xcorr => 0.802009999752045,
|
65
|
-
:id => 19977,
|
66
|
-
:rsp => 60,
|
67
|
-
:ions_matched => 7,
|
68
|
-
:ions_total => 20,
|
69
|
-
:prots => 1,
|
70
|
-
:deltamass => 0.525797874074897,
|
71
|
-
:ppm => 430.315265940608,
|
72
|
-
:base_name => '7MIX_STD_110802_1',
|
73
|
-
:first_scan => 2,
|
74
|
-
:last_scan => 2,
|
75
|
-
:charge => 1
|
76
|
-
},
|
77
|
-
:out_files_last_last_pep =>
|
78
|
-
{
|
79
|
-
:aaseq => 'EAFLVNSDLTLRAQLTEFRDHK',
|
80
|
-
:sequence => 'R.EAFLVNSDLTLRAQLTEFRDHK.L',
|
81
|
-
:mh => 2604.9025174522,
|
82
|
-
:deltacn => 1.1,
|
83
|
-
:sp => 26.1511478424072,
|
84
|
-
:xcorr => 0.634012818336487,
|
85
|
-
:id => 8105,
|
86
|
-
:rsp => 165,
|
87
|
-
:ions_matched => 6,
|
88
|
-
:ions_total => 84,
|
89
|
-
:prots => 1,
|
90
|
-
:deltamass => 0.0664847746993473,
|
91
|
-
:ppm => 25.523592988311,
|
92
|
-
:base_name => '7MIX_STD_110802_1',
|
93
|
-
:first_scan => 7161,
|
94
|
-
:last_scan => 7161,
|
95
|
-
:charge => 3,
|
96
|
-
},
|
17
|
+
:dta_gen => {
|
18
|
+
:min_group_count => 1,
|
19
|
+
:start_time => 1.5,
|
20
|
+
:start_mass => 300.0,
|
21
|
+
:end_scan => 3620,
|
22
|
+
:group_scan => 1,
|
23
|
+
:start_scan => 1,
|
24
|
+
:num_dta_files => 3747,
|
25
|
+
:min_ion_threshold => 15,
|
26
|
+
:end_mass => 4500.0,
|
27
|
+
},
|
28
|
+
:dta_files_first => {
|
97
29
|
|
98
|
-
:
|
99
|
-
|
30
|
+
:mh=>390.92919921875,
|
31
|
+
:dta_tic=>9041311.0,
|
32
|
+
:num_peaks=>48,
|
33
|
+
:charge=>1,
|
34
|
+
:ms_level=>2,
|
35
|
+
:total_num_possible_charge_states=>0,
|
36
|
+
},
|
37
|
+
:dta_files_last => {
|
38
|
+
:dta_tic=>842424.0,
|
39
|
+
:mh=>357.041198730469,
|
40
|
+
:num_peaks=>78,
|
41
|
+
:ms_level=>2,
|
42
|
+
:charge=>1,
|
43
|
+
:total_num_possible_charge_states=>0,
|
44
|
+
},
|
45
|
+
:out_files_first => {
|
46
|
+
:num_hits => 0,
|
47
|
+
:computer => 'VELA',
|
48
|
+
:date_time => '05/06/2008, 02:08 PM,',
|
49
|
+
:hits => 0,
|
50
|
+
},
|
51
|
+
:out_files_last => {
|
52
|
+
:num_hits => 0,
|
53
|
+
:computer => 'VELA',
|
54
|
+
:date_time => '05/06/2008, 02:11 PM,',
|
55
|
+
:hits => 0,
|
56
|
+
},
|
57
|
+
:out_files_first_pep => {
|
58
|
+
:aaseq=>"YRLGGSTK",
|
59
|
+
:sequence=>"R.Y#RLGGS#T#K.K",
|
60
|
+
:mh=>1121.9390244522,
|
61
|
+
:deltacn_orig=>0.0,
|
62
|
+
:sp=>29.8529319763184,
|
63
|
+
:xcorr=>0.123464643955231,
|
64
|
+
:id=>2104,
|
65
|
+
:rsp=>1,
|
66
|
+
:ions_matched=>5,
|
67
|
+
:ions_total=>35,
|
68
|
+
:prots=>1,
|
69
|
+
:deltamass=>-0.00579976654989878,
|
70
|
+
:ppm=>5.16938660859491,
|
71
|
+
:base_name=>"020",
|
72
|
+
:first_scan=>3,
|
73
|
+
:last_scan=>3,
|
74
|
+
:charge=>1,
|
75
|
+
:deltacn=>0.795928299427032,
|
76
|
+
:base_name=>"020",
|
77
|
+
},
|
78
|
+
:out_files_last_pep =>
|
79
|
+
{
|
80
|
+
:aaseq=>"LLPGTARTMRR",
|
81
|
+
:sequence=>"R.LLPGTARTMRR.M",
|
82
|
+
:mh=>1272.5493424522,
|
83
|
+
:deltacn_orig=>0.835508584976196,
|
84
|
+
:deltacn=>1.1,
|
85
|
+
:sp=>57.9885787963867,
|
86
|
+
:xcorr=>0.109200321137905,
|
87
|
+
:id=>1361,
|
88
|
+
:rsp=>11,
|
89
|
+
:ions_matched=>6,
|
90
|
+
:ions_total=>40,
|
91
|
+
:prots=>1,
|
92
|
+
:deltamass=>0.00243330985608736,
|
93
|
+
:ppm=>1.91215729542523,
|
94
|
+
:base_name=>"020",
|
95
|
+
:first_scan=>3619,
|
96
|
+
:last_scan=>3619,
|
97
|
+
:charge=>3,
|
98
|
+
:deltacn=>1.1,
|
99
|
+
:base_name=>"020",
|
100
|
+
},
|
100
101
|
|
102
|
+
:params => {
|
103
|
+
"add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta", "peptide_mass_tolerance"=>"25.0000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"2", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"3", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"
|
104
|
+
}
|
101
105
|
}
|
102
106
|
|
103
|
-
|
104
|
-
File_32.each
|
105
|
-
|
106
|
-
|
107
|
-
:sequest_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_sequest.log",
|
108
|
-
:raw_filename => "C:\\Xcalibur\\data\\john\\sash7\\7MIX_STD_110802_1.RAW",
|
109
|
-
:params_filename => "C:\\Xcalibur\\sequest\\john\\bioworks331\\sashimi7.params",
|
110
|
-
:dta_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_dta.log",
|
111
|
-
:version=>"3.5"
|
112
|
-
} )
|
113
|
-
|
114
|
-
File_35[:params].merge!( {
|
115
|
-
"add_O_Ornithine"=>"0.00000", "add_F_Phenylalanine"=>"0.00000", "add_A_Alanine"=>"0.00000", "add_C_Cysteine"=>"0.00000", "add_Y_Tyrosine"=>"0.00000", "add_X_LorI"=>"0.00000", "add_J_user_amino_acid"=>"0.00000", "add_Cterm_peptide"=>"0.00000", "add_S_Serine"=>"0.00000", "add_Nterm_protein"=>"0.00000", "add_D_Aspartic_Acid"=>"0.00000", "add_Q_Glutamine"=>"0.00000", "add_K_Lysine"=>"0.00000", "add_R_Arginine"=>"0.00000", "add_W_Tryptophan"=>"0.00000", "add_Nterm_peptide"=>"0.00000", "add_H_Histidine"=>"0.00000", "add_L_Leucine"=>"0.00000", "add_I_Isoleucine"=>"0.00000", "add_N_Asparagine"=>"0.00000", "add_B_avg_NandD"=>"0.00000", "add_Z_avg_QandE"=>"0.00000", "add_E_Glutamic_Acid"=>"0.00000", "add_G_Glycine"=>"0.00000", "add_P_Proline"=>"0.00000", "add_M_Methionine"=>"0.00000", "add_Cterm_protein"=>"0.00000", "add_V_Valine"=>"0.00000", "add_T_Threonine"=>"0.00000", "add_U_user_amino_acid"=>"0.00000", "match_peak_tolerance"=>"1.00000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.40000", "digest_mass_range"=>"400.0000 4500.0000", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.00000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "fragment_ion_units"=>"0", "ion_cutoff_percentage"=>"0.00000", "mass_type_fragment"=>"0"}
|
116
|
-
|
117
|
-
)
|
107
|
+
File_33 = {}
|
108
|
+
File_32.each do |k,v|
|
109
|
+
File_33[k] = v.dup
|
110
|
+
end
|
118
111
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
# parsing error)
|
123
|
-
File_35[:out_files_first_last_pep][:rsp] = 56
|
124
|
-
File_35[:out_files_last_last_pep][:rsp] = 125
|
125
|
-
|
112
|
+
## Bioworks 3.3 (srf version 3.3)
|
113
|
+
File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
|
114
|
+
File_33[:header][:version] = "3.3"
|
126
115
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
:raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
|
132
|
-
:enzyme => "Enzyme:Trypsin(KR/P) (2)",
|
133
|
-
:params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
|
134
|
-
:modifications => "",
|
135
|
-
:version => "3.2",
|
136
|
-
:dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
|
137
|
-
:model => "LCQ Deca XP",
|
138
|
-
}
|
139
|
-
## DTA Gen
|
140
|
-
Dta_gen = {
|
141
|
-
:group_scan => 1,
|
142
|
-
:start_time => 1.39999997615814,
|
143
|
-
:start_scan => 1,
|
144
|
-
:num_dta_files => 6952,
|
145
|
-
:min_ion_threshold => 15,
|
146
|
-
:end_mass => 4500.0,
|
147
|
-
:min_group_count => 1,
|
148
|
-
:start_mass => 400.0,
|
149
|
-
:end_scan => 7161,
|
150
|
-
}
|
151
|
-
|
152
|
-
Dta_files_first = {
|
153
|
-
:mh => 1221.88989257812,
|
154
|
-
:dta_tic => 7703132.0,
|
155
|
-
:num_peaks => 74,
|
156
|
-
:charge => 1,
|
157
|
-
:ms_level => 2,
|
158
|
-
:total_num_possible_charge_states => 0,
|
159
|
-
:peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
|
160
|
-
}
|
161
|
-
Dta_files_last = {
|
162
|
-
:mh => 2604.8360326775,
|
163
|
-
:dta_tic => 31977.0,
|
164
|
-
:num_peaks => 17,
|
165
|
-
:charge => 3,
|
166
|
-
:ms_level => 2,
|
167
|
-
:total_num_possible_charge_states => 0,
|
168
|
-
:peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
|
169
|
-
}
|
116
|
+
File_33[:out_files_first][:computer] = 'TESLA'
|
117
|
+
File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
|
118
|
+
File_33[:out_files_last][:computer] = 'TESLA'
|
119
|
+
File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'
|
170
120
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
}
|
121
|
+
File_33[:out_files_first_pep][:sp] = 29.8535556793213
|
122
|
+
File_33[:out_files_last_pep][:sp] = 57.987476348877
|
123
|
+
File_33[:out_files_last_pep][:rsp] = 10
|
124
|
+
File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219
|
176
125
|
|
177
|
-
Out_files_first_hit = {
|
178
|
-
:mh => 1220.5128044522,
|
179
|
-
:deltacn => 0.071944423019886, ## this is the modified version
|
180
|
-
:sp => 96.5815887451172,
|
181
|
-
:xcorr => 1.08377742767334,
|
182
|
-
:id => 224,
|
183
|
-
:rsp => 13,
|
184
|
-
:ions_matched => 8,
|
185
|
-
:ions_total => 20,
|
186
|
-
:sequence => "K.LCPHLTLLPGR.F",
|
187
|
-
:aaseq => "LCPHLTLLPGR",
|
188
|
-
:reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
|
189
|
-
:first_scan => 2,
|
190
|
-
:last_scan => 2,
|
191
|
-
:base_name => '7MIX_STD_110802_1',
|
192
|
-
:charge => 1,
|
193
|
-
}
|
194
126
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
:xcorr => 0.915680646896362,
|
205
|
-
:id => 13562,
|
206
|
-
:rsp => 4,
|
207
|
-
:ions_matched => 10,
|
208
|
-
:ions_total => 84,
|
209
|
-
:sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
|
210
|
-
:aaseq => "HLEINPNHPIVETLRQKAETHK",
|
211
|
-
:reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
|
212
|
-
:first_scan => 7161,
|
213
|
-
:last_scan => 7161,
|
214
|
-
:base_name => '7MIX_STD_110802_1',
|
215
|
-
:deltamass => 2605.9368784522 - 2604.8360326775,
|
216
|
-
:ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
|
217
|
-
:charge => 3,
|
218
|
-
}
|
219
|
-
Out_files_last_last_hit = {
|
220
|
-
:mh => 2604.9025174522,
|
221
|
-
:deltacn => 1.1,
|
222
|
-
:sp => 26.1511478424072,
|
223
|
-
:xcorr => 0.634012818336487,
|
224
|
-
:id => 8105,
|
225
|
-
:rsp => 165,
|
226
|
-
:ions_matched => 6,
|
227
|
-
:ions_total => 84,
|
228
|
-
:sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
|
229
|
-
:aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
|
230
|
-
:reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
|
231
|
-
:first_scan => 7161,
|
232
|
-
:last_scan => 7161,
|
233
|
-
:base_name => '7MIX_STD_110802_1',
|
234
|
-
:deltamass => 2604.9025174522 - 2604.8360326775,
|
235
|
-
:ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
|
236
|
-
:charge => 3,
|
237
|
-
}
|
238
|
-
Sequest_params = {
|
239
|
-
"add_F_Phenylalanine"=>"0.0000",
|
240
|
-
"add_O_Ornithine"=>"0.0000",
|
241
|
-
"add_Y_Tyrosine"=>"0.0000",
|
242
|
-
"add_C_Cysteine"=>"0.0000",
|
243
|
-
"add_A_Alanine"=>"0.0000",
|
244
|
-
"add_J_user_amino_acid"=>"0.0000",
|
245
|
-
"add_X_LorI"=>"0.0000",
|
246
|
-
"add_S_Serine"=>"0.0000",
|
247
|
-
"add_Cterm_peptide"=>"0.0000",
|
248
|
-
"add_Q_Glutamine"=>"0.0000",
|
249
|
-
"add_D_Aspartic_Acid"=>"0.0000",
|
250
|
-
"add_Nterm_protein"=>"0.0000",
|
251
|
-
"add_W_Tryptophan"=>"0.0000",
|
252
|
-
"add_R_Arginine"=>"0.0000",
|
253
|
-
"add_K_Lysine"=>"0.0000",
|
254
|
-
"add_H_Histidine"=>"0.0000",
|
255
|
-
"add_Nterm_peptide"=>"0.0000",
|
256
|
-
"add_E_Glutamic_Acid"=>"0.0000",
|
257
|
-
"add_Z_avg_QandE"=>"0.0000",
|
258
|
-
"add_B_avg_NandD"=>"0.0000",
|
259
|
-
"add_N_Asparagine"=>"0.0000",
|
260
|
-
"add_I_Isoleucine"=>"0.0000",
|
261
|
-
"add_L_Leucine"=>"0.0000",
|
262
|
-
"add_M_Methionine"=>"0.0000",
|
263
|
-
"add_P_Proline"=>"0.0000",
|
264
|
-
"add_G_Glycine"=>"0.0000",
|
265
|
-
"add_U_user_amino_acid"=>"0.0000",
|
266
|
-
"add_T_Threonine"=>"0.0000",
|
267
|
-
"add_V_Valine"=>"0.0000",
|
268
|
-
"add_Cterm_protein"=>"0.0000",
|
269
|
-
"match_peak_tolerance"=>"1.0000",
|
270
|
-
"match_peak_allowed_error"=>"1",
|
271
|
-
"normalize_xcorr"=>"0",
|
272
|
-
"nucleotide_reading_frame"=>"0",
|
273
|
-
"num_results"=>"250",
|
274
|
-
"sequence_header_filter"=>"",
|
275
|
-
"diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
|
276
|
-
"partial_sequence"=>"",
|
277
|
-
"max_num_internal_cleavage_sites"=>"2",
|
278
|
-
"search_engine"=>"SEQUEST",
|
279
|
-
"print_duplicate_references"=>"40",
|
280
|
-
"ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
|
281
|
-
"remove_precursor_peak"=>"0",
|
282
|
-
"num_output_lines"=>"10",
|
283
|
-
"second_database_name"=>"",
|
284
|
-
"first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
|
285
|
-
"peptide_mass_tolerance"=>"1.4000",
|
286
|
-
"digest_mass_range"=>"600.0 3500.0",
|
287
|
-
"enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
|
288
|
-
"show_fragment_ions"=>"0",
|
289
|
-
"protein_mass_filter"=>"0 0",
|
290
|
-
"term_diff_search_options"=>"0.000000 0.000000",
|
291
|
-
"num_description_lines"=>"5",
|
292
|
-
"fragment_ion_tolerance"=>"1.0000",
|
293
|
-
"peptide_mass_units"=>"0",
|
294
|
-
"mass_type_parent"=>"0",
|
295
|
-
"match_peak_count"=>"0",
|
296
|
-
"max_num_differential_per_peptide"=>"1",
|
297
|
-
"ion_cutoff_percentage"=>"0.0000",
|
298
|
-
"mass_type_fragment"=>"0"
|
299
|
-
}
|
127
|
+
## Bioworks 3.3.1 (srf version 3.5)
|
128
|
+
File_331 = {}
|
129
|
+
File_33.each do |k,v|
|
130
|
+
File_331[k] = v.dup
|
131
|
+
end
|
132
|
+
File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
|
133
|
+
File_331[:header][:version] = "3.5"
|
134
|
+
File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
|
135
|
+
File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
|
300
136
|
|
301
137
|
end
|
302
138
|
|
139
|
+
|
data/specs/spec_id_spec.rb
CHANGED
@@ -85,6 +85,7 @@ describe 'creating a list of proteins from peptides', :shared => true do
|
|
85
85
|
end
|
86
86
|
|
87
87
|
describe SpecID, 'with generic proteins' do
|
88
|
+
include SpecID
|
88
89
|
before(:all) do
|
89
90
|
@prots = (0..7).map do |n|
|
90
91
|
SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
|
@@ -95,6 +96,7 @@ describe SpecID, 'with generic proteins' do
|
|
95
96
|
end
|
96
97
|
|
97
98
|
describe SpecID, 'with array based proteins' do
|
99
|
+
include SpecID
|
98
100
|
before(:all) do
|
99
101
|
@prots = (0..7).map do |n|
|
100
102
|
SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
|
@@ -109,6 +111,7 @@ class TrueClass ; include Boolean end
|
|
109
111
|
class FalseClass; include Boolean end
|
110
112
|
|
111
113
|
describe SpecID, 'being created' do
|
114
|
+
include SpecID
|
112
115
|
it 'can be from small bioworks.xml' do
|
113
116
|
sp = SpecID.new(Tfiles + '/bioworks_small.xml')
|
114
117
|
sp.prots.size.should == 106
|