ms-sequest 0.0.15 → 0.0.16
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/ms/sequest/srf.rb +50 -45
- data/spec/ms/sequest/srf/sqt_spec.rb +12 -9
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.16
|
data/lib/ms/sequest/srf.rb
CHANGED
@@ -48,21 +48,26 @@ class Ms::Sequest::Srf
|
|
48
48
|
end
|
49
49
|
|
50
50
|
# returns [Sequest::Params object, io position just after the params were parsed], or [nil, nil] if no params section is found
|
51
|
-
def self.
|
51
|
+
def self.get_sequest_params_and_finish_pos(filename)
|
52
52
|
# split the file in half and only read the second half (since we can be
|
53
53
|
# confident that the params file will be there!)
|
54
|
+
|
55
|
+
params = nil
|
56
|
+
finish_parsing_io_pos = nil
|
54
57
|
File.open(filename, 'rb') do |handle|
|
55
58
|
halfway = handle.stat.size / 2
|
56
59
|
handle.seek halfway
|
57
60
|
last_half = handle.read
|
58
|
-
if
|
59
|
-
params_start_index =
|
61
|
+
if sequest_start_from_last_half = last_half.rindex('[SEQUEST]')
|
62
|
+
params_start_index = sequest_start_from_last_half + halfway
|
60
63
|
handle.seek(params_start_index)
|
61
|
-
Ms::Sequest::Params.new.parse_io(handle)
|
64
|
+
params = Ms::Sequest::Params.new.parse_io(handle)
|
65
|
+
finish_parsing_io_pos = handle.pos
|
62
66
|
else
|
63
67
|
nil # not found
|
64
68
|
end
|
65
69
|
end
|
70
|
+
[params, finish_parsing_io_pos]
|
66
71
|
end
|
67
72
|
|
68
73
|
def dta_start_byte
|
@@ -91,9 +96,6 @@ class Ms::Sequest::Srf
|
|
91
96
|
# :read_pephits => true | false (default true)
|
92
97
|
# # will attempt to read peptide hit information (equivalent to .out
|
93
98
|
# # files), otherwise, just reads the dta information.
|
94
|
-
#
|
95
|
-
# :params => <path/to/sequest.params> Some srf files do not include
|
96
|
-
# their sequest params file - include it here if necessary.
|
97
99
|
def initialize(filename=nil, opts={})
|
98
100
|
@peps = []
|
99
101
|
|
@@ -172,33 +174,28 @@ class Ms::Sequest::Srf
|
|
172
174
|
def from_file(filename, opts)
|
173
175
|
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
|
174
176
|
|
175
|
-
@params =
|
176
|
-
|
177
|
-
Ms::Sequest::Params.new(opts[:params])
|
178
|
-
else
|
179
|
-
Ms::Sequest::Srf.get_sequest_params(filename)
|
180
|
-
end
|
177
|
+
(@params, after_params_io_pos) = Ms::Sequest::Srf.get_sequest_params_and_finish_pos(filename)
|
178
|
+
return unless @params
|
181
179
|
|
182
180
|
dup_references = 0
|
183
181
|
dup_refs_gt_0 = false
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
#*****************************************************************************
|
189
|
-
#WARNING: This srf file lists only 1 protein per peptide! (based on the
|
190
|
-
#print_duplicate_references parameter in the sequest.params file used in its
|
191
|
-
#creation) So, downstream output will likewise only contain a single protein
|
192
|
-
#for each peptide hit. In many instances this is OK since downstream programs
|
193
|
-
#will recalculate protein-to-peptide linkages from the database file anyway.
|
194
|
-
#For complete protein lists per peptide hit, .srf files must be created with
|
195
|
-
#print_duplicate_references > 0. HINT: to capture all duplicate references,
|
196
|
-
#set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
197
|
-
#*****************************************************************************
|
198
|
-
# }
|
199
|
-
|
200
|
-
|
201
|
-
end
|
182
|
+
|
183
|
+
dup_references = @params.print_duplicate_references.to_i
|
184
|
+
if dup_references == 0
|
185
|
+
# warn %Q{
|
186
|
+
#*****************************************************************************
|
187
|
+
#WARNING: This srf file lists only 1 protein per peptide! (based on the
|
188
|
+
#print_duplicate_references parameter in the sequest.params file used in its
|
189
|
+
#creation) So, downstream output will likewise only contain a single protein
|
190
|
+
#for each peptide hit. In many instances this is OK since downstream programs
|
191
|
+
#will recalculate protein-to-peptide linkages from the database file anyway.
|
192
|
+
#For complete protein lists per peptide hit, .srf files must be created with
|
193
|
+
#print_duplicate_references > 0. HINT: to capture all duplicate references,
|
194
|
+
#set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
195
|
+
#*****************************************************************************
|
196
|
+
# }
|
197
|
+
else
|
198
|
+
dup_refs_gt_0 = true
|
202
199
|
end
|
203
200
|
|
204
201
|
File.open(filename, 'rb') do |fh|
|
@@ -225,11 +222,20 @@ class Ms::Sequest::Srf
|
|
225
222
|
(@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
|
226
223
|
else
|
227
224
|
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
225
|
+
|
228
226
|
@dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
|
229
227
|
if opts[:read_pephits]
|
230
228
|
# need the params file to know if the duplicate_references is set > 0
|
231
229
|
raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
|
232
230
|
@out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
|
231
|
+
|
232
|
+
# FOR DISPLAY ONLY!
|
233
|
+
#@out_files.each do |f|
|
234
|
+
# if f.num_hits == 10
|
235
|
+
# p f.hits.last
|
236
|
+
# end
|
237
|
+
#end
|
238
|
+
|
233
239
|
if fh.eof?
|
234
240
|
#warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
|
235
241
|
@params = nil
|
@@ -237,20 +243,12 @@ class Ms::Sequest::Srf
|
|
237
243
|
end
|
238
244
|
end
|
239
245
|
end
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
puts "MATCHING NULLS: "
|
247
|
-
p m
|
248
|
-
end
|
249
|
-
warn "no params file, no index, corrupt file"
|
250
|
-
else # we have a params file
|
251
|
-
# This is very sensitive to the grab_params method in sequest params
|
252
|
-
fh.read(12) ## gap between last params entry and index
|
253
|
-
end
|
246
|
+
|
247
|
+
fh.pos = after_params_io_pos
|
248
|
+
|
249
|
+
# This is very sensitive to the grab_params method in sequest params
|
250
|
+
fh.read(12) ## gap between last params entry and index
|
251
|
+
|
254
252
|
@index = read_scan_index(fh,@header.num_dta_files)
|
255
253
|
end
|
256
254
|
|
@@ -287,6 +285,12 @@ class Ms::Sequest::Srf
|
|
287
285
|
|
288
286
|
# returns an index where each entry is [first_scan, last_scan, charge]
|
289
287
|
def read_scan_index(fh, num)
|
288
|
+
#string = fh.read(80)
|
289
|
+
#puts "STRING: "
|
290
|
+
#p string
|
291
|
+
#puts string
|
292
|
+
#File.open("tmp.tmp",'wb') {|out| out.print string }
|
293
|
+
#abort 'her'
|
290
294
|
ind_len = 24
|
291
295
|
index = Array.new(num)
|
292
296
|
unpack_string = 'III'
|
@@ -294,6 +298,7 @@ class Ms::Sequest::Srf
|
|
294
298
|
ind_len.times do st << '0' end ## create a 24 byte string to receive data
|
295
299
|
num.times do |i|
|
296
300
|
fh.read(ind_len, st)
|
301
|
+
result = st.unpack(unpack_string)
|
297
302
|
index[i] = st.unpack(unpack_string)
|
298
303
|
end
|
299
304
|
index
|
@@ -39,14 +39,17 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
|
|
39
39
|
L gi|90111124|ref|NP_414904.2|
|
40
40
|
END
|
41
41
|
|
42
|
-
|
43
|
-
|
42
|
+
|
43
|
+
module SPEC
|
44
|
+
Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
45
|
+
Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
|
46
|
+
end
|
44
47
|
|
45
48
|
shared 'an srf to sqt converter' do
|
46
49
|
|
47
50
|
before do
|
48
51
|
@original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
49
|
-
@output = Srf_output
|
52
|
+
@output = SPEC::Srf_output
|
50
53
|
end
|
51
54
|
|
52
55
|
def del(file)
|
@@ -132,14 +135,14 @@ end
|
|
132
135
|
|
133
136
|
describe "programmatic interface srf to sqt" do
|
134
137
|
|
135
|
-
@srf = Ms::Sequest::Srf.new(Srf_file)
|
138
|
+
@srf = Ms::Sequest::Srf.new(SPEC::Srf_file)
|
136
139
|
|
137
|
-
@basic_conversion = lambda { @srf.to_sqt(Srf_output) }
|
138
|
-
@with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
|
139
|
-
@update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
|
140
|
+
@basic_conversion = lambda { @srf.to_sqt(SPEC::Srf_output) }
|
141
|
+
@with_new_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
|
142
|
+
@update_the_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
|
140
143
|
|
141
144
|
before do
|
142
|
-
@output = Srf_output
|
145
|
+
@output = SPEC::Srf_output
|
143
146
|
end
|
144
147
|
|
145
148
|
behaves_like "an srf to sqt converter"
|
@@ -170,7 +173,7 @@ describe "command-line interface srf to sqt" do
|
|
170
173
|
lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
|
171
174
|
end
|
172
175
|
|
173
|
-
base_cmd = "#{Srf_file} -o #{Srf_output}"
|
176
|
+
base_cmd = "#{SPEC::Srf_file} -o #{SPEC::Srf_output}"
|
174
177
|
@basic_conversion = commandline_lambda(base_cmd)
|
175
178
|
@with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
|
176
179
|
@update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 16
|
9
|
+
version: 0.0.16
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John T. Prince
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-09-02 00:00:00 -06:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|