ms-sequest 0.0.15 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/ms/sequest/srf.rb +50 -45
- data/spec/ms/sequest/srf/sqt_spec.rb +12 -9
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.15
|
1
|
+
0.0.16
|
data/lib/ms/sequest/srf.rb
CHANGED
@@ -48,21 +48,26 @@ class Ms::Sequest::Srf
|
|
48
48
|
end
|
49
49
|
|
50
50
|
# returns a Sequest::Params object or nil if none
|
51
|
-
def self.get_sequest_params(filename)
|
51
|
+
def self.get_sequest_params_and_finish_pos(filename)
|
52
52
|
# split the file in half and only read the second half (since we can be
|
53
53
|
# confident that the params file will be there!)
|
54
|
+
|
55
|
+
params = nil
|
56
|
+
finish_parsing_io_pos = nil
|
54
57
|
File.open(filename, 'rb') do |handle|
|
55
58
|
halfway = handle.stat.size / 2
|
56
59
|
handle.seek halfway
|
57
60
|
last_half = handle.read
|
58
|
-
if
|
59
|
-
params_start_index =
|
61
|
+
if sequest_start_from_last_half = last_half.rindex('[SEQUEST]')
|
62
|
+
params_start_index = sequest_start_from_last_half + halfway
|
60
63
|
handle.seek(params_start_index)
|
61
|
-
Ms::Sequest::Params.new.parse_io(handle)
|
64
|
+
params = Ms::Sequest::Params.new.parse_io(handle)
|
65
|
+
finish_parsing_io_pos = handle.pos
|
62
66
|
else
|
63
67
|
nil # not found
|
64
68
|
end
|
65
69
|
end
|
70
|
+
[params, finish_parsing_io_pos]
|
66
71
|
end
|
67
72
|
|
68
73
|
def dta_start_byte
|
@@ -91,9 +96,6 @@ class Ms::Sequest::Srf
|
|
91
96
|
# :read_pephits => true | false (default true)
|
92
97
|
# # will attempt to read peptide hit information (equivalent to .out
|
93
98
|
# # files), otherwise, just reads the dta information.
|
94
|
-
#
|
95
|
-
# :params => <path/to/sequest.params> Some srf files do not include
|
96
|
-
# their sequest params file - include it here if necessary.
|
97
99
|
def initialize(filename=nil, opts={})
|
98
100
|
@peps = []
|
99
101
|
|
@@ -172,33 +174,28 @@ class Ms::Sequest::Srf
|
|
172
174
|
def from_file(filename, opts)
|
173
175
|
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
|
174
176
|
|
175
|
-
@params =
|
176
|
-
|
177
|
-
Ms::Sequest::Params.new(opts[:params])
|
178
|
-
else
|
179
|
-
Ms::Sequest::Srf.get_sequest_params(filename)
|
180
|
-
end
|
177
|
+
(@params, after_params_io_pos) = Ms::Sequest::Srf.get_sequest_params_and_finish_pos(filename)
|
178
|
+
return unless @params
|
181
179
|
|
182
180
|
dup_references = 0
|
183
181
|
dup_refs_gt_0 = false
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
#*****************************************************************************
|
189
|
-
#WARNING: This srf file lists only 1 protein per peptide! (based on the
|
190
|
-
#print_duplicate_references parameter in the sequest.params file used in its
|
191
|
-
#creation) So, downstream output will likewise only contain a single protein
|
192
|
-
#for each peptide hit. In many instances this is OK since downstream programs
|
193
|
-
#will recalculate protein-to-peptide linkages from the database file anyway.
|
194
|
-
#For complete protein lists per peptide hit, .srf files must be created with
|
195
|
-
#print_duplicate_references > 0. HINT: to capture all duplicate references,
|
196
|
-
#set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
197
|
-
#*****************************************************************************
|
198
|
-
# }
|
199
|
-
|
200
|
-
|
201
|
-
end
|
182
|
+
|
183
|
+
dup_references = @params.print_duplicate_references.to_i
|
184
|
+
if dup_references == 0
|
185
|
+
# warn %Q{
|
186
|
+
#*****************************************************************************
|
187
|
+
#WARNING: This srf file lists only 1 protein per peptide! (based on the
|
188
|
+
#print_duplicate_references parameter in the sequest.params file used in its
|
189
|
+
#creation) So, downstream output will likewise only contain a single protein
|
190
|
+
#for each peptide hit. In many instances this is OK since downstream programs
|
191
|
+
#will recalculate protein-to-peptide linkages from the database file anyway.
|
192
|
+
#For complete protein lists per peptide hit, .srf files must be created with
|
193
|
+
#print_duplicate_references > 0. HINT: to capture all duplicate references,
|
194
|
+
#set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
195
|
+
#*****************************************************************************
|
196
|
+
# }
|
197
|
+
else
|
198
|
+
dup_refs_gt_0 = true
|
202
199
|
end
|
203
200
|
|
204
201
|
File.open(filename, 'rb') do |fh|
|
@@ -225,11 +222,20 @@ class Ms::Sequest::Srf
|
|
225
222
|
(@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
|
226
223
|
else
|
227
224
|
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
225
|
+
|
228
226
|
@dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
|
229
227
|
if opts[:read_pephits]
|
230
228
|
# need the params file to know if the duplicate_references is set > 0
|
231
229
|
raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
|
232
230
|
@out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
|
231
|
+
|
232
|
+
# FOR DISPLAY ONLY!
|
233
|
+
#@out_files.each do |f|
|
234
|
+
# if f.num_hits == 10
|
235
|
+
# p f.hits.last
|
236
|
+
# end
|
237
|
+
#end
|
238
|
+
|
233
239
|
if fh.eof?
|
234
240
|
#warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
|
235
241
|
@params = nil
|
@@ -237,20 +243,12 @@ class Ms::Sequest::Srf
|
|
237
243
|
end
|
238
244
|
end
|
239
245
|
end
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
puts "MATCHING NULLS: "
|
247
|
-
p m
|
248
|
-
end
|
249
|
-
warn "no params file, no index, corrupt file"
|
250
|
-
else # we have a params file
|
251
|
-
# This is very sensitive to the grab_params method in sequest params
|
252
|
-
fh.read(12) ## gap between last params entry and index
|
253
|
-
end
|
246
|
+
|
247
|
+
fh.pos = after_params_io_pos
|
248
|
+
|
249
|
+
# This is very sensitive to the grab_params method in sequest params
|
250
|
+
fh.read(12) ## gap between last params entry and index
|
251
|
+
|
254
252
|
@index = read_scan_index(fh,@header.num_dta_files)
|
255
253
|
end
|
256
254
|
|
@@ -287,6 +285,12 @@ class Ms::Sequest::Srf
|
|
287
285
|
|
288
286
|
# returns an index where each entry is [first_scan, last_scan, charge]
|
289
287
|
def read_scan_index(fh, num)
|
288
|
+
#string = fh.read(80)
|
289
|
+
#puts "STRING: "
|
290
|
+
#p string
|
291
|
+
#puts string
|
292
|
+
#File.open("tmp.tmp",'wb') {|out| out.print string }
|
293
|
+
#abort 'her'
|
290
294
|
ind_len = 24
|
291
295
|
index = Array.new(num)
|
292
296
|
unpack_string = 'III'
|
@@ -294,6 +298,7 @@ class Ms::Sequest::Srf
|
|
294
298
|
ind_len.times do st << '0' end ## create a 24 byte string to receive data
|
295
299
|
num.times do |i|
|
296
300
|
fh.read(ind_len, st)
|
301
|
+
result = st.unpack(unpack_string)
|
297
302
|
index[i] = st.unpack(unpack_string)
|
298
303
|
end
|
299
304
|
index
|
@@ -39,14 +39,17 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
|
|
39
39
|
L gi|90111124|ref|NP_414904.2|
|
40
40
|
END
|
41
41
|
|
42
|
-
|
43
|
-
|
42
|
+
|
43
|
+
module SPEC
|
44
|
+
Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
45
|
+
Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
|
46
|
+
end
|
44
47
|
|
45
48
|
shared 'an srf to sqt converter' do
|
46
49
|
|
47
50
|
before do
|
48
51
|
@original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
49
|
-
@output = Srf_output
|
52
|
+
@output = SPEC::Srf_output
|
50
53
|
end
|
51
54
|
|
52
55
|
def del(file)
|
@@ -132,14 +135,14 @@ end
|
|
132
135
|
|
133
136
|
describe "programmatic interface srf to sqt" do
|
134
137
|
|
135
|
-
@srf = Ms::Sequest::Srf.new(Srf_file)
|
138
|
+
@srf = Ms::Sequest::Srf.new(SPEC::Srf_file)
|
136
139
|
|
137
|
-
@basic_conversion = lambda { @srf.to_sqt(Srf_output) }
|
138
|
-
@with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
|
139
|
-
@update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
|
140
|
+
@basic_conversion = lambda { @srf.to_sqt(SPEC::Srf_output) }
|
141
|
+
@with_new_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
|
142
|
+
@update_the_db_path = lambda { @srf.to_sqt(SPEC::Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
|
140
143
|
|
141
144
|
before do
|
142
|
-
@output = Srf_output
|
145
|
+
@output = SPEC::Srf_output
|
143
146
|
end
|
144
147
|
|
145
148
|
behaves_like "an srf to sqt converter"
|
@@ -170,7 +173,7 @@ describe "command-line interface srf to sqt" do
|
|
170
173
|
lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
|
171
174
|
end
|
172
175
|
|
173
|
-
base_cmd = "#{Srf_file} -o #{Srf_output}"
|
176
|
+
base_cmd = "#{SPEC::Srf_file} -o #{SPEC::Srf_output}"
|
174
177
|
@basic_conversion = commandline_lambda(base_cmd)
|
175
178
|
@with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
|
176
179
|
@update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 15
|
9
|
-
version: 0.0.15
|
8
|
+
- 16
|
9
|
+
version: 0.0.16
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John T. Prince
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-09-02 00:00:00 -06:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|