ms-sequest 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +4 -0
- data/bin/srf_to_search.rb +51 -6
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/srf.rb +59 -39
- data/lib/ms/sequest/srf/search.rb +0 -31
- data/lib/ms/sequest/srf/search/tap.rb +39 -0
- metadata +4 -3
data/History
CHANGED
data/bin/srf_to_search.rb
CHANGED
@@ -1,19 +1,64 @@
|
|
1
|
-
#!/usr/bin/
|
1
|
+
#!/usr/bin/ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require '
|
4
|
+
require 'optparse'
|
5
5
|
require 'ms/sequest/srf/search'
|
6
6
|
|
7
|
+
opt = {
|
8
|
+
:format => 'mgf'
|
9
|
+
}
|
10
|
+
|
11
|
+
opts = OptionParser.new do |op|
|
12
|
+
op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
|
13
|
+
op.separator "outputs: <file>.mgf"
|
14
|
+
op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
|
15
|
+
end
|
16
|
+
|
7
17
|
if ARGV.size == 0
|
8
|
-
|
18
|
+
puts opts
|
19
|
+
exit
|
9
20
|
end
|
10
21
|
|
22
|
+
format = opt[:format]
|
23
|
+
|
24
|
+
ARGV.each do |srf_file|
|
25
|
+
base = srf_file.sub(/\.srf$/i, '')
|
26
|
+
newfile =
|
27
|
+
case format
|
28
|
+
when 'dta'
|
29
|
+
base
|
30
|
+
when 'mgf'
|
31
|
+
base << '.' << format
|
32
|
+
end
|
33
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
|
34
|
+
# options just speed up reading since we don't need .out info anyway
|
35
|
+
case format
|
36
|
+
when 'mgf'
|
37
|
+
srf.to_mgf(newfile)
|
38
|
+
when 'dta'
|
39
|
+
srf.to_dta_files(newfile)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
=begin
|
54
|
+
|
55
|
+
#require 'tap/task'
|
11
56
|
task_class = Ms::Sequest::Srf::SrfToSearch
|
12
57
|
|
13
58
|
parser = ConfigParser.new do |opts|
|
14
59
|
opts.separator "configurations"
|
15
60
|
opts.add task_class.configurations
|
16
|
-
|
61
|
+
|
17
62
|
opts.on "--help", "Print this help" do
|
18
63
|
puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
|
19
64
|
puts
|
@@ -23,11 +68,11 @@ parser = ConfigParser.new do |opts|
|
|
23
68
|
end
|
24
69
|
|
25
70
|
parser.parse!(ARGV)
|
26
|
-
|
71
|
+
|
27
72
|
task = task_class.new(parser.config)
|
28
73
|
|
29
74
|
ARGV.each do |file|
|
30
75
|
task.execute(file)
|
31
76
|
end
|
32
77
|
|
33
|
-
|
78
|
+
=end
|
data/lib/ms/sequest.rb
CHANGED
data/lib/ms/sequest/srf.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# standard lib
|
3
3
|
require 'set'
|
4
4
|
require 'fileutils'
|
5
|
+
require 'scanf'
|
5
6
|
|
6
7
|
# other gems
|
7
8
|
require 'arrayclass'
|
@@ -12,14 +13,15 @@ require 'ms/id/protein'
|
|
12
13
|
require 'ms/id/search'
|
13
14
|
require 'ms/sequest/params'
|
14
15
|
|
15
|
-
# for conversions
|
16
|
-
require 'ms/sequest/srf/search'
|
17
|
-
require 'ms/sequest/srf/sqt'
|
18
16
|
|
19
17
|
module Ms ; end
|
20
18
|
module Ms::Sequest ; end
|
21
19
|
|
22
20
|
class Ms::Sequest::Srf
|
21
|
+
|
22
|
+
class NoSequestParamsError < ArgumentError
|
23
|
+
end
|
24
|
+
|
23
25
|
include Ms::Id::Search
|
24
26
|
|
25
27
|
# inherits peps and prots from Search
|
@@ -70,6 +72,7 @@ class Ms::Sequest::Srf
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
75
|
+
|
73
76
|
# opts:
|
74
77
|
# :filter_by_precursor_mass_tolerance => true | false (default true)
|
75
78
|
# # this will filter by the sequest params prec tolerance as is
|
@@ -87,6 +90,9 @@ class Ms::Sequest::Srf
|
|
87
90
|
# :read_pephits => true | false (default true)
|
88
91
|
# # will attempt to read peptide hit information (equivalent to .out
|
89
92
|
# # files), otherwise, just reads the dta information.
|
93
|
+
#
|
94
|
+
# :params => <path/to/sequest.params> Some srf files do not include
|
95
|
+
# their sequest params file - include it here if necessary.
|
90
96
|
def initialize(filename=nil, opts={})
|
91
97
|
@peps = []
|
92
98
|
|
@@ -164,33 +170,38 @@ class Ms::Sequest::Srf
|
|
164
170
|
# opts are the same as for 'new'
|
165
171
|
def from_file(filename, opts)
|
166
172
|
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
|
167
|
-
|
173
|
+
|
174
|
+
@params =
|
175
|
+
if opts[:params]
|
176
|
+
Ms::Sequest::Params.new(opts[:params])
|
177
|
+
else
|
178
|
+
Ms::Sequest::Srf.get_sequest_params(filename)
|
179
|
+
end
|
180
|
+
|
168
181
|
dup_references = 0
|
169
182
|
dup_refs_gt_0 = false
|
170
|
-
if params
|
171
|
-
dup_references = params.print_duplicate_references.to_i
|
183
|
+
if @params
|
184
|
+
dup_references = @params.print_duplicate_references.to_i
|
172
185
|
if dup_references == 0
|
173
|
-
warn
|
174
|
-
|
175
|
-
WARNING: This srf file lists only 1 protein per peptide! (based on the
|
176
|
-
print_duplicate_references parameter in the sequest.params file used in its
|
177
|
-
creation) So, downstream output will likewise only contain a single protein
|
178
|
-
for each peptide hit. In many instances this is OK since downstream programs
|
179
|
-
will recalculate protein-to-peptide linkages from the database file anyway.
|
180
|
-
For complete protein lists per peptide hit, .srf files must be created with
|
181
|
-
print_duplicate_references > 0. HINT: to capture all duplicate references,
|
182
|
-
set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
183
|
-
|
184
|
-
|
186
|
+
# warn %Q{
|
187
|
+
#*****************************************************************************
|
188
|
+
#WARNING: This srf file lists only 1 protein per peptide! (based on the
|
189
|
+
#print_duplicate_references parameter in the sequest.params file used in its
|
190
|
+
#creation) So, downstream output will likewise only contain a single protein
|
191
|
+
#for each peptide hit. In many instances this is OK since downstream programs
|
192
|
+
#will recalculate protein-to-peptide linkages from the database file anyway.
|
193
|
+
#For complete protein lists per peptide hit, .srf files must be created with
|
194
|
+
#print_duplicate_references > 0. HINT: to capture all duplicate references,
|
195
|
+
#set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
196
|
+
#*****************************************************************************
|
197
|
+
# }
|
185
198
|
else
|
186
199
|
dup_refs_gt_0 = true
|
187
200
|
end
|
188
|
-
else
|
189
|
-
warn "no params file found in srf, could be combined file or truncated/corrupt file!"
|
190
201
|
end
|
191
202
|
|
192
203
|
File.open(filename, 'rb') do |fh|
|
193
|
-
@header = Ms::Sequest::Srf::Header.new.from_io(fh)
|
204
|
+
@header = Ms::Sequest::Srf::Header.new.from_io(fh)
|
194
205
|
@version = @header.version
|
195
206
|
|
196
207
|
unpack_35 = case @version
|
@@ -215,6 +226,8 @@ END
|
|
215
226
|
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
216
227
|
@dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
|
217
228
|
if opts[:read_pephits]
|
229
|
+
# need the params file to know if the duplicate_references is set > 0
|
230
|
+
raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
|
218
231
|
@out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
|
219
232
|
if fh.eof?
|
220
233
|
#warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
|
@@ -228,38 +241,44 @@ END
|
|
228
241
|
if @params.nil?
|
229
242
|
fh.pos = start_pos_in_case
|
230
243
|
# seek to the index
|
231
|
-
fh.scanf "\000\000\000\000"
|
244
|
+
fh.scanf "\000\000\000\000" do |m|
|
245
|
+
puts "MATCHING NULLS: "
|
246
|
+
p m
|
247
|
+
end
|
248
|
+
warn "no params file, no index, corrupt file"
|
232
249
|
else # we have a params file
|
233
250
|
# This is very sensitive to the grab_params method in sequest params
|
234
251
|
fh.read(12) ## gap between last params entry and index
|
235
252
|
end
|
236
253
|
@index = read_scan_index(fh,@header.num_dta_files)
|
237
|
-
#p @index
|
238
254
|
end
|
239
255
|
|
240
256
|
|
241
257
|
### UPDATE SOME THINGS:
|
242
258
|
# give each hit a base_name, first_scan, last_scan
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
259
|
+
if opts[:read_pephits] && !@header.combined
|
260
|
+
@index.each_with_index do |ind,i|
|
261
|
+
mass_measured = @dta_files[i][0]
|
262
|
+
@out_files[i][0,3] = *ind
|
263
|
+
pep_hits = @out_files[i][6]
|
264
|
+
@peps.push( *pep_hits )
|
265
|
+
pep_hits.each do |pep_hit|
|
266
|
+
pep_hit[14,4] = @base_name, *ind
|
267
|
+
# add the deltamass
|
268
|
+
pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
|
269
|
+
pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
|
270
|
+
pep_hit[18] = self ## link with the srf object
|
271
|
+
end
|
254
272
|
end
|
255
|
-
end
|
256
273
|
|
257
|
-
|
274
|
+
filter_by_precursor_mass_tolerance! if params
|
258
275
|
|
259
|
-
|
260
|
-
|
261
|
-
|
276
|
+
if opts[:link_protein_hits]
|
277
|
+
(@peps, @prots) = merge!([peps]) do |_prot, _peps|
|
278
|
+
prot = Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
|
279
|
+
end
|
262
280
|
end
|
281
|
+
|
263
282
|
end
|
264
283
|
|
265
284
|
self
|
@@ -761,3 +780,4 @@ end
|
|
761
780
|
|
762
781
|
|
763
782
|
|
783
|
+
|
@@ -1,9 +1,7 @@
|
|
1
1
|
|
2
|
-
require 'tap/task'
|
3
2
|
require 'ms/sequest/srf'
|
4
3
|
require 'ms/mass'
|
5
4
|
|
6
|
-
|
7
5
|
# These are for outputting formats used in MS/MS Search engines
|
8
6
|
|
9
7
|
module Ms
|
@@ -101,35 +99,6 @@ module Ms
|
|
101
99
|
end
|
102
100
|
end
|
103
101
|
|
104
|
-
# Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
|
105
|
-
# searching
|
106
|
-
#
|
107
|
-
# outputs the appropriate file or directory structure for <file>.srf:
|
108
|
-
# <file>.mgf # file for mgf
|
109
|
-
# <file> # the basename directory for dta
|
110
|
-
class SrfToSearch < Tap::Task
|
111
|
-
config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
|
112
|
-
def process(srf_file)
|
113
|
-
base = srf_file.sub(/\.srf$/i, '')
|
114
|
-
newfile =
|
115
|
-
case format
|
116
|
-
when 'dta'
|
117
|
-
base
|
118
|
-
when 'mgf'
|
119
|
-
base << '.' << format
|
120
|
-
end
|
121
|
-
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
|
122
|
-
# options just speed up reading since we don't need .out info anyway
|
123
|
-
case format
|
124
|
-
when 'mgf'
|
125
|
-
srf.to_mgf(newfile)
|
126
|
-
when 'dta'
|
127
|
-
srf.to_dta_files(newfile)
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
|
133
102
|
end # Srf
|
134
103
|
end # Sequest
|
135
104
|
end # Ms
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'tap/task'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Sequest
|
5
|
+
class Srf
|
6
|
+
|
7
|
+
# Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
|
8
|
+
# searching
|
9
|
+
#
|
10
|
+
# outputs the appropriate file or directory structure for <file>.srf:
|
11
|
+
# <file>.mgf # file for mgf
|
12
|
+
# <file> # the basename directory for dta
|
13
|
+
class SrfToSearch < Tap::Task
|
14
|
+
config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
|
15
|
+
def process(srf_file)
|
16
|
+
base = srf_file.sub(/\.srf$/i, '')
|
17
|
+
newfile =
|
18
|
+
case format
|
19
|
+
when 'dta'
|
20
|
+
base
|
21
|
+
when 'mgf'
|
22
|
+
base << '.' << format
|
23
|
+
end
|
24
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
|
25
|
+
# options just speed up reading since we don't need .out info anyway
|
26
|
+
case format
|
27
|
+
when 'mgf'
|
28
|
+
srf.to_mgf(newfile)
|
29
|
+
when 'dta'
|
30
|
+
srf.to_dta_files(newfile)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
end # Srf
|
37
|
+
end # Sequest
|
38
|
+
end # Ms
|
39
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-sequest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-12-03 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -67,6 +67,7 @@ files:
|
|
67
67
|
- lib/ms/sequest.rb
|
68
68
|
- lib/ms/sequest/sqt.rb
|
69
69
|
- lib/ms/sequest/params.rb
|
70
|
+
- lib/ms/sequest/srf/search/tap.rb
|
70
71
|
- lib/ms/sequest/srf/sqt.rb
|
71
72
|
- lib/ms/sequest/srf/search.rb
|
72
73
|
- lib/ms/sequest/srf.rb
|
@@ -102,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
103
|
requirements: []
|
103
104
|
|
104
105
|
rubyforge_project: mspire
|
105
|
-
rubygems_version: 1.3.
|
106
|
+
rubygems_version: 1.3.5
|
106
107
|
signing_key:
|
107
108
|
specification_version: 3
|
108
109
|
summary: An mspire library supporting SEQUEST, Bioworks, SQT, etc
|