ms-sequest 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/History +4 -0
- data/bin/srf_to_search.rb +51 -6
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/srf.rb +59 -39
- data/lib/ms/sequest/srf/search.rb +0 -31
- data/lib/ms/sequest/srf/search/tap.rb +39 -0
- metadata +4 -3
data/History
CHANGED
data/bin/srf_to_search.rb
CHANGED
@@ -1,19 +1,64 @@
|
|
1
|
-
#!/usr/bin/
|
1
|
+
#!/usr/bin/ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require '
|
4
|
+
require 'optparse'
|
5
5
|
require 'ms/sequest/srf/search'
|
6
6
|
|
7
|
+
opt = {
|
8
|
+
:format => 'mgf'
|
9
|
+
}
|
10
|
+
|
11
|
+
opts = OptionParser.new do |op|
|
12
|
+
op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
|
13
|
+
op.separator "outputs: <file>.mgf"
|
14
|
+
op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
|
15
|
+
end
|
16
|
+
|
7
17
|
if ARGV.size == 0
|
8
|
-
|
18
|
+
puts opts
|
19
|
+
exit
|
9
20
|
end
|
10
21
|
|
22
|
+
format = opt[:format]
|
23
|
+
|
24
|
+
ARGV.each do |srf_file|
|
25
|
+
base = srf_file.sub(/\.srf$/i, '')
|
26
|
+
newfile =
|
27
|
+
case format
|
28
|
+
when 'dta'
|
29
|
+
base
|
30
|
+
when 'mgf'
|
31
|
+
base << '.' << format
|
32
|
+
end
|
33
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
|
34
|
+
# options just speed up reading since we don't need .out info anyway
|
35
|
+
case format
|
36
|
+
when 'mgf'
|
37
|
+
srf.to_mgf(newfile)
|
38
|
+
when 'dta'
|
39
|
+
srf.to_dta_files(newfile)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
=begin
|
54
|
+
|
55
|
+
#require 'tap/task'
|
11
56
|
task_class = Ms::Sequest::Srf::SrfToSearch
|
12
57
|
|
13
58
|
parser = ConfigParser.new do |opts|
|
14
59
|
opts.separator "configurations"
|
15
60
|
opts.add task_class.configurations
|
16
|
-
|
61
|
+
|
17
62
|
opts.on "--help", "Print this help" do
|
18
63
|
puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
|
19
64
|
puts
|
@@ -23,11 +68,11 @@ parser = ConfigParser.new do |opts|
|
|
23
68
|
end
|
24
69
|
|
25
70
|
parser.parse!(ARGV)
|
26
|
-
|
71
|
+
|
27
72
|
task = task_class.new(parser.config)
|
28
73
|
|
29
74
|
ARGV.each do |file|
|
30
75
|
task.execute(file)
|
31
76
|
end
|
32
77
|
|
33
|
-
|
78
|
+
=end
|
data/lib/ms/sequest.rb
CHANGED
data/lib/ms/sequest/srf.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# standard lib
|
3
3
|
require 'set'
|
4
4
|
require 'fileutils'
|
5
|
+
require 'scanf'
|
5
6
|
|
6
7
|
# other gems
|
7
8
|
require 'arrayclass'
|
@@ -12,14 +13,15 @@ require 'ms/id/protein'
|
|
12
13
|
require 'ms/id/search'
|
13
14
|
require 'ms/sequest/params'
|
14
15
|
|
15
|
-
# for conversions
|
16
|
-
require 'ms/sequest/srf/search'
|
17
|
-
require 'ms/sequest/srf/sqt'
|
18
16
|
|
19
17
|
module Ms ; end
|
20
18
|
module Ms::Sequest ; end
|
21
19
|
|
22
20
|
class Ms::Sequest::Srf
|
21
|
+
|
22
|
+
class NoSequestParamsError < ArgumentError
|
23
|
+
end
|
24
|
+
|
23
25
|
include Ms::Id::Search
|
24
26
|
|
25
27
|
# inherits peps and prots from Search
|
@@ -70,6 +72,7 @@ class Ms::Sequest::Srf
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
75
|
+
|
73
76
|
# opts:
|
74
77
|
# :filter_by_precursor_mass_tolerance => true | false (default true)
|
75
78
|
# # this will filter by the sequest params prec tolerance as is
|
@@ -87,6 +90,9 @@ class Ms::Sequest::Srf
|
|
87
90
|
# :read_pephits => true | false (default true)
|
88
91
|
# # will attempt to read peptide hit information (equivalent to .out
|
89
92
|
# # files), otherwise, just reads the dta information.
|
93
|
+
#
|
94
|
+
# :params => <path/to/sequest.params> Some srf files do not include
|
95
|
+
# their sequest params file - include it here if necessary.
|
90
96
|
def initialize(filename=nil, opts={})
|
91
97
|
@peps = []
|
92
98
|
|
@@ -164,33 +170,38 @@ class Ms::Sequest::Srf
|
|
164
170
|
# opts are the same as for 'new'
|
165
171
|
def from_file(filename, opts)
|
166
172
|
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
|
167
|
-
|
173
|
+
|
174
|
+
@params =
|
175
|
+
if opts[:params]
|
176
|
+
Ms::Sequest::Params.new(opts[:params])
|
177
|
+
else
|
178
|
+
Ms::Sequest::Srf.get_sequest_params(filename)
|
179
|
+
end
|
180
|
+
|
168
181
|
dup_references = 0
|
169
182
|
dup_refs_gt_0 = false
|
170
|
-
if params
|
171
|
-
dup_references = params.print_duplicate_references.to_i
|
183
|
+
if @params
|
184
|
+
dup_references = @params.print_duplicate_references.to_i
|
172
185
|
if dup_references == 0
|
173
|
-
warn
|
174
|
-
|
175
|
-
WARNING: This srf file lists only 1 protein per peptide! (based on the
|
176
|
-
print_duplicate_references parameter in the sequest.params file used in its
|
177
|
-
creation) So, downstream output will likewise only contain a single protein
|
178
|
-
for each peptide hit. In many instances this is OK since downstream programs
|
179
|
-
will recalculate protein-to-peptide linkages from the database file anyway.
|
180
|
-
For complete protein lists per peptide hit, .srf files must be created with
|
181
|
-
print_duplicate_references > 0. HINT: to capture all duplicate references,
|
182
|
-
set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
183
|
-
|
184
|
-
|
186
|
+
# warn %Q{
|
187
|
+
#*****************************************************************************
|
188
|
+
#WARNING: This srf file lists only 1 protein per peptide! (based on the
|
189
|
+
#print_duplicate_references parameter in the sequest.params file used in its
|
190
|
+
#creation) So, downstream output will likewise only contain a single protein
|
191
|
+
#for each peptide hit. In many instances this is OK since downstream programs
|
192
|
+
#will recalculate protein-to-peptide linkages from the database file anyway.
|
193
|
+
#For complete protein lists per peptide hit, .srf files must be created with
|
194
|
+
#print_duplicate_references > 0. HINT: to capture all duplicate references,
|
195
|
+
#set the sequest parameter 'print_duplicate_references' to 100 or greater.
|
196
|
+
#*****************************************************************************
|
197
|
+
# }
|
185
198
|
else
|
186
199
|
dup_refs_gt_0 = true
|
187
200
|
end
|
188
|
-
else
|
189
|
-
warn "no params file found in srf, could be combined file or truncated/corrupt file!"
|
190
201
|
end
|
191
202
|
|
192
203
|
File.open(filename, 'rb') do |fh|
|
193
|
-
@header = Ms::Sequest::Srf::Header.new.from_io(fh)
|
204
|
+
@header = Ms::Sequest::Srf::Header.new.from_io(fh)
|
194
205
|
@version = @header.version
|
195
206
|
|
196
207
|
unpack_35 = case @version
|
@@ -215,6 +226,8 @@ END
|
|
215
226
|
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
216
227
|
@dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
|
217
228
|
if opts[:read_pephits]
|
229
|
+
# need the params file to know if the duplicate_references is set > 0
|
230
|
+
raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
|
218
231
|
@out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
|
219
232
|
if fh.eof?
|
220
233
|
#warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
|
@@ -228,38 +241,44 @@ END
|
|
228
241
|
if @params.nil?
|
229
242
|
fh.pos = start_pos_in_case
|
230
243
|
# seek to the index
|
231
|
-
fh.scanf "\000\000\000\000"
|
244
|
+
fh.scanf "\000\000\000\000" do |m|
|
245
|
+
puts "MATCHING NULLS: "
|
246
|
+
p m
|
247
|
+
end
|
248
|
+
warn "no params file, no index, corrupt file"
|
232
249
|
else # we have a params file
|
233
250
|
# This is very sensitive to the grab_params method in sequest params
|
234
251
|
fh.read(12) ## gap between last params entry and index
|
235
252
|
end
|
236
253
|
@index = read_scan_index(fh,@header.num_dta_files)
|
237
|
-
#p @index
|
238
254
|
end
|
239
255
|
|
240
256
|
|
241
257
|
### UPDATE SOME THINGS:
|
242
258
|
# give each hit a base_name, first_scan, last_scan
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
259
|
+
if opts[:read_pephits] && !@header.combined
|
260
|
+
@index.each_with_index do |ind,i|
|
261
|
+
mass_measured = @dta_files[i][0]
|
262
|
+
@out_files[i][0,3] = *ind
|
263
|
+
pep_hits = @out_files[i][6]
|
264
|
+
@peps.push( *pep_hits )
|
265
|
+
pep_hits.each do |pep_hit|
|
266
|
+
pep_hit[14,4] = @base_name, *ind
|
267
|
+
# add the deltamass
|
268
|
+
pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
|
269
|
+
pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
|
270
|
+
pep_hit[18] = self ## link with the srf object
|
271
|
+
end
|
254
272
|
end
|
255
|
-
end
|
256
273
|
|
257
|
-
|
274
|
+
filter_by_precursor_mass_tolerance! if params
|
258
275
|
|
259
|
-
|
260
|
-
|
261
|
-
|
276
|
+
if opts[:link_protein_hits]
|
277
|
+
(@peps, @prots) = merge!([peps]) do |_prot, _peps|
|
278
|
+
prot = Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
|
279
|
+
end
|
262
280
|
end
|
281
|
+
|
263
282
|
end
|
264
283
|
|
265
284
|
self
|
@@ -761,3 +780,4 @@ end
|
|
761
780
|
|
762
781
|
|
763
782
|
|
783
|
+
|
@@ -1,9 +1,7 @@
|
|
1
1
|
|
2
|
-
require 'tap/task'
|
3
2
|
require 'ms/sequest/srf'
|
4
3
|
require 'ms/mass'
|
5
4
|
|
6
|
-
|
7
5
|
# These are for outputting formats used in MS/MS Search engines
|
8
6
|
|
9
7
|
module Ms
|
@@ -101,35 +99,6 @@ module Ms
|
|
101
99
|
end
|
102
100
|
end
|
103
101
|
|
104
|
-
# Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
|
105
|
-
# searching
|
106
|
-
#
|
107
|
-
# outputs the appropriate file or directory structure for <file>.srf:
|
108
|
-
# <file>.mgf # file for mgf
|
109
|
-
# <file> # the basename directory for dta
|
110
|
-
class SrfToSearch < Tap::Task
|
111
|
-
config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
|
112
|
-
def process(srf_file)
|
113
|
-
base = srf_file.sub(/\.srf$/i, '')
|
114
|
-
newfile =
|
115
|
-
case format
|
116
|
-
when 'dta'
|
117
|
-
base
|
118
|
-
when 'mgf'
|
119
|
-
base << '.' << format
|
120
|
-
end
|
121
|
-
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
|
122
|
-
# options just speed up reading since we don't need .out info anyway
|
123
|
-
case format
|
124
|
-
when 'mgf'
|
125
|
-
srf.to_mgf(newfile)
|
126
|
-
when 'dta'
|
127
|
-
srf.to_dta_files(newfile)
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
|
133
102
|
end # Srf
|
134
103
|
end # Sequest
|
135
104
|
end # Ms
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'tap/task'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Sequest
|
5
|
+
class Srf
|
6
|
+
|
7
|
+
# Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
|
8
|
+
# searching
|
9
|
+
#
|
10
|
+
# outputs the appropriate file or directory structure for <file>.srf:
|
11
|
+
# <file>.mgf # file for mgf
|
12
|
+
# <file> # the basename directory for dta
|
13
|
+
class SrfToSearch < Tap::Task
|
14
|
+
config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
|
15
|
+
def process(srf_file)
|
16
|
+
base = srf_file.sub(/\.srf$/i, '')
|
17
|
+
newfile =
|
18
|
+
case format
|
19
|
+
when 'dta'
|
20
|
+
base
|
21
|
+
when 'mgf'
|
22
|
+
base << '.' << format
|
23
|
+
end
|
24
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
|
25
|
+
# options just speed up reading since we don't need .out info anyway
|
26
|
+
case format
|
27
|
+
when 'mgf'
|
28
|
+
srf.to_mgf(newfile)
|
29
|
+
when 'dta'
|
30
|
+
srf.to_dta_files(newfile)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
end # Srf
|
37
|
+
end # Sequest
|
38
|
+
end # Ms
|
39
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-sequest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-12-03 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -67,6 +67,7 @@ files:
|
|
67
67
|
- lib/ms/sequest.rb
|
68
68
|
- lib/ms/sequest/sqt.rb
|
69
69
|
- lib/ms/sequest/params.rb
|
70
|
+
- lib/ms/sequest/srf/search/tap.rb
|
70
71
|
- lib/ms/sequest/srf/sqt.rb
|
71
72
|
- lib/ms/sequest/srf/search.rb
|
72
73
|
- lib/ms/sequest/srf.rb
|
@@ -102,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
103
|
requirements: []
|
103
104
|
|
104
105
|
rubyforge_project: mspire
|
105
|
-
rubygems_version: 1.3.
|
106
|
+
rubygems_version: 1.3.5
|
106
107
|
signing_key:
|
107
108
|
specification_version: 3
|
108
109
|
summary: An mspire library supporting SEQUEST, Bioworks, SQT, etc
|